From cdad72207d164569cb4bf647eb824a7f93e8d388 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Sep 2008 16:00:57 +0200 Subject: lockstat: documentation update Christoph noted that the documentation doesn't tell in what unit the lockstat times are reported, amend this. Reported-by: Christoph Hellwig Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- Documentation/lockstat.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index 4ba4664ce5c..02f36f5c64f 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -100,6 +100,7 @@ The first lock (05-10) is a read/write lock, and shows two lines above the short separator. The contention points don't match the column descriptors, they have two: contentions and [] symbol. +The integer part of the time values is in us. View the top contending locks: -- cgit v1.2.3-70-g09d2 From c7e78cff6b7518212247fb20b1dc6411540dc9af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Oct 2008 23:17:09 +0200 Subject: lockstat: contend with points We currently only provide points that have to wait on contention, also list the points we have to wait for. 
Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- Documentation/lockstat.txt | 50 +++++++++++++++++++++++++++++----------------- include/linux/lockdep.h | 13 ++++++++---- kernel/lockdep.c | 33 +++++++++++++++++++----------- kernel/lockdep_proc.c | 21 +++++++++++++++++-- kernel/mutex.c | 2 +- 5 files changed, 82 insertions(+), 37 deletions(-) (limited to 'Documentation') diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index 02f36f5c64f..9cb9138f7a7 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -71,34 +71,48 @@ Look at the current lock statistics: # less /proc/lock_stat -01 lock_stat version 0.2 +01 lock_stat version 0.3 02 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total 04 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 05 -06 &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 -07 &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 -08 -------------------------- -09 &inode->i_data.tree_lock 0 [] add_to_page_cache+0x5f/0x190 -10 -11 ............................................................................................................................................................................................... 
-12 -13 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 -14 ----------- -15 dcache_lock 180 [] sys_getcwd+0x11e/0x230 -16 dcache_lock 165 [] d_alloc+0x15a/0x210 -17 dcache_lock 33 [] _atomic_dec_and_lock+0x4d/0x70 -18 dcache_lock 1 [] shrink_dcache_parent+0x18/0x130 +06 &mm->mmap_sem-W: 233 538 18446744073708 22924.27 607243.51 1342 45806 1.71 8595.89 1180582.34 +07 &mm->mmap_sem-R: 205 587 18446744073708 28403.36 731975.00 1940 412426 0.58 187825.45 6307502.88 +08 --------------- +09 &mm->mmap_sem 487 [] do_page_fault+0x466/0x928 +10 &mm->mmap_sem 179 [] sys_mprotect+0xcd/0x21d +11 &mm->mmap_sem 279 [] sys_mmap+0x75/0xce +12 &mm->mmap_sem 76 [] sys_munmap+0x32/0x59 +13 --------------- +14 &mm->mmap_sem 270 [] sys_mmap+0x75/0xce +15 &mm->mmap_sem 431 [] do_page_fault+0x466/0x928 +16 &mm->mmap_sem 138 [] sys_munmap+0x32/0x59 +17 &mm->mmap_sem 145 [] sys_mprotect+0xcd/0x21d +18 +19 ............................................................................................................................................................................................... +20 +21 dcache_lock: 621 623 0.52 118.26 1053.02 6745 91930 0.29 316.29 118423.41 +22 ----------- +23 dcache_lock 179 [] _atomic_dec_and_lock+0x34/0x54 +24 dcache_lock 113 [] d_alloc+0x19a/0x1eb +25 dcache_lock 99 [] d_rehash+0x1b/0x44 +26 dcache_lock 104 [] d_instantiate+0x36/0x8a +27 ----------- +28 dcache_lock 192 [] _atomic_dec_and_lock+0x34/0x54 +29 dcache_lock 98 [] d_rehash+0x1b/0x44 +30 dcache_lock 72 [] d_alloc+0x19a/0x1eb +31 dcache_lock 112 [] d_instantiate+0x36/0x8a This excerpt shows the first two lock class statistics. Line 01 shows the output version - each time the format changes this will be updated. Line 02-04 -show the header with column descriptions. Lines 05-10 and 13-18 show the actual +show the header with column descriptions. Lines 05-18 and 20-31 show the actual statistics. 
These statistics come in two parts; the actual stats separated by a -short separator (line 08, 14) from the contention points. +short separator (line 08, 13) from the contention points. -The first lock (05-10) is a read/write lock, and shows two lines above the +The first lock (05-18) is a read/write lock, and shows two lines above the short separator. The contention points don't match the column descriptors, -they have two: contentions and [] symbol. +they have two: contentions and [] symbol. The second set of contention +points are the points we're contending with. The integer part of the time values is in us. diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 0aa657aa8a1..fc9f8e88123 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -73,6 +73,8 @@ struct lock_class_key { struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; }; +#define LOCKSTAT_POINTS 4 + /* * The lock-class itself: */ @@ -119,7 +121,8 @@ struct lock_class { int name_version; #ifdef CONFIG_LOCK_STAT - unsigned long contention_point[4]; + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; #endif }; @@ -144,6 +147,7 @@ enum bounce_type { struct lock_class_stats { unsigned long contention_point[4]; + unsigned long contending_point[4]; struct lock_time read_waittime; struct lock_time write_waittime; struct lock_time read_holdtime; @@ -165,6 +169,7 @@ struct lockdep_map { const char *name; #ifdef CONFIG_LOCK_STAT int cpu; + unsigned long ip; #endif }; @@ -355,7 +360,7 @@ struct lock_class_key { }; #ifdef CONFIG_LOCK_STAT extern void lock_contended(struct lockdep_map *lock, unsigned long ip); -extern void lock_acquired(struct lockdep_map *lock); +extern void lock_acquired(struct lockdep_map *lock, unsigned long ip); #define LOCK_CONTENDED(_lock, try, lock) \ do { \ @@ -363,13 +368,13 @@ do { \ lock_contended(&(_lock)->dep_map, _RET_IP_); \ lock(_lock); \ } \ - lock_acquired(&(_lock)->dep_map); \ + 
lock_acquired(&(_lock)->dep_map, _RET_IP_); \ } while (0) #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) -#define lock_acquired(lockdep_map) do {} while (0) +#define lock_acquired(lockdep_map, ip) do {} while (0) #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index dbda475b13b..234a9dccb4b 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock) #ifdef CONFIG_LOCK_STAT static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); -static int lock_contention_point(struct lock_class *class, unsigned long ip) +static int lock_point(unsigned long points[], unsigned long ip) { int i; - for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { - if (class->contention_point[i] == 0) { - class->contention_point[i] = ip; + for (i = 0; i < LOCKSTAT_POINTS; i++) { + if (points[i] == 0) { + points[i] = ip; break; } - if (class->contention_point[i] == ip) + if (points[i] == ip) break; } @@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class) for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) stats.contention_point[i] += pcs->contention_point[i]; + for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++) + stats.contending_point[i] += pcs->contending_point[i]; + lock_time_add(&pcs->read_waittime, &stats.read_waittime); lock_time_add(&pcs->write_waittime, &stats.write_waittime); @@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class) memset(cpu_stats, 0, sizeof(struct lock_class_stats)); } memset(class->contention_point, 0, sizeof(class->contention_point)); + memset(class->contending_point, 0, sizeof(class->contending_point)); } static struct lock_class_stats *get_lock_stats(struct lock_class *class) @@ -3001,7 +3005,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) struct held_lock *hlock, *prev_hlock; struct 
lock_class_stats *stats; unsigned int depth; - int i, point; + int i, contention_point, contending_point; depth = curr->lockdep_depth; if (DEBUG_LOCKS_WARN_ON(!depth)) @@ -3025,18 +3029,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) found_it: hlock->waittime_stamp = sched_clock(); - point = lock_contention_point(hlock_class(hlock), ip); + contention_point = lock_point(hlock_class(hlock)->contention_point, ip); + contending_point = lock_point(hlock_class(hlock)->contending_point, + lock->ip); stats = get_lock_stats(hlock_class(hlock)); - if (point < ARRAY_SIZE(stats->contention_point)) - stats->contention_point[point]++; + if (contention_point < LOCKSTAT_POINTS) + stats->contention_point[contention_point]++; + if (contending_point < LOCKSTAT_POINTS) + stats->contending_point[contending_point]++; if (lock->cpu != smp_processor_id()) stats->bounces[bounce_contended + !!hlock->read]++; put_lock_stats(stats); } static void -__lock_acquired(struct lockdep_map *lock) +__lock_acquired(struct lockdep_map *lock, unsigned long ip) { struct task_struct *curr = current; struct held_lock *hlock, *prev_hlock; @@ -3085,6 +3093,7 @@ found_it: put_lock_stats(stats); lock->cpu = cpu; + lock->ip = ip; } void lock_contended(struct lockdep_map *lock, unsigned long ip) @@ -3106,7 +3115,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) } EXPORT_SYMBOL_GPL(lock_contended); -void lock_acquired(struct lockdep_map *lock) +void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; @@ -3119,7 +3128,7 @@ void lock_acquired(struct lockdep_map *lock) raw_local_irq_save(flags); check_flags(flags); current->lockdep_recursion = 1; - __lock_acquired(lock); + __lock_acquired(lock, ip); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 8d3a6eba8d5..13716b81389 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -557,7 +557,7 @@ static void 
seq_stats(struct seq_file *m, struct lock_stat_data *data) if (stats->read_holdtime.nr) namelen += 2; - for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { + for (i = 0; i < LOCKSTAT_POINTS; i++) { char sym[KSYM_SYMBOL_LEN]; char ip[32]; @@ -574,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) stats->contention_point[i], ip, sym); } + for (i = 0; i < LOCKSTAT_POINTS; i++) { + char sym[KSYM_SYMBOL_LEN]; + char ip[32]; + + if (class->contending_point[i] == 0) + break; + + if (!i) + seq_line(m, '-', 40-namelen, namelen); + + sprint_symbol(sym, class->contending_point[i]); + snprintf(ip, sizeof(ip), "[<%p>]", + (void *)class->contending_point[i]); + seq_printf(m, "%40s %14lu %29s %s\n", name, + stats->contending_point[i], + ip, sym); + } if (i) { seq_puts(m, "\n"); seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); @@ -583,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) static void seq_header(struct seq_file *m) { - seq_printf(m, "lock_stat version 0.2\n"); + seq_printf(m, "lock_stat version 0.3\n"); seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " "%14s %14s\n", diff --git a/kernel/mutex.c b/kernel/mutex.c index 12c779dc65d..39a3816b68d 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } done: - lock_acquired(&lock->dep_map); + lock_acquired(&lock->dep_map, ip); /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, task_thread_info(task)); debug_mutex_set_owner(lock, task_thread_info(task)); -- cgit v1.2.3-70-g09d2 From f3f0d7b026ae34d6ed5ae67cd4dd5909f9cd70a5 Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Thu, 30 Oct 2008 18:30:09 +1100 Subject: [XFS] remove restricted chown parameter from xfs linux On Linux all filesystems are supposed to be operating under Posix' restricted chown. 
Restricted chown means it restricts chown to the owner unless you have CAP_FOWNER. NOTE: that 2 files outside of fs/xfs have been modified too for this change. Reviewed-by: Dave Chinner SGI-PV: 988919 SGI-Modid: 2.6.x-xfs-melb:linux:32413b Signed-off-by: Tim Shimmin Signed-off-by: Christoph Hellwig Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- Documentation/filesystems/xfs.txt | 4 ---- kernel/sysctl_check.c | 1 - 2 files changed, 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 0a1668ba260..9878f50d6ed 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt @@ -229,10 +229,6 @@ The following sysctls are available for the XFS filesystem: ISGID bit is cleared if the irix_sgid_inherit compatibility sysctl is set. - fs.xfs.restrict_chown (Min: 0 Default: 1 Max: 1) - Controls whether unprivileged users can use chown to "give away" - a file to another user. - fs.xfs.inherit_sync (Min: 0 Default: 1 Max: 1) Setting this to "1" will cause the "sync" flag set by the xfs_io(8) chattr command on a directory to be diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index c35da23ab8f..fafeb48f27c 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -730,7 +730,6 @@ static const struct trans_ctl_table trans_fs_quota_table[] = { }; static const struct trans_ctl_table trans_fs_xfs_table[] = { - { XFS_RESTRICT_CHOWN, "restrict_chown" }, { XFS_SGID_INHERIT, "irix_sgid_inherit" }, { XFS_SYMLINK_MODE, "irix_symlink_mode" }, { XFS_PANIC_MASK, "panic_mask" }, -- cgit v1.2.3-70-g09d2 From 8e1a4857cd92e32e642b3e7184c7f6bf85c96e2e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Jan 2009 14:53:06 -0500 Subject: Update Documentation/filesystems/ext4.txt Fix paragraph with recommendations on how to tune ext4 for benchmarks. 
Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 42 +++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 10 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 174eaff7ded..f75ab101c00 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -58,13 +58,22 @@ Note: More extensive information for getting started with ext4 can be # mount -t ext4 /dev/hda1 /wherever - - When comparing performance with other filesystems, remember that - ext3/4 by default offers higher data integrity guarantees than most. - So when comparing with a metadata-only journalling filesystem, such - as ext3, use `mount -o data=writeback'. And you might as well use - `mount -o nobh' too along with it. Making the journal larger than - the mke2fs default often helps performance with metadata-intensive - workloads. + - When comparing performance with other filesystems, it's always + important to try multiple workloads; very often a subtle change in a + workload parameter can completely change the ranking of which + filesystems do well compared to others. When comparing versus ext3, + note that ext4 enables write barriers by default, while ext3 does + not enable write barriers by default. So it is useful to use + explicitly specify whether barriers are enabled or not when via the + '-o barriers=[0|1]' mount option for both ext3 and ext4 filesystems + for a fair comparison. When tuning ext3 for best benchmark numbers, + it is often worthwhile to try changing the data journaling mode; '-o + data=writeback,nobh' can be faster for some workloads. (Note + however that running mounted with data=writeback can potentially + leave stale data exposed in recently written files in case of an + unclean shutdown, which could be a security exposure in some + situations.) 
Configuring the filesystem with a large journal can + also be helpful for metadata-intensive workloads. 2. Features =========== @@ -74,7 +83,7 @@ Note: More extensive information for getting started with ext4 can be * ability to use filesystems > 16TB (e2fsprogs support not available yet) * extent format reduces metadata overhead (RAM, IO for access, transactions) * extent format more robust in face of on-disk corruption due to magics, -* internal redunancy in tree +* internal redundancy in tree * improved file allocation (multi-block alloc) * fix 32000 subdirectory limit * nsec timestamps for mtime, atime, ctime, create time @@ -116,6 +125,12 @@ grouping of bitmaps and inode tables. Some test results available here: When mounting an ext4 filesystem, the following option are accepted: (*) == default +ro Mount filesystem read only. Note that ext4 will + replay the journal (and thus write to the + partition) even when mounted "read only". The + mount options "ro,noload" can be used to prevent + writes to the filesystem. + extents (*) ext4 will use extents to address file data. The file system will no longer be mountable by ext3. @@ -144,7 +159,11 @@ journal_dev=devnum When the external journal device's major/minor numbers identified through its new major/minor numbers encoded in devnum. -noload Don't load the journal on mounting. +noload Don't load the journal on mounting. Note that + if the filesystem was not unmounted cleanly, + skipping the journal replay will lead to the + filesystem containing inconsistencies that can + lead to any number of problems. data=journal All data are committed into the journal prior to being written into the main file system. @@ -219,9 +238,12 @@ minixdf Make 'df' act like Minix. debug Extra debugging information is sent to syslog. -errors=remount-ro(*) Remount the filesystem read-only on an error. +errors=remount-ro Remount the filesystem read-only on an error. errors=continue Keep going on a filesystem error. 
errors=panic Panic and halt the machine if an error occurs. + (These mount options override the errors behavior + specified in the superblock, which can be configured + using tune2fs) data_err=ignore(*) Just print an error message if an error occurs in a file data buffer in ordered mode. -- cgit v1.2.3-70-g09d2 From 30773840c19cea60dcef39545960d541b1ac1cf8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 20:27:38 -0500 Subject: ext4: add fsync batch tuning knobs Add new mount options, min_batch_time and max_batch_time, which controls how long the jbd2 layer should wait for additional filesystem operations to get batched with a synchronous write transaction. Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 29 +++++++++++++++++++++++ fs/ext4/ext4.h | 7 ++++++ fs/ext4/ext4_sb.h | 2 ++ fs/ext4/super.c | 47 ++++++++++++++++++++++++++++++++------ fs/jbd2/journal.c | 2 ++ fs/jbd2/transaction.c | 4 +++- include/linux/jbd2.h | 8 +++++++ 7 files changed, 91 insertions(+), 8 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index f75ab101c00..e3fcbea3ec8 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -283,6 +283,35 @@ delalloc (*) Deferring block allocation until write-out time. nodelalloc Disable delayed allocation. Blocks are allocation when data is copied from user to page cache. +max_batch_time=usec Maximum amount of time ext4 should wait for + additional filesystem operations to be batch + together with a synchronous write operation. + Since a synchronous write operation is going to + force a commit and then a wait for the I/O + complete, it doesn't cost much, and can be a + huge throughput win, we wait for a small amount + of time to see if any other transactions can + piggyback on the synchronous write. 
The + algorithm used is designed to automatically tune + for the speed of the disk, by measuring the + amount of time (on average) that it takes to + finish committing a transaction. Call this time + the "commit time". If the time that the + transaction has been running is less than the + commit time, ext4 will try sleeping for the + commit time to see if other operations will join + the transaction. The commit time is capped by + the max_batch_time, which defaults to 15000us + (15ms). This optimization can be turned off + entirely by setting max_batch_time to 0. + +min_batch_time=usec This parameter sets the commit time (as + described above) to be at least min_batch_time. + It defaults to zero microseconds. Increasing + this parameter may improve the throughput of + multi-threaded, synchronous workloads on very + fast disks, at the cost of increasing latency. + Data Mode ========= There are 3 different data modes: diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ac8551e0b70..9ba9fd6d14d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -328,6 +328,7 @@ struct ext4_mount_options { uid_t s_resuid; gid_t s_resgid; unsigned long s_commit_interval; + u32 s_min_batch_time, s_max_batch_time; #ifdef CONFIG_QUOTA int s_jquota_fmt; char *s_qf_names[MAXQUOTAS]; @@ -805,6 +806,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_DEFM_JMODE_ORDERED 0x0040 #define EXT4_DEFM_JMODE_WBACK 0x0060 +/* + * Default journal batch times + */ +#define EXT4_DEF_MIN_BATCH_TIME 0 +#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ + /* * Structure of a directory entry */ diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 3db800f399a..039b6ea1a04 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h @@ -74,6 +74,8 @@ struct ext4_sb_info { struct journal_s *s_journal; struct list_head s_orphan; unsigned long s_commit_interval; + u32 s_max_batch_time; + u32 s_min_batch_time; struct block_device *journal_bdev; #ifdef CONFIG_JBD2_DEBUG struct 
timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dc27d4c613c..da377f9521b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -705,10 +705,19 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) #endif if (!test_opt(sb, RESERVATION)) seq_puts(seq, ",noreservation"); - if (sbi->s_commit_interval) { + if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { seq_printf(seq, ",commit=%u", (unsigned) (sbi->s_commit_interval / HZ)); } + if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { + seq_printf(seq, ",min_batch_time=%u", + (unsigned) sbi->s_min_batch_time); + } + if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { + seq_printf(seq, ",max_batch_time=%u", + (unsigned) sbi->s_min_batch_time); + } + /* * We're changing the default of barrier mount option, so * let's always display its mount state so it's clear what its @@ -874,7 +883,8 @@ enum { Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, - Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, + Opt_commit, Opt_min_batch_time, Opt_max_batch_time, + Opt_journal_update, Opt_journal_inum, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, @@ -913,6 +923,8 @@ static const match_table_t tokens = { {Opt_nobh, "nobh"}, {Opt_bh, "bh"}, {Opt_commit, "commit=%u"}, + {Opt_min_batch_time, "min_batch_time=%u"}, + {Opt_max_batch_time, "max_batch_time=%u"}, {Opt_journal_update, "journal=update"}, {Opt_journal_inum, "journal=%u"}, {Opt_journal_dev, "journal_dev=%u"}, @@ -1131,6 +1143,22 @@ static int parse_options(char *options, struct super_block *sb, option = JBD2_DEFAULT_MAX_COMMIT_AGE; sbi->s_commit_interval = HZ * option; break; + case Opt_max_batch_time: + if 
(match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + if (option == 0) + option = EXT4_DEF_MAX_BATCH_TIME; + sbi->s_max_batch_time = option; + break; + case Opt_min_batch_time: + if (match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + sbi->s_min_batch_time = option; + break; case Opt_data_journal: data_opt = EXT4_MOUNT_JOURNAL_DATA; goto datacheck; @@ -1979,6 +2007,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); + sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; + sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; + sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; set_opt(sbi->s_mount_opt, RESERVATION); set_opt(sbi->s_mount_opt, BARRIER); @@ -2524,11 +2555,9 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) { struct ext4_sb_info *sbi = EXT4_SB(sb); - if (sbi->s_commit_interval) - journal->j_commit_interval = sbi->s_commit_interval; - /* We could also set up an ext4-specific default for the commit - * interval here, but for now we'll just fall back to the jbd - * default. 
*/ + journal->j_commit_interval = sbi->s_commit_interval; + journal->j_min_batch_time = sbi->s_min_batch_time; + journal->j_max_batch_time = sbi->s_max_batch_time; spin_lock(&journal->j_state_lock); if (test_opt(sb, BARRIER)) @@ -3042,6 +3071,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) old_opts.s_resuid = sbi->s_resuid; old_opts.s_resgid = sbi->s_resgid; old_opts.s_commit_interval = sbi->s_commit_interval; + old_opts.s_min_batch_time = sbi->s_min_batch_time; + old_opts.s_max_batch_time = sbi->s_max_batch_time; #ifdef CONFIG_QUOTA old_opts.s_jquota_fmt = sbi->s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) @@ -3178,6 +3209,8 @@ restore_opts: sbi->s_resuid = old_opts.s_resuid; sbi->s_resgid = old_opts.s_resgid; sbi->s_commit_interval = old_opts.s_commit_interval; + sbi->s_min_batch_time = old_opts.s_min_batch_time; + sbi->s_max_batch_time = old_opts.s_max_batch_time; #ifdef CONFIG_QUOTA sbi->s_jquota_fmt = old_opts.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 74d87290381..fd1d7557a09 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -964,6 +964,8 @@ static journal_t * journal_init_common (void) spin_lock_init(&journal->j_state_lock); journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); + journal->j_min_batch_time = 0; + journal->j_max_batch_time = 15000; /* 15ms */ /* The journal is marked for error until we succeed with recovery! 
*/ journal->j_flags = JBD2_ABORT; diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 13dcbc990f4..48c21bac5a5 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1255,8 +1255,10 @@ int jbd2_journal_stop(handle_t *handle) trans_time = ktime_to_ns(ktime_sub(ktime_get(), transaction->t_start_time)); + commit_time = max_t(u64, commit_time, + 1000*journal->j_min_batch_time); commit_time = min_t(u64, commit_time, - 1000*jiffies_to_usecs(1)); + 1000*journal->j_max_batch_time); if (trans_time < commit_time) { ktime_t expires = ktime_add_ns(ktime_get(), diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ab8cef130c2..a3cd647ea1b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -956,6 +956,14 @@ struct journal_s */ u64 j_average_commit_time; + /* + * minimum and maximum times that we should wait for + * additional filesystem operations to get batched into a + * synchronous handle in microseconds + */ + u32 j_min_batch_time; + u32 j_max_batch_time; + /* This function is called when a transaction is closed */ void (*j_commit_callback)(journal_t *, transaction_t *); -- cgit v1.2.3-70-g09d2 From 6fae35f9cea92793a98b2d9ab21235e5ae035581 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 17 Nov 2008 15:53:42 +0000 Subject: uwb: add basic radio manager The UWB radio manager coordinates the use of the radio between the PALs that may be using it. PALs request use of the radio with uwb_radio_start() and the radio manager will start beaconing if its not already doing so. When the last PAL has called uwb_radio_stop() beaconing will be stopped. In the future, the radio manager will have a more sophisticated channel selection algorithm, probably following the Channel Selection Policy from the WiMedia Alliance when it is finalized. For now, channel 9 (BG1, TFC1) is selected. The user may override the channel selected by the radio manager and may force the radio to stop beaconing. 
The WUSB Host Controller PAL makes use of this and there are two new debug PAL commands that can be used for testing. Signed-off-by: David Vrabel --- Documentation/ABI/testing/sysfs-class-uwb_rc | 14 +- Documentation/usb/wusb-cbaf | 9 -- drivers/usb/host/hwa-hc.c | 1 - drivers/usb/host/whci/hcd.c | 2 - drivers/usb/wusbcore/devconnect.c | 5 +- drivers/usb/wusbcore/mmc.c | 75 ++-------- drivers/usb/wusbcore/pal.c | 16 ++- drivers/usb/wusbcore/wusbhc.h | 8 +- drivers/uwb/Makefile | 1 + drivers/uwb/beacon.c | 26 ++-- drivers/uwb/drp.c | 24 +--- drivers/uwb/lc-rc.c | 11 +- drivers/uwb/pal.c | 20 +-- drivers/uwb/radio.c | 202 +++++++++++++++++++++++++++ drivers/uwb/reset.c | 6 +- drivers/uwb/rsv.c | 4 +- drivers/uwb/uwb-debug.c | 26 +++- drivers/uwb/uwb-internal.h | 5 + drivers/uwb/wlp/wlp-lc.c | 5 +- include/linux/uwb.h | 23 ++- include/linux/uwb/debug-cmd.h | 2 + 21 files changed, 323 insertions(+), 162 deletions(-) create mode 100644 drivers/uwb/radio.c (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-class-uwb_rc b/Documentation/ABI/testing/sysfs-class-uwb_rc index a0d18dbeb7a..6a5fd072849 100644 --- a/Documentation/ABI/testing/sysfs-class-uwb_rc +++ b/Documentation/ABI/testing/sysfs-class-uwb_rc @@ -32,14 +32,16 @@ Contact: linux-usb@vger.kernel.org Description: Write: - [] + - to start beaconing on a specific channel, or stop - beaconing if is -1. Valid channels depends - on the radio controller's supported band groups. + to force a specific channel to be used when beaconing, + or, if is -1, to prohibit beaconing. If + is 0, then the default channel selection + algorithm will be used. Valid channels depends on the + radio controller's supported band groups. - may be used to try and join a specific - beacon group if more than one was found during a scan. + Reading returns the currently active channel, or -1 if + the radio controller is not beaconing. 
What: /sys/class/uwb_rc/uwbN/scan Date: July 2008 diff --git a/Documentation/usb/wusb-cbaf b/Documentation/usb/wusb-cbaf index 2e78b70f3ad..426ddaaef96 100644 --- a/Documentation/usb/wusb-cbaf +++ b/Documentation/usb/wusb-cbaf @@ -80,12 +80,6 @@ case $1 in start) for dev in ${2:-$hdevs} do - uwb_rc=$(readlink -f $dev/uwb_rc) - if cat $uwb_rc/beacon | grep -q -- "-1" - then - echo 13 0 > $uwb_rc/beacon - echo I: started beaconing on ch 13 on $(basename $uwb_rc) >&2 - fi echo $host_CHID > $dev/wusb_chid echo I: started host $(basename $dev) >&2 done @@ -95,9 +89,6 @@ case $1 in do echo 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > $dev/wusb_chid echo I: stopped host $(basename $dev) >&2 - uwb_rc=$(readlink -f $dev/uwb_rc) - echo -1 | cat > $uwb_rc/beacon - echo I: stopped beaconing on $(basename $uwb_rc) >&2 done ;; set-chid) diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c index 2827353e97e..2a4d36fa70b 100644 --- a/drivers/usb/host/hwa-hc.c +++ b/drivers/usb/host/hwa-hc.c @@ -221,7 +221,6 @@ static void hwahc_op_stop(struct usb_hcd *usb_hcd) d_fnstart(4, dev, "(hwahc %p)\n", hwahc); mutex_lock(&wusbhc->mutex); - wusbhc_stop(wusbhc); wusb_cluster_id_put(wusbhc->cluster_id); mutex_unlock(&wusbhc->mutex); d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result); diff --git a/drivers/usb/host/whci/hcd.c b/drivers/usb/host/whci/hcd.c index de1e07271b8..f599f89d3be 100644 --- a/drivers/usb/host/whci/hcd.c +++ b/drivers/usb/host/whci/hcd.c @@ -91,8 +91,6 @@ static void whc_stop(struct usb_hcd *usb_hcd) mutex_lock(&wusbhc->mutex); - wusbhc_stop(wusbhc); - /* stop HC */ le_writel(0, whc->base + WUSBINTR); whc_write_wusbcmd(whc, WUSBCMD_RUN, 0); diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c index c01c7a80744..08a1ec90386 100644 --- a/drivers/usb/wusbcore/devconnect.c +++ b/drivers/usb/wusbcore/devconnect.c @@ -1124,8 +1124,7 @@ void wusbhc_devconnect_destroy(struct wusbhc *wusbhc) * FIXME: This also enables the keep 
alives but this is not necessary * until there are connected and authenticated devices. */ -int wusbhc_devconnect_start(struct wusbhc *wusbhc, - const struct wusb_ckhdid *chid) +int wusbhc_devconnect_start(struct wusbhc *wusbhc) { struct device *dev = wusbhc->dev; struct wuie_host_info *hi; @@ -1138,7 +1137,7 @@ int wusbhc_devconnect_start(struct wusbhc *wusbhc, hi->hdr.bLength = sizeof(*hi); hi->hdr.bIEIdentifier = WUIE_ID_HOST_INFO; hi->attributes = cpu_to_le16((wusbhc->rsv->stream << 3) | WUIE_HI_CAP_ALL); - hi->CHID = *chid; + hi->CHID = wusbhc->chid; result = wusbhc_mmcie_set(wusbhc, 0, 0, &hi->hdr); if (result < 0) { dev_err(dev, "Cannot add Host Info MMCIE: %d\n", result); diff --git a/drivers/usb/wusbcore/mmc.c b/drivers/usb/wusbcore/mmc.c index af2aee0fdff..5463ecebafd 100644 --- a/drivers/usb/wusbcore/mmc.c +++ b/drivers/usb/wusbcore/mmc.c @@ -162,12 +162,11 @@ EXPORT_SYMBOL_GPL(wusbhc_mmcie_rm); /* * wusbhc_start - start transmitting MMCs and accepting connections * @wusbhc: the HC to start - * @chid: the CHID to use for this host * * Establishes a cluster reservation, enables device connections, and * starts MMCs with appropriate DNTS parameters. 
*/ -int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid) +int wusbhc_start(struct wusbhc *wusbhc) { int result; struct device *dev = wusbhc->dev; @@ -181,7 +180,7 @@ int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid) goto error_rsv_establish; } - result = wusbhc_devconnect_start(wusbhc, chid); + result = wusbhc_devconnect_start(wusbhc); if (result < 0) { dev_err(dev, "error enabling device connections: %d\n", result); goto error_devconnect_start; @@ -218,34 +217,6 @@ error_rsv_establish: return result; } -/* - * Disconnect all from the WUSB Channel - * - * Send a Host Disconnect IE in the MMC, wait, don't send it any more - */ -static int __wusbhc_host_disconnect_ie(struct wusbhc *wusbhc) -{ - int result = -ENOMEM; - struct wuie_host_disconnect *host_disconnect_ie; - might_sleep(); - host_disconnect_ie = kmalloc(sizeof(*host_disconnect_ie), GFP_KERNEL); - if (host_disconnect_ie == NULL) - goto error_alloc; - host_disconnect_ie->hdr.bLength = sizeof(*host_disconnect_ie); - host_disconnect_ie->hdr.bIEIdentifier = WUIE_ID_HOST_DISCONNECT; - result = wusbhc_mmcie_set(wusbhc, 0, 0, &host_disconnect_ie->hdr); - if (result < 0) - goto error_mmcie_set; - - /* WUSB1.0[8.5.3.1 & 7.5.2] */ - msleep(100); - wusbhc_mmcie_rm(wusbhc, &host_disconnect_ie->hdr); -error_mmcie_set: - kfree(host_disconnect_ie); -error_alloc: - return result; -} - /* * wusbhc_stop - stop transmitting MMCs * @wusbhc: the HC to stop @@ -264,29 +235,6 @@ void wusbhc_stop(struct wusbhc *wusbhc) } EXPORT_SYMBOL_GPL(wusbhc_stop); -/* - * Change the CHID in a WUSB Channel - * - * If it is just a new CHID, send a Host Disconnect IE and then change - * the CHID IE. 
- */ -static int __wusbhc_chid_change(struct wusbhc *wusbhc, - const struct wusb_ckhdid *chid) -{ - int result = -ENOSYS; - struct device *dev = wusbhc->dev; - dev_err(dev, "%s() not implemented yet\n", __func__); - return result; - - BUG_ON(wusbhc->wuie_host_info == NULL); - __wusbhc_host_disconnect_ie(wusbhc); - wusbhc->wuie_host_info->CHID = *chid; - result = wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->wuie_host_info->hdr); - if (result < 0) - dev_err(dev, "Can't update Host Info WUSB IE: %d\n", result); - return result; -} - /* * Set/reset/update a new CHID * @@ -302,16 +250,19 @@ int wusbhc_chid_set(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid) chid = NULL; mutex_lock(&wusbhc->mutex); - if (wusbhc->active) { - if (chid) - result = __wusbhc_chid_change(wusbhc, chid); - else - wusbhc_stop(wusbhc); - } else { - if (chid) - wusbhc_start(wusbhc, chid); + if (chid) { + if (wusbhc->active) { + mutex_unlock(&wusbhc->mutex); + return -EBUSY; + } + wusbhc->chid = *chid; } mutex_unlock(&wusbhc->mutex); + + if (chid) + result = uwb_radio_start(&wusbhc->pal); + else + uwb_radio_stop(&wusbhc->pal); return result; } EXPORT_SYMBOL_GPL(wusbhc_chid_set); diff --git a/drivers/usb/wusbcore/pal.c b/drivers/usb/wusbcore/pal.c index 7cc51e9905c..d0b172c5ecc 100644 --- a/drivers/usb/wusbcore/pal.c +++ b/drivers/usb/wusbcore/pal.c @@ -18,6 +18,16 @@ */ #include "wusbhc.h" +static void wusbhc_channel_changed(struct uwb_pal *pal, int channel) +{ + struct wusbhc *wusbhc = container_of(pal, struct wusbhc, pal); + + if (channel < 0) + wusbhc_stop(wusbhc); + else + wusbhc_start(wusbhc); +} + /** * wusbhc_pal_register - register the WUSB HC as a UWB PAL * @wusbhc: the WUSB HC @@ -28,8 +38,10 @@ int wusbhc_pal_register(struct wusbhc *wusbhc) wusbhc->pal.name = "wusbhc"; wusbhc->pal.device = wusbhc->usb_hcd.self.controller; + wusbhc->pal.rc = wusbhc->uwb_rc; + wusbhc->pal.channel_changed = wusbhc_channel_changed; - return uwb_pal_register(wusbhc->uwb_rc, &wusbhc->pal); + return 
uwb_pal_register(&wusbhc->pal); } /** @@ -38,5 +50,5 @@ int wusbhc_pal_register(struct wusbhc *wusbhc) */ void wusbhc_pal_unregister(struct wusbhc *wusbhc) { - uwb_pal_unregister(wusbhc->uwb_rc, &wusbhc->pal); + uwb_pal_unregister(&wusbhc->pal); } diff --git a/drivers/usb/wusbcore/wusbhc.h b/drivers/usb/wusbcore/wusbhc.h index 8fef934ad2f..797c2453a35 100644 --- a/drivers/usb/wusbcore/wusbhc.h +++ b/drivers/usb/wusbcore/wusbhc.h @@ -252,7 +252,8 @@ struct wusbhc { struct uwb_pal pal; unsigned trust_timeout; /* in jiffies */ - struct wuie_host_info *wuie_host_info; /* Includes CHID */ + struct wusb_ckhdid chid; + struct wuie_host_info *wuie_host_info; struct mutex mutex; /* locks everything else */ u16 cluster_id; /* Wireless USB Cluster ID */ @@ -376,15 +377,14 @@ static inline void wusbhc_put(struct wusbhc *wusbhc) usb_put_hcd(&wusbhc->usb_hcd); } -int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid); +int wusbhc_start(struct wusbhc *wusbhc); void wusbhc_stop(struct wusbhc *wusbhc); extern int wusbhc_chid_set(struct wusbhc *, const struct wusb_ckhdid *); /* Device connect handling */ extern int wusbhc_devconnect_create(struct wusbhc *); extern void wusbhc_devconnect_destroy(struct wusbhc *); -extern int wusbhc_devconnect_start(struct wusbhc *wusbhc, - const struct wusb_ckhdid *chid); +extern int wusbhc_devconnect_start(struct wusbhc *wusbhc); extern void wusbhc_devconnect_stop(struct wusbhc *wusbhc); extern void wusbhc_handle_dn(struct wusbhc *, u8 srcaddr, struct wusb_dn_hdr *dn_hdr, size_t size); diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile index 2b99c3e6167..ce21a95da04 100644 --- a/drivers/uwb/Makefile +++ b/drivers/uwb/Makefile @@ -18,6 +18,7 @@ uwb-objs := \ lc-rc.o \ neh.o \ pal.o \ + radio.o \ reset.o \ rsv.o \ scan.o \ diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c index d9f2a8acc59..247956098af 100644 --- a/drivers/uwb/beacon.c +++ b/drivers/uwb/beacon.c @@ -119,7 +119,6 @@ int uwb_rc_beacon(struct uwb_rc *rc, int 
channel, unsigned bpst_offset) int result; struct device *dev = &rc->uwb_dev.dev; - mutex_lock(&rc->uwb_dev.mutex); if (channel < 0) channel = -1; if (channel == -1) @@ -128,7 +127,7 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset) /* channel >= 0...dah */ result = uwb_rc_start_beacon(rc, bpst_offset, channel); if (result < 0) - goto out_up; + return result; if (le16_to_cpu(rc->ies->wIELength) > 0) { result = uwb_rc_set_ie(rc, rc->ies); if (result < 0) { @@ -137,19 +136,14 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset) result = uwb_rc_stop_beacon(rc); channel = -1; bpst_offset = 0; - } else - result = 0; + } } } - if (result < 0) - goto out_up; - rc->beaconing = channel; - - uwb_notify(rc, NULL, uwb_bg_joined(rc) ? UWB_NOTIF_BG_JOIN : UWB_NOTIF_BG_LEAVE); - -out_up: - mutex_unlock(&rc->uwb_dev.mutex); + if (result >= 0) { + rc->beaconing = channel; + uwb_notify(rc, NULL, uwb_bg_joined(rc) ? UWB_NOTIF_BG_JOIN : UWB_NOTIF_BG_LEAVE); + } return result; } @@ -618,9 +612,6 @@ static ssize_t uwb_rc_beacon_show(struct device *dev, /* * Start beaconing on the specified channel, or stop beaconing. - * - * The BPST offset of when to start searching for a beacon group to - * join may be specified. */ static ssize_t uwb_rc_beacon_store(struct device *dev, struct device_attribute *attr, @@ -629,12 +620,11 @@ static ssize_t uwb_rc_beacon_store(struct device *dev, struct uwb_dev *uwb_dev = to_uwb_dev(dev); struct uwb_rc *rc = uwb_dev->rc; int channel; - unsigned bpst_offset = 0; ssize_t result = -EINVAL; - result = sscanf(buf, "%d %u\n", &channel, &bpst_offset); + result = sscanf(buf, "%d", &channel); if (result >= 1) - result = uwb_rc_beacon(rc, channel, bpst_offset); + result = uwb_radio_force_channel(rc, channel); return result < 0 ? 
result : size; } diff --git a/drivers/uwb/drp.c b/drivers/uwb/drp.c index c0b1e5e2bd0..fe328146adb 100644 --- a/drivers/uwb/drp.c +++ b/drivers/uwb/drp.c @@ -37,14 +37,13 @@ * * A DRP Availability IE is appended. * - * rc->uwb_dev.mutex is held + * rc->rsvs_mutex is held * * FIXME We currently ignore the returned value indicating the remaining space * in beacon. This could be used to deny reservation requests earlier if * determined that they would cause the beacon space to be exceeded. */ -static -int uwb_rc_gen_send_drp_ie(struct uwb_rc *rc) +int uwb_rc_send_all_drp_ie(struct uwb_rc *rc) { int result; struct device *dev = &rc->uwb_dev.dev; @@ -102,25 +101,6 @@ error_cmd: kfree(cmd); error: return result; - -} -/** - * Send all DRP IEs associated with this host - * - * @returns: >= 0 number of bytes still available in the beacon - * < 0 errno code on error. - * - * As per the protocol we obtain the host controller device lock to access - * bandwidth structures. - */ -int uwb_rc_send_all_drp_ie(struct uwb_rc *rc) -{ - int result; - - mutex_lock(&rc->uwb_dev.mutex); - result = uwb_rc_gen_send_drp_ie(rc); - mutex_unlock(&rc->uwb_dev.mutex); - return result; } void uwb_drp_handle_timeout(struct uwb_rsv *rsv) diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c index f00633d334d..9cf21e6bb62 100644 --- a/drivers/uwb/lc-rc.c +++ b/drivers/uwb/lc-rc.c @@ -189,9 +189,9 @@ static int uwb_rc_setup(struct uwb_rc *rc) int result; struct device *dev = &rc->uwb_dev.dev; - result = uwb_rc_reset(rc); + result = uwb_radio_setup(rc); if (result < 0) { - dev_err(dev, "cannot reset UWB radio: %d\n", result); + dev_err(dev, "cannot setup UWB radio: %d\n", result); goto error; } result = uwb_rc_mac_addr_setup(rc); @@ -311,12 +311,7 @@ void uwb_rc_rm(struct uwb_rc *rc) uwb_dbg_del_rc(rc); uwb_rsv_remove_all(rc); - uwb_rc_ie_rm(rc, UWB_IDENTIFICATION_IE); - if (rc->beaconing >= 0) - uwb_rc_beacon(rc, -1, 0); - if (rc->scan_type != UWB_SCAN_DISABLED) - uwb_rc_scan(rc, rc->scanning, 
UWB_SCAN_DISABLED, 0); - uwb_rc_reset(rc); + uwb_radio_shutdown(rc); rc->stop(rc); diff --git a/drivers/uwb/pal.c b/drivers/uwb/pal.c index 1afb38eacb9..605765124f5 100644 --- a/drivers/uwb/pal.c +++ b/drivers/uwb/pal.c @@ -32,13 +32,13 @@ EXPORT_SYMBOL_GPL(uwb_pal_init); /** * uwb_pal_register - register a UWB PAL - * @rc: the radio controller the PAL will be using * @pal: the PAL * * The PAL must be initialized with uwb_pal_init(). */ -int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal) +int uwb_pal_register(struct uwb_pal *pal) { + struct uwb_rc *rc = pal->rc; int ret; if (pal->device) { @@ -54,9 +54,9 @@ int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal) } } - spin_lock(&rc->pal_lock); + mutex_lock(&rc->uwb_dev.mutex); list_add(&pal->node, &rc->pals); - spin_unlock(&rc->pal_lock); + mutex_unlock(&rc->uwb_dev.mutex); return 0; } @@ -64,14 +64,17 @@ EXPORT_SYMBOL_GPL(uwb_pal_register); /** * uwb_pal_register - unregister a UWB PAL - * @rc: the radio controller the PAL was using * @pal: the PAL */ -void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal) +void uwb_pal_unregister(struct uwb_pal *pal) { - spin_lock(&rc->pal_lock); + struct uwb_rc *rc = pal->rc; + + uwb_radio_stop(pal); + + mutex_lock(&rc->uwb_dev.mutex); list_del(&pal->node); - spin_unlock(&rc->pal_lock); + mutex_unlock(&rc->uwb_dev.mutex); if (pal->device) { sysfs_remove_link(&rc->uwb_dev.dev.kobj, pal->name); @@ -86,6 +89,5 @@ EXPORT_SYMBOL_GPL(uwb_pal_unregister); */ void uwb_rc_pal_init(struct uwb_rc *rc) { - spin_lock_init(&rc->pal_lock); INIT_LIST_HEAD(&rc->pals); } diff --git a/drivers/uwb/radio.c b/drivers/uwb/radio.c new file mode 100644 index 00000000000..f0d55495f5e --- /dev/null +++ b/drivers/uwb/radio.c @@ -0,0 +1,202 @@ +/* + * UWB radio (channel) management. + * + * Copyright (C) 2008 Cambridge Silicon Radio Ltd. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include + +#include "uwb-internal.h" + + +static int uwb_radio_select_channel(struct uwb_rc *rc) +{ + /* + * Default to channel 9 (BG1, TFC1) unless the user has + * selected a specific channel or there are no active PALs. + */ + if (rc->active_pals == 0) + return -1; + if (rc->beaconing_forced) + return rc->beaconing_forced; + return 9; +} + + +/* + * Notify all active PALs that the channel has changed. + */ +static void uwb_radio_channel_changed(struct uwb_rc *rc, int channel) +{ + struct uwb_pal *pal; + + list_for_each_entry(pal, &rc->pals, node) { + if (pal->channel && channel != pal->channel) { + pal->channel = channel; + if (pal->channel_changed) + pal->channel_changed(pal, pal->channel); + } + } +} + +/* + * Change to a new channel and notify any active PALs of the new + * channel. + * + * When stopping the radio, PALs need to be notified first so they can + * terminate any active reservations. + */ +static int uwb_radio_change_channel(struct uwb_rc *rc, int channel) +{ + int ret = 0; + + if (channel == -1) + uwb_radio_channel_changed(rc, channel); + + if (channel != rc->beaconing) { + if (rc->beaconing != -1 && channel != -1) { + /* + * FIXME: should signal the channel change + * with a Channel Change IE. 
+ */ + ret = uwb_radio_change_channel(rc, -1); + if (ret < 0) + return ret; + } + ret = uwb_rc_beacon(rc, channel, 0); + } + + if (channel != -1) + uwb_radio_channel_changed(rc, rc->beaconing); + + return ret; +} + +/** + * uwb_radio_start - request that the radio be started + * @pal: the PAL making the request. + * + * If the radio is not already active, a suitable channel is selected + * and beacons are started. + */ +int uwb_radio_start(struct uwb_pal *pal) +{ + struct uwb_rc *rc = pal->rc; + int ret = 0; + + mutex_lock(&rc->uwb_dev.mutex); + + if (!pal->channel) { + pal->channel = -1; + rc->active_pals++; + ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc)); + } + + mutex_unlock(&rc->uwb_dev.mutex); + return ret; +} +EXPORT_SYMBOL_GPL(uwb_radio_start); + +/** + * uwb_radio_stop - request that the radio be stopped. + * @pal: the PAL making the request. + * + * Stops the radio if no other PAL is making use of it. + */ +void uwb_radio_stop(struct uwb_pal *pal) +{ + struct uwb_rc *rc = pal->rc; + + mutex_lock(&rc->uwb_dev.mutex); + + if (pal->channel) { + rc->active_pals--; + uwb_radio_change_channel(rc, uwb_radio_select_channel(rc)); + pal->channel = 0; + } + + mutex_unlock(&rc->uwb_dev.mutex); +} +EXPORT_SYMBOL_GPL(uwb_radio_stop); + +/* + * uwb_radio_force_channel - force a specific channel to be used + * @rc: the radio controller. + * @channel: the channel to use; -1 to force the radio to stop; 0 to + * use the default channel selection algorithm. + */ +int uwb_radio_force_channel(struct uwb_rc *rc, int channel) +{ + int ret = 0; + + mutex_lock(&rc->uwb_dev.mutex); + + rc->beaconing_forced = channel; + ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc)); + + mutex_unlock(&rc->uwb_dev.mutex); + return ret; +} + +/* + * uwb_radio_setup - setup the radio manager + * @rc: the radio controller. + * + * The radio controller is reset to ensure it's in a known state + * before it's used. 
+ */ +int uwb_radio_setup(struct uwb_rc *rc) +{ + return uwb_rc_reset(rc); +} + +/* + * uwb_radio_reset_state - reset any radio manager state + * @rc: the radio controller. + * + * All internal radio manager state is reset to values corresponding + * to a reset radio controller. + */ +void uwb_radio_reset_state(struct uwb_rc *rc) +{ + struct uwb_pal *pal; + + mutex_lock(&rc->uwb_dev.mutex); + + list_for_each_entry(pal, &rc->pals, node) { + if (pal->channel) { + pal->channel = -1; + if (pal->channel_changed) + pal->channel_changed(pal, -1); + } + } + + rc->beaconing = -1; + rc->scanning = -1; + + mutex_unlock(&rc->uwb_dev.mutex); +} + +/* + * uwb_radio_shutdown - shutdown the radio manager + * @rc: the radio controller. + * + * The radio controller is reset. + */ +void uwb_radio_shutdown(struct uwb_rc *rc) +{ + uwb_radio_reset_state(rc); + uwb_rc_reset(rc); +} diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c index e39b32099af..ce8283cc809 100644 --- a/drivers/uwb/reset.c +++ b/drivers/uwb/reset.c @@ -365,11 +365,7 @@ void uwb_rc_pre_reset(struct uwb_rc *rc) rc->stop(rc); uwbd_flush(rc); - mutex_lock(&rc->uwb_dev.mutex); - rc->beaconing = -1; - rc->scanning = -1; - mutex_unlock(&rc->uwb_dev.mutex); - + uwb_radio_reset_state(rc); uwb_rsv_remove_all(rc); } EXPORT_SYMBOL_GPL(uwb_rc_pre_reset); diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c index 935d5b536db..1cd84f92754 100644 --- a/drivers/uwb/rsv.c +++ b/drivers/uwb/rsv.c @@ -555,14 +555,14 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc, * deny the request. 
*/ rsv->state = UWB_RSV_STATE_T_DENIED; - spin_lock(&rc->pal_lock); + mutex_lock(&rc->uwb_dev.mutex); list_for_each_entry(pal, &rc->pals, node) { if (pal->new_rsv) pal->new_rsv(pal, rsv); if (rsv->state == UWB_RSV_STATE_T_ACCEPTED) break; } - spin_unlock(&rc->pal_lock); + mutex_unlock(&rc->uwb_dev.mutex); list_add_tail(&rsv->rc_node, &rc->reservations); state = rsv->state; diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c index 217ebaac128..0e58071a232 100644 --- a/drivers/uwb/uwb-debug.c +++ b/drivers/uwb/uwb-debug.c @@ -192,7 +192,7 @@ static ssize_t command_write(struct file *file, const char __user *buf, { struct uwb_rc *rc = file->private_data; struct uwb_dbg_cmd cmd; - int ret; + int ret = 0; if (len != sizeof(struct uwb_dbg_cmd)) return -EINVAL; @@ -213,6 +213,12 @@ static ssize_t command_write(struct file *file, const char __user *buf, case UWB_DBG_CMD_IE_RM: ret = cmd_ie_rm(rc, &cmd.ie_rm); break; + case UWB_DBG_CMD_RADIO_START: + ret = uwb_radio_start(&rc->dbg->pal); + break; + case UWB_DBG_CMD_RADIO_STOP: + uwb_radio_stop(&rc->dbg->pal); + break; default: return -EINVAL; } @@ -306,6 +312,17 @@ static struct file_operations drp_avail_fops = { .owner = THIS_MODULE, }; +static void uwb_dbg_channel_changed(struct uwb_pal *pal, int channel) +{ + struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal); + struct device *dev = &pal->rc->uwb_dev.dev; + + if (channel > 0) + dev_info(dev, "debug: channel %d started\n", channel); + else + dev_info(dev, "debug: channel stopped\n"); +} + static void uwb_dbg_new_rsv(struct uwb_pal *pal, struct uwb_rsv *rsv) { struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal); @@ -329,8 +346,11 @@ void uwb_dbg_add_rc(struct uwb_rc *rc) INIT_LIST_HEAD(&rc->dbg->rsvs); uwb_pal_init(&rc->dbg->pal); + rc->dbg->pal.rc = rc; + rc->dbg->pal.channel_changed = uwb_dbg_channel_changed; rc->dbg->pal.new_rsv = uwb_dbg_new_rsv; - uwb_pal_register(rc, &rc->dbg->pal); + uwb_pal_register(&rc->dbg->pal); + if (root_dir) { 
rc->dbg->root_d = debugfs_create_dir(dev_name(&rc->uwb_dev.dev), root_dir); @@ -364,7 +384,7 @@ void uwb_dbg_del_rc(struct uwb_rc *rc) uwb_rsv_terminate(rsv); } - uwb_pal_unregister(rc, &rc->dbg->pal); + uwb_pal_unregister(&rc->dbg->pal); if (root_dir) { debugfs_remove(rc->dbg->drp_avail_f); diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h index af95541dabc..9c0cdb4ded0 100644 --- a/drivers/uwb/uwb-internal.h +++ b/drivers/uwb/uwb-internal.h @@ -238,6 +238,11 @@ struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc, struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc, const struct uwb_mac_addr *macaddr); +int uwb_radio_setup(struct uwb_rc *rc); +void uwb_radio_reset_state(struct uwb_rc *rc); +void uwb_radio_shutdown(struct uwb_rc *rc); +int uwb_radio_force_channel(struct uwb_rc *rc, int channel); + /* -- UWB Sysfs representation */ extern struct class uwb_rc_class; extern struct device_attribute dev_attr_mac_address; diff --git a/drivers/uwb/wlp/wlp-lc.c b/drivers/uwb/wlp/wlp-lc.c index 0799402e73f..7e5eb49b03b 100644 --- a/drivers/uwb/wlp/wlp-lc.c +++ b/drivers/uwb/wlp/wlp-lc.c @@ -543,7 +543,8 @@ int wlp_setup(struct wlp *wlp, struct uwb_rc *rc) uwb_notifs_register(rc, &wlp->uwb_notifs_handler); uwb_pal_init(&wlp->pal); - result = uwb_pal_register(rc, &wlp->pal); + wlp->pal.rc = rc; + result = uwb_pal_register(&wlp->pal); if (result < 0) uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler); @@ -557,7 +558,7 @@ void wlp_remove(struct wlp *wlp) struct device *dev = &wlp->rc->uwb_dev.dev; d_fnstart(6, dev, "wlp %p\n", wlp); wlp_neighbors_release(wlp); - uwb_pal_unregister(wlp->rc, &wlp->pal); + uwb_pal_unregister(&wlp->pal); uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler); wlp_eda_release(&wlp->eda); mutex_lock(&wlp->mutex); diff --git a/include/linux/uwb.h b/include/linux/uwb.h index effd97998fd..7d3ebf046f9 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -355,6 +355,7 @@ struct uwb_rc { u8 ctx_roll; int 
beaconing; /* Beaconing state [channel number] */ + int beaconing_forced; int scanning; enum uwb_scan_type scan_type:3; unsigned ready:1; @@ -373,8 +374,8 @@ struct uwb_rc { struct uwb_rc_cmd_set_ie *ies; size_t ies_capacity; - spinlock_t pal_lock; struct list_head pals; + int active_pals; struct uwb_dbg *dbg; }; @@ -382,11 +383,17 @@ struct uwb_rc { /** * struct uwb_pal - a UWB PAL - * @name: descriptive name for this PAL (wushc, wlp, etc.). + * @name: descriptive name for this PAL (wusbhc, wlp, etc.). * @device: a device for the PAL. Used to link the PAL and the radio * controller in sysfs. + * @rc: the radio controller the PAL uses. + * @channel_changed: called when the channel used by the radio changes. + * A channel of -1 means the channel has been stopped. * @new_rsv: called when a peer requests a reservation (may be NULL if * the PAL cannot accept reservation requests). + * @channel: channel being used by the PAL; 0 if the PAL isn't using + * the radio; -1 if the PAL wishes to use the radio but + * cannot. * * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP). 
@@ -405,12 +412,20 @@ struct uwb_pal { struct list_head node; const char *name; struct device *device; + struct uwb_rc *rc; + + void (*channel_changed)(struct uwb_pal *pal, int channel); void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv); + + int channel; }; void uwb_pal_init(struct uwb_pal *pal); -int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal); -void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal); +int uwb_pal_register(struct uwb_pal *pal); +void uwb_pal_unregister(struct uwb_pal *pal); + +int uwb_radio_start(struct uwb_pal *pal); +void uwb_radio_stop(struct uwb_pal *pal); /* * General public API diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h index 6a16566f022..07efbe17db5 100644 --- a/include/linux/uwb/debug-cmd.h +++ b/include/linux/uwb/debug-cmd.h @@ -34,6 +34,8 @@ enum uwb_dbg_cmd_type { UWB_DBG_CMD_RSV_TERMINATE = 2, UWB_DBG_CMD_IE_ADD = 3, UWB_DBG_CMD_IE_RM = 4, + UWB_DBG_CMD_RADIO_START = 5, + UWB_DBG_CMD_RADIO_STOP = 6, }; struct uwb_dbg_cmd_rsv_establish { -- cgit v1.2.3-70-g09d2 From cec87e38e92cdfe86678ca2a5c29c38d05127601 Mon Sep 17 00:00:00 2001 From: Peter Popovec Date: Tue, 11 Nov 2008 14:46:14 -0500 Subject: Input: add joystick driver for Walkera WK-0701 RC transmitter Signed-off-by: Peter Popovec Signed-off-by: Dmitry Torokhov --- Documentation/input/walkera0701.txt | 109 +++++++++++++ drivers/input/joystick/Kconfig | 12 ++ drivers/input/joystick/Makefile | 1 + drivers/input/joystick/walkera0701.c | 292 +++++++++++++++++++++++++++++++++++ 4 files changed, 414 insertions(+) create mode 100644 Documentation/input/walkera0701.txt create mode 100644 drivers/input/joystick/walkera0701.c (limited to 'Documentation') diff --git a/Documentation/input/walkera0701.txt b/Documentation/input/walkera0701.txt new file mode 100644 index 00000000000..8f4289efc5c --- /dev/null +++ b/Documentation/input/walkera0701.txt @@ -0,0 +1,109 @@ + +Walkera WK-0701 transmitter is supplied with a ready to fly 
Walkera +helicopters such as HM36, HM37, HM60. The walkera0701 module enables to use +this transmitter as joystick + +Devel homepage and download: +http://zub.fei.tuke.sk/walkera-wk0701/ + +or use cogito: +cg-clone http://zub.fei.tuke.sk/GIT/walkera0701-joystick + + +Connecting to PC: + +At back side of transmitter S-video connector can be found. Modulation +pulses from processor to HF part can be found at pin 2 of this connector, +pin 3 is GND. Between pin 3 and CPU 5k6 resistor can be found. To get +modulation pulses to PC, signal pulses must be amplified. + +Cable: (walkera TX to parport) + +Walkera WK-0701 TX S-VIDEO connector: + (back side of TX) + __ __ S-video: canon25 + / |_| \ pin 2 (signal) NPN parport + / O 4 3 O \ pin 3 (GND) LED ________________ 10 ACK + ( O 2 1 O ) | C + \ ___ / 2 ________________________|\|_____|/ + | [___] | |/| B |\ + ------- 3 __________________________________|________________ 25 GND + E + + +I use green LED and BC109 NPN transistor. + +Software: + +Build kernel with walkera0701 module. Module walkera0701 need exclusive +access to parport, modules like lp must be unloaded before loading +walkera0701 module, check dmesg for error messages. Connect TX to PC by +cable and run jstest /dev/input/js0 to see values from TX. If no value can +be changed by TX "joystick", check output from /proc/interrupts. Value for +(usually irq7) parport must increase if TX is on. + + + +Technical details: + +Driver use interrupt from parport ACK input bit to measure pulse length +using hrtimers. + +Frame format: +Based on walkera WK-0701 PCM Format description by Shaul Eizikovich. +(downloaded from http://www.smartpropoplus.com/Docs/Walkera_Wk-0701_PCM.pdf) + +Signal pulses: + (ANALOG) + SYNC BIN OCT + +---------+ +------+ + | | | | +--+ +------+ +--- + +Frame: + SYNC , BIN1, OCT1, BIN2, OCT2 ... BIN24, OCT24, BIN25, next frame SYNC .. 
+ +pulse length: + Binary values: Analog octal values: + + 288 uS Binary 0 318 uS 000 + 438 uS Binary 1 398 uS 001 + 478 uS 010 + 558 uS 011 + 638 uS 100 + 1306 uS SYNC 718 uS 101 + 798 uS 110 + 878 uS 111 + +24 bin+oct values + 1 bin value = 24*4+1 bits = 97 bits + +(Warning, pulses on ACK are inverted by transistor, irq is raised on sync +to bin change or octal value to bin change). + +Binary data representations: + +One binary and octal value can be grouped to nibble. 24 nibbles + one binary +values can be sampled between sync pulses. + +Values for first four channels (analog joystick values) can be found in +first 10 nibbles. Analog value is represented by one sign bit and 9 bit +absolute binary value. (10 bits per channel). Next nibble is checksum for +first ten nibbles. + +Next nibbles 12 .. 21 represent four channels (not all channels can be +directly controlled from TX). Binary representations are the same as in first +four channels. In nibbles 22 and 23 is a special magic number. Nibble 24 is +checksum for nibbles 12..23. + +After last octal value for nibble 24 and next sync pulse one additional +binary value can be sampled. This bit and magic number is not used in +software driver. Some details about these magic numbers can be found in +Walkera_Wk-0701_PCM.pdf. + +Checksum calculation: + +Sum of octal values in nibbles must be same as octal value in checksum +nibble (only first 3 bits are used). Binary value for checksum nibble is +calculated by sum of binary values in checked nibbles + sum of octal values +in checked nibbles divided by 8. Only bit 0 of this sum is used. + diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig index be5c14a5a0a..268dd3fef0a 100644 --- a/drivers/input/joystick/Kconfig +++ b/drivers/input/joystick/Kconfig @@ -294,4 +294,16 @@ config JOYSTICK_XPAD_LEDS This option enables support for the LED which surrounds the Big X on XBox 360 controller. 
+config JOYSTICK_WALKERA0701 + tristate "Walkera WK-0701 RC transmitter" + depends on HIGH_RES_TIMERS && PARPORT + help + Say Y or M here if you have a Walkera WK-0701 transmitter which is + supplied with a ready to fly Walkera helicopters such as HM36, + HM37, HM60 and want to use it via parport as a joystick. More + information is available: <file:Documentation/input/walkera0701.txt> + + To compile this driver as a module, choose M here: the + module will be called walkera0701. + endif diff --git a/drivers/input/joystick/Makefile b/drivers/input/joystick/Makefile index fdbf8c4c287..72303629568 100644 --- a/drivers/input/joystick/Makefile +++ b/drivers/input/joystick/Makefile @@ -29,4 +29,5 @@ obj-$(CONFIG_JOYSTICK_TWIDJOY) += twidjoy.o obj-$(CONFIG_JOYSTICK_WARRIOR) += warrior.o obj-$(CONFIG_JOYSTICK_XPAD) += xpad.o obj-$(CONFIG_JOYSTICK_ZHENHUA) += zhenhua.o +obj-$(CONFIG_JOYSTICK_WALKERA0701) += walkera0701.o diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c new file mode 100644 index 00000000000..4dfa1eed4b7 --- /dev/null +++ b/drivers/input/joystick/walkera0701.c @@ -0,0 +1,292 @@ +/* + * Parallel port to Walkera WK-0701 TX joystick + * + * Copyright (c) 2008 Peter Popovec + * + * More about driver: <file:Documentation/input/walkera0701.txt> + */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+*/ + +/* #define WK0701_DEBUG */ + +#define RESERVE 20000 +#define SYNC_PULSE 1306000 +#define BIN0_PULSE 288000 +#define BIN1_PULSE 438000 + +#define ANALOG_MIN_PULSE 318000 +#define ANALOG_MAX_PULSE 878000 +#define ANALOG_DELTA 80000 + +#define BIN_SAMPLE ((BIN0_PULSE + BIN1_PULSE) / 2) + +#define NO_SYNC 25 + +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Peter Popovec "); +MODULE_DESCRIPTION("Walkera WK-0701 TX as joystick"); +MODULE_LICENSE("GPL"); + +static unsigned int walkera0701_pp_no; +module_param_named(port, walkera0701_pp_no, int, 0); +MODULE_PARM_DESC(port, + "Parallel port adapter for Walkera WK-0701 TX (default is 0)"); + +/* + * For now, only one device is supported, if somebody need more devices, code + * can be expanded, one struct walkera_dev per device must be allocated and + * set up by walkera0701_connect (release of device by walkera0701_disconnect) + */ + +struct walkera_dev { + unsigned char buf[25]; + u64 irq_time, irq_lasttime; + int counter; + int ack; + + struct input_dev *input_dev; + struct hrtimer timer; + + struct parport *parport; + struct pardevice *pardevice; +}; + +static struct walkera_dev w_dev; + +static inline void walkera0701_parse_frame(struct walkera_dev *w) +{ + int i; + int val1, val2, val3, val4, val5, val6, val7, val8; + int crc1, crc2; + + for (crc1 = crc2 = i = 0; i < 10; i++) { + crc1 += w->buf[i] & 7; + crc2 += (w->buf[i] & 8) >> 3; + } + if ((w->buf[10] & 7) != (crc1 & 7)) + return; + if (((w->buf[10] & 8) >> 3) != (((crc1 >> 3) + crc2) & 1)) + return; + for (crc1 = crc2 = 0, i = 11; i < 23; i++) { + crc1 += w->buf[i] & 7; + crc2 += (w->buf[i] & 8) >> 3; + } + if ((w->buf[23] & 7) != (crc1 & 7)) + return; + if (((w->buf[23] & 8) >> 3) != (((crc1 >> 3) + crc2) & 1)) + return; + val1 = ((w->buf[0] & 7) * 256 + w->buf[1] * 16 + w->buf[2]) >> 2; + val1 *= ((w->buf[0] >> 2) & 2) - 1; /* sign */ + val2 = (w->buf[2] & 1) << 8 | (w->buf[3] << 4) | w->buf[4]; + val2 *= (w->buf[2] & 2) - 1; /* sign 
*/ + val3 = ((w->buf[5] & 7) * 256 + w->buf[6] * 16 + w->buf[7]) >> 2; + val3 *= ((w->buf[5] >> 2) & 2) - 1; /* sign */ + val4 = (w->buf[7] & 1) << 8 | (w->buf[8] << 4) | w->buf[9]; + val4 *= (w->buf[7] & 2) - 1; /* sign */ + val5 = ((w->buf[11] & 7) * 256 + w->buf[12] * 16 + w->buf[13]) >> 2; + val5 *= ((w->buf[11] >> 2) & 2) - 1; /* sign */ + val6 = (w->buf[13] & 1) << 8 | (w->buf[14] << 4) | w->buf[15]; + val6 *= (w->buf[13] & 2) - 1; /* sign */ + val7 = ((w->buf[16] & 7) * 256 + w->buf[17] * 16 + w->buf[18]) >> 2; + val7 *= ((w->buf[16] >> 2) & 2) - 1; /*sign */ + val8 = (w->buf[18] & 1) << 8 | (w->buf[19] << 4) | w->buf[20]; + val8 *= (w->buf[18] & 2) - 1; /*sign */ + +#ifdef WK0701_DEBUG + { + int magic, magic_bit; + magic = (w->buf[21] << 4) | w->buf[22]; + magic_bit = (w->buf[24] & 8) >> 3; + printk(KERN_DEBUG + "walkera0701: %4d %4d %4d %4d %4d %4d %4d %4d (magic %2x %d)\n", + val1, val2, val3, val4, val5, val6, val7, val8, magic, + magic_bit); + } +#endif + input_report_abs(w->input_dev, ABS_X, val2); + input_report_abs(w->input_dev, ABS_Y, val1); + input_report_abs(w->input_dev, ABS_Z, val6); + input_report_abs(w->input_dev, ABS_THROTTLE, val3); + input_report_abs(w->input_dev, ABS_RUDDER, val4); + input_report_abs(w->input_dev, ABS_MISC, val7); + input_report_key(w->input_dev, BTN_GEAR_DOWN, val5 > 0); +} + +static inline int read_ack(struct pardevice *p) +{ + return parport_read_status(p->port) & 0x40; +} + +/* falling edge, prepare to BIN value calculation */ +static void walkera0701_irq_handler(void *handler_data) +{ + u64 pulse_time; + struct walkera_dev *w = handler_data; + + w->irq_time = ktime_to_ns(ktime_get()); + pulse_time = w->irq_time - w->irq_lasttime; + w->irq_lasttime = w->irq_time; + + /* cancel timer, if in handler or active do resync */ + if (unlikely(0 != hrtimer_try_to_cancel(&w->timer))) { + w->counter = NO_SYNC; + return; + } + + if (w->counter < NO_SYNC) { + if (w->ack) { + pulse_time -= BIN1_PULSE; + w->buf[w->counter] = 8; + } 
else { + pulse_time -= BIN0_PULSE; + w->buf[w->counter] = 0; + } + if (w->counter == 24) { /* full frame */ + walkera0701_parse_frame(w); + w->counter = NO_SYNC; + if (abs(pulse_time - SYNC_PULSE) < RESERVE) /* new frame sync */ + w->counter = 0; + } else { + if ((pulse_time > (ANALOG_MIN_PULSE - RESERVE) + && (pulse_time < (ANALOG_MAX_PULSE + RESERVE)))) { + pulse_time -= (ANALOG_MIN_PULSE - RESERVE); + pulse_time = (u32) pulse_time / ANALOG_DELTA; /* overtiping is safe, pulsetime < s32.. */ + w->buf[w->counter++] |= (pulse_time & 7); + } else + w->counter = NO_SYNC; + } + } else if (abs(pulse_time - SYNC_PULSE - BIN0_PULSE) < + RESERVE + BIN1_PULSE - BIN0_PULSE) /* frame sync .. */ + w->counter = 0; + + hrtimer_start(&w->timer, ktime_set(0, BIN_SAMPLE), HRTIMER_MODE_REL); +} + +static enum hrtimer_restart timer_handler(struct hrtimer + *handle) +{ + struct walkera_dev *w; + + w = container_of(handle, struct walkera_dev, timer); + w->ack = read_ack(w->pardevice); + + return HRTIMER_NORESTART; +} + +static int walkera0701_open(struct input_dev *dev) +{ + struct walkera_dev *w = input_get_drvdata(dev); + + parport_enable_irq(w->parport); + return 0; +} + +static void walkera0701_close(struct input_dev *dev) +{ + struct walkera_dev *w = input_get_drvdata(dev); + + parport_disable_irq(w->parport); +} + +static int walkera0701_connect(struct walkera_dev *w, int parport) +{ + int err = -ENODEV; + + w->parport = parport_find_number(parport); + if (w->parport == NULL) + return -ENODEV; + + if (w->parport->irq == -1) { + printk(KERN_ERR "walkera0701: parport without interrupt\n"); + goto init_err; + } + + err = -EBUSY; + w->pardevice = parport_register_device(w->parport, "walkera0701", + NULL, NULL, walkera0701_irq_handler, + PARPORT_DEV_EXCL, w); + if (!w->pardevice) + goto init_err; + + if (parport_negotiate(w->pardevice->port, IEEE1284_MODE_COMPAT)) + goto init_err1; + + if (parport_claim(w->pardevice)) + goto init_err1; + + w->input_dev = input_allocate_device(); + if 
(!w->input_dev) + goto init_err2; + + input_set_drvdata(w->input_dev, w); + w->input_dev->name = "Walkera WK-0701 TX"; + w->input_dev->phys = w->parport->name; + w->input_dev->id.bustype = BUS_PARPORT; + + /* TODO what id vendor/product/version ? */ + w->input_dev->id.vendor = 0x0001; + w->input_dev->id.product = 0x0001; + w->input_dev->id.version = 0x0100; + w->input_dev->open = walkera0701_open; + w->input_dev->close = walkera0701_close; + + w->input_dev->evbit[0] = BIT(EV_ABS) | BIT_MASK(EV_KEY); + w->input_dev->keybit[BIT_WORD(BTN_GEAR_DOWN)] = BIT_MASK(BTN_GEAR_DOWN); + + input_set_abs_params(w->input_dev, ABS_X, -512, 512, 0, 0); + input_set_abs_params(w->input_dev, ABS_Y, -512, 512, 0, 0); + input_set_abs_params(w->input_dev, ABS_Z, -512, 512, 0, 0); + input_set_abs_params(w->input_dev, ABS_THROTTLE, -512, 512, 0, 0); + input_set_abs_params(w->input_dev, ABS_RUDDER, -512, 512, 0, 0); + input_set_abs_params(w->input_dev, ABS_MISC, -512, 512, 0, 0); + + err = input_register_device(w->input_dev); + if (err) + goto init_err3; + + hrtimer_init(&w->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + w->timer.function = timer_handler; + return 0; + + init_err3: + input_free_device(w->input_dev); + init_err2: + parport_release(w->pardevice); + init_err1: + parport_unregister_device(w->pardevice); + init_err: + parport_put_port(w->parport); + return err; +} + +static void walkera0701_disconnect(struct walkera_dev *w) +{ + hrtimer_cancel(&w->timer); + input_unregister_device(w->input_dev); + parport_release(w->pardevice); + parport_unregister_device(w->pardevice); + parport_put_port(w->parport); +} + +static int __init walkera0701_init(void) +{ + return walkera0701_connect(&w_dev, walkera0701_pp_no); +} + +static void __exit walkera0701_exit(void) +{ + walkera0701_disconnect(&w_dev); +} + +module_init(walkera0701_init); +module_exit(walkera0701_exit); -- cgit v1.2.3-70-g09d2 From 553dea4dd531562688ba01c641c7f8fc7abaaf8c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: 
Sat, 1 Nov 2008 14:57:49 +0200 Subject: UBIFS: introduce compression mount options It is very handy to be able to change default UBIFS compressor via mount options. Introduce -o compr= mount option support. Currently only "none", "lzo" and "zlib" compressors are supported. Signed-off-by: Artem Bityutskiy --- Documentation/filesystems/ubifs.txt | 3 +++ fs/ubifs/compress.c | 6 ++--- fs/ubifs/sb.c | 10 +++++---- fs/ubifs/super.c | 44 +++++++++++++++++++++++++++++++------ fs/ubifs/ubifs.h | 12 ++++++++-- 5 files changed, 59 insertions(+), 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt index dd84ea3c10d..2d0db5482d2 100644 --- a/Documentation/filesystems/ubifs.txt +++ b/Documentation/filesystems/ubifs.txt @@ -95,6 +95,9 @@ no_chk_data_crc skip checking of CRCs on data nodes in order to of this option is that corruption of the contents of a file can go unnoticed. chk_data_crc (*) do not skip checking CRCs on data nodes +compr=none override default compressor and set it to "none" +compr=lzo override default compressor and set it to "lzo" +compr=zlib override default compressor and set it to "zlib" Quick usage instructions diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 6414d50780e..4afb3ea24d4 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c @@ -33,7 +33,7 @@ /* Fake description object for the "none" compressor */ static struct ubifs_compressor none_compr = { .compr_type = UBIFS_COMPR_NONE, - .name = "no compression", + .name = "none", .capi_name = "", }; @@ -43,13 +43,13 @@ static DEFINE_MUTEX(lzo_mutex); static struct ubifs_compressor lzo_compr = { .compr_type = UBIFS_COMPR_LZO, .comp_mutex = &lzo_mutex, - .name = "LZO", + .name = "lzo", .capi_name = "lzo", }; #else static struct ubifs_compressor lzo_compr = { .compr_type = UBIFS_COMPR_LZO, - .name = "LZO", + .name = "lzo", }; #endif diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 0f392351dc5..c5da201ab54 100644 --- 
a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -179,8 +179,11 @@ static int create_default_filesystem(struct ubifs_info *c) sup->fanout = cpu_to_le32(DEFAULT_FANOUT); sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); - sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); + if (c->mount_opts.override_compr) + sup->default_compr = cpu_to_le16(c->mount_opts.compr_type); + else + sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); generate_random_uuid(sup->uuid); @@ -582,16 +585,15 @@ int ubifs_read_superblock(struct ubifs_info *c) c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT; c->fanout = le32_to_cpu(sup->fanout); c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); - c->default_compr = le16_to_cpu(sup->default_compr); c->rp_size = le64_to_cpu(sup->rp_size); c->rp_uid = le32_to_cpu(sup->rp_uid); c->rp_gid = le32_to_cpu(sup->rp_gid); sup_flags = le32_to_cpu(sup->flags); + if (!c->mount_opts.override_compr) + c->default_compr = le16_to_cpu(sup->default_compr); c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); - memcpy(&c->uuid, &sup->uuid, 16); - c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); /* Automatically increase file system size to the maximum size */ diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 21b4103271e..fc81022cc26 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -417,6 +417,11 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) else if (c->mount_opts.chk_data_crc == 1) seq_printf(s, ",no_chk_data_crc"); + if (c->mount_opts.override_compr) { + seq_printf(s, ",compr="); + seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type)); + } + return 0; } @@ -878,6 +883,7 @@ static int check_volume_empty(struct ubifs_info *c) * Opt_no_bulk_read: disable bulk-reads * Opt_chk_data_crc: check CRCs when reading data nodes * Opt_no_chk_data_crc: do not check CRCs when reading data nodes + * Opt_override_compr: override default 
compressor * Opt_err: just end of array marker */ enum { @@ -887,6 +893,7 @@ enum { Opt_no_bulk_read, Opt_chk_data_crc, Opt_no_chk_data_crc, + Opt_override_compr, Opt_err, }; @@ -897,6 +904,7 @@ static const match_table_t tokens = { {Opt_no_bulk_read, "no_bulk_read"}, {Opt_chk_data_crc, "chk_data_crc"}, {Opt_no_chk_data_crc, "no_chk_data_crc"}, + {Opt_override_compr, "compr=%s"}, {Opt_err, NULL}, }; @@ -950,6 +958,28 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, c->mount_opts.chk_data_crc = 1; c->no_chk_data_crc = 1; break; + case Opt_override_compr: + { + char *name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (!strcmp(name, "none")) + c->mount_opts.compr_type = UBIFS_COMPR_NONE; + else if (!strcmp(name, "lzo")) + c->mount_opts.compr_type = UBIFS_COMPR_LZO; + else if (!strcmp(name, "zlib")) + c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; + else { + ubifs_err("unknown compressor \"%s\"", name); + kfree(name); + return -EINVAL; + } + kfree(name); + c->mount_opts.override_compr = 1; + c->default_compr = c->mount_opts.compr_type; + break; + } default: ubifs_err("unrecognized mount option \"%s\" " "or missing value", p); @@ -1100,13 +1130,13 @@ static int mount_ubifs(struct ubifs_info *c) goto out_free; /* - * Make sure the compressor which is set as the default on in the - * superblock was actually compiled in. + * Make sure the compressor which is set as default in the superblock + * or overriden by mount options is actually compiled in. 
*/ if (!ubifs_compr_present(c->default_compr)) { - ubifs_warn("'%s' compressor is set by superblock, but not " - "compiled in", ubifs_compr_name(c->default_compr)); - c->default_compr = UBIFS_COMPR_NONE; + ubifs_err("'compressor \"%s\" is not compiled in", + ubifs_compr_name(c->default_compr)); + goto out_free; } dbg_failure_mode_registration(c); @@ -2023,8 +2053,8 @@ static int __init ubifs_init(void) /* * We use 2 bit wide bit-fields to store compression type, which should * be amended if more compressors are added. The bit-fields are: - * @compr_type in 'struct ubifs_inode' and @default_compr in - * 'struct ubifs_info'. + * @compr_type in 'struct ubifs_inode', @default_compr in + * 'struct ubifs_info' and @compr_type in 'struct ubifs_mount_opts'. */ BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 4d76aba57ee..16840e099ef 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -893,13 +893,21 @@ struct ubifs_orphan { /** * struct ubifs_mount_opts - UBIFS-specific mount options information. * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) - * @bulk_read: enable bulk-reads - * @chk_data_crc: check CRCs when reading data nodes + * @bulk_read: enable/disable bulk-reads (%0 default, %1 disabe, %2 enable) + * @chk_data_crc: enable/disable CRC data checking when reading data nodes + * (%0 default, %1 disabe, %2 enable) + * @override_compr: override default compressor (%0 - do not override and use + * superblock compressor, %1 - override and use compressor + * specified in @compr_type) + * @compr_type: compressor type to override the superblock compressor with + * (%UBIFS_COMPR_NONE, etc) */ struct ubifs_mount_opts { unsigned int unmount_mode:2; unsigned int bulk_read:2; unsigned int chk_data_crc:2; + unsigned int override_compr:1; + unsigned int compr_type:2; }; /** -- cgit v1.2.3-70-g09d2 From 1c12757c56b4c9ab5aab1f6c1248ae4ea8af3a01 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 13 Nov 2008 18:11:52 -0800 Subject: Document RCU and unloadable modules Signed-off-by: Paul E. McKenney Reviewed-by: Lai Jiangshan Signed-off-by: Jonathan Corbet --- Documentation/RCU/00-INDEX | 2 + Documentation/RCU/rcubarrier.txt | 304 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 306 insertions(+) create mode 100644 Documentation/RCU/rcubarrier.txt (limited to 'Documentation') diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX index 461481dfb7c..0f2a8d08168 100644 --- a/Documentation/RCU/00-INDEX +++ b/Documentation/RCU/00-INDEX @@ -12,6 +12,8 @@ rcuref.txt - Reference-count design for elements of lists/arrays protected by RCU rcu.txt - RCU Concepts +rcubarrier.txt + - Unloading modules that use RCU callbacks RTFP.txt - List of RCU papers (bibliography) going back to 1980. torture.txt diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt new file mode 100644 index 00000000000..909602d409b --- /dev/null +++ b/Documentation/RCU/rcubarrier.txt @@ -0,0 +1,304 @@ +RCU and Unloadable Modules + +[Originally published in LWN Jan. 14, 2007: http://lwn.net/Articles/217484/] + +RCU (read-copy update) is a synchronization mechanism that can be thought +of as a replacement for reader-writer locking (among other things), but with +very low-overhead readers that are immune to deadlock, priority inversion, +and unbounded latency. RCU read-side critical sections are delimited +by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT +kernels, generate no code whatsoever. + +This means that RCU writers are unaware of the presence of concurrent +readers, so that RCU updates to shared data must be undertaken quite +carefully, leaving an old version of the data structure in place until all +pre-existing readers have finished. These old versions are needed because +such readers might hold a reference to them. 
RCU updates can therefore be +rather expensive, and RCU is thus best suited for read-mostly situations. + +How can an RCU writer possibly determine when all readers are finished, +given that readers might well leave absolutely no trace of their +presence? There is a synchronize_rcu() primitive that blocks until all +pre-existing readers have completed. An updater wishing to delete an +element p from a linked list might do the following, while holding an +appropriate lock, of course: + + list_del_rcu(p); + synchronize_rcu(); + kfree(p); + +But the above code cannot be used in IRQ context -- the call_rcu() +primitive must be used instead. This primitive takes a pointer to an +rcu_head struct placed within the RCU-protected data structure and +another pointer to a function that may be invoked later to free that +structure. Code to delete an element p from the linked list from IRQ +context might then be as follows: + + list_del_rcu(p); + call_rcu(&p->rcu, p_callback); + +Since call_rcu() never blocks, this code can safely be used from within +IRQ context. The function p_callback() might be defined as follows: + + static void p_callback(struct rcu_head *rp) + { + struct pstruct *p = container_of(rp, struct pstruct, rcu); + + kfree(p); + } + + +Unloading Modules That Use call_rcu() + +But what if p_callback is defined in an unloadable module? + +If we unload the module while some RCU callbacks are pending, +the CPUs executing these callbacks are going to be severely +disappointed when they are later invoked, as fancifully depicted at +http://lwn.net/images/ns/kernel/rcu-drop.jpg. + +We could try placing a synchronize_rcu() in the module-exit code path, +but this is not sufficient. Although synchronize_rcu() does wait for a +grace period to elapse, it does not wait for the callbacks to complete. + +One might be tempted to try several back-to-back synchronize_rcu() +calls, but this is still not guaranteed to work. 
If there is a very +heavy RCU-callback load, then some of the callbacks might be deferred +in order to allow other processing to proceed. Such deferral is required +in realtime kernels in order to avoid excessive scheduling latencies. + + +rcu_barrier() + +We instead need the rcu_barrier() primitive. This primitive is similar +to synchronize_rcu(), but instead of waiting solely for a grace +period to elapse, it also waits for all outstanding RCU callbacks to +complete. Pseudo-code using rcu_barrier() is as follows: + + 1. Prevent any new RCU callbacks from being posted. + 2. Execute rcu_barrier(). + 3. Allow the module to be unloaded. + +Quick Quiz #1: Why is there no srcu_barrier()? + +The rcutorture module makes use of rcu_barrier in its exit function +as follows: + + 1 static void + 2 rcu_torture_cleanup(void) + 3 { + 4 int i; + 5 + 6 fullstop = 1; + 7 if (shuffler_task != NULL) { + 8 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); + 9 kthread_stop(shuffler_task); +10 } +11 shuffler_task = NULL; +12 +13 if (writer_task != NULL) { +14 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task"); +15 kthread_stop(writer_task); +16 } +17 writer_task = NULL; +18 +19 if (reader_tasks != NULL) { +20 for (i = 0; i < nrealreaders; i++) { +21 if (reader_tasks[i] != NULL) { +22 VERBOSE_PRINTK_STRING( +23 "Stopping rcu_torture_reader task"); +24 kthread_stop(reader_tasks[i]); +25 } +26 reader_tasks[i] = NULL; +27 } +28 kfree(reader_tasks); +29 reader_tasks = NULL; +30 } +31 rcu_torture_current = NULL; +32 +33 if (fakewriter_tasks != NULL) { +34 for (i = 0; i < nfakewriters; i++) { +35 if (fakewriter_tasks[i] != NULL) { +36 VERBOSE_PRINTK_STRING( +37 "Stopping rcu_torture_fakewriter task"); +38 kthread_stop(fakewriter_tasks[i]); +39 } +40 fakewriter_tasks[i] = NULL; +41 } +42 kfree(fakewriter_tasks); +43 fakewriter_tasks = NULL; +44 } +45 +46 if (stats_task != NULL) { +47 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task"); +48 kthread_stop(stats_task); +49 } 
+50 stats_task = NULL; +51 +52 /* Wait for all RCU callbacks to fire. */ +53 rcu_barrier(); +54 +55 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ +56 +57 if (cur_ops->cleanup != NULL) +58 cur_ops->cleanup(); +59 if (atomic_read(&n_rcu_torture_error)) +60 rcu_torture_print_module_parms("End of test: FAILURE"); +61 else +62 rcu_torture_print_module_parms("End of test: SUCCESS"); +63 } + +Line 6 sets a global variable that prevents any RCU callbacks from +re-posting themselves. This will not be necessary in most cases, since +RCU callbacks rarely include calls to call_rcu(). However, the rcutorture +module is an exception to this rule, and therefore needs to set this +global variable. + +Lines 7-50 stop all the kernel tasks associated with the rcutorture +module. Therefore, once execution reaches line 53, no more rcutorture +RCU callbacks will be posted. The rcu_barrier() call on line 53 waits +for any pre-existing callbacks to complete. + +Then lines 55-62 print status and do operation-specific cleanup, and +then return, permitting the module-unload operation to be completed. + +Quick Quiz #2: Is there any other situation where rcu_barrier() might + be required? + +Your module might have additional complications. For example, if your +module invokes call_rcu() from timers, you will need to first cancel all +the timers, and only then invoke rcu_barrier() to wait for any remaining +RCU callbacks to complete. + + +Implementing rcu_barrier() + +Dipankar Sarma's implementation of rcu_barrier() makes use of the fact +that RCU callbacks are never reordered once queued on one of the per-CPU +queues. His implementation queues an RCU callback on each of the per-CPU +callback queues, and then waits until they have all started executing, at +which point, all earlier RCU callbacks are guaranteed to have completed. 
+ +The original code for rcu_barrier() was as follows: + + 1 void rcu_barrier(void) + 2 { + 3 BUG_ON(in_interrupt()); + 4 /* Take cpucontrol mutex to protect against CPU hotplug */ + 5 mutex_lock(&rcu_barrier_mutex); + 6 init_completion(&rcu_barrier_completion); + 7 atomic_set(&rcu_barrier_cpu_count, 0); + 8 on_each_cpu(rcu_barrier_func, NULL, 0, 1); + 9 wait_for_completion(&rcu_barrier_completion); +10 mutex_unlock(&rcu_barrier_mutex); +11 } + +Line 3 verifies that the caller is in process context, and lines 5 and 10 +use rcu_barrier_mutex to ensure that only one rcu_barrier() is using the +global completion and counters at a time, which are initialized on lines +6 and 7. Line 8 causes each CPU to invoke rcu_barrier_func(), which is +shown below. Note that the final "1" in on_each_cpu()'s argument list +ensures that all the calls to rcu_barrier_func() will have completed +before on_each_cpu() returns. Line 9 then waits for the completion. + +This code was rewritten in 2008 to support rcu_barrier_bh() and +rcu_barrier_sched() in addition to the original rcu_barrier(). + +The rcu_barrier_func() runs on each CPU, where it invokes call_rcu() +to post an RCU callback, as follows: + + 1 static void rcu_barrier_func(void *notused) + 2 { + 3 int cpu = smp_processor_id(); + 4 struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + 5 struct rcu_head *head; + 6 + 7 head = &rdp->barrier; + 8 atomic_inc(&rcu_barrier_cpu_count); + 9 call_rcu(head, rcu_barrier_callback); +10 } + +Lines 3 and 4 locate RCU's internal per-CPU rcu_data structure, +which contains the struct rcu_head that is needed for the later call to +call_rcu(). Line 7 picks up a pointer to this struct rcu_head, and line +8 increments a global counter. This counter will later be decremented +by the callback. Line 9 then registers the rcu_barrier_callback() on +the current CPU's queue. 
+ +The rcu_barrier_callback() function simply atomically decrements the +rcu_barrier_cpu_count variable and finalizes the completion when it +reaches zero, as follows: + + 1 static void rcu_barrier_callback(struct rcu_head *notused) + 2 { + 3 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + 4 complete(&rcu_barrier_completion); + 5 } + +Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes + immediately (thus incrementing rcu_barrier_cpu_count to the + value one), but the other CPU's rcu_barrier_func() invocations + are delayed for a full grace period? Couldn't this result in + rcu_barrier() returning prematurely? + + +rcu_barrier() Summary + +The rcu_barrier() primitive has seen relatively little use, since most +code using RCU is in the core kernel rather than in modules. However, if +you are using RCU from an unloadable module, you need to use rcu_barrier() +so that your module may be safely unloaded. + + +Answers to Quick Quizzes + +Quick Quiz #1: Why is there no srcu_barrier()? + +Answer: Since there is no call_srcu(), there can be no outstanding SRCU + callbacks. Therefore, there is no need to wait for them. + +Quick Quiz #2: Is there any other situation where rcu_barrier() might + be required? + +Answer: Interestingly enough, rcu_barrier() was not originally + implemented for module unloading. Nikita Danilov was using + RCU in a filesystem, which resulted in a similar situation at + filesystem-unmount time. Dipankar Sarma coded up rcu_barrier() + in response, so that Nikita could invoke it during the + filesystem-unmount process. + + Much later, yours truly hit the RCU module-unload problem when + implementing rcutorture, and found that rcu_barrier() solves + this problem as well. + +Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes + immediately (thus incrementing rcu_barrier_cpu_count to the + value one), but the other CPU's rcu_barrier_func() invocations + are delayed for a full grace period? 
Couldn't this result in + rcu_barrier() returning prematurely? + +Answer: This cannot happen. The reason is that on_each_cpu() has its last + argument, the wait flag, set to "1". This flag is passed through + to smp_call_function() and further to smp_call_function_on_cpu(), + causing this latter to spin until the cross-CPU invocation of + rcu_barrier_func() has completed. This by itself would prevent + a grace period from completing on non-CONFIG_PREEMPT kernels, + since each CPU must undergo a context switch (or other quiescent + state) before the grace period can complete. However, this is + of no use in CONFIG_PREEMPT kernels. + + Therefore, on_each_cpu() disables preemption across its call + to smp_call_function() and also across the local call to + rcu_barrier_func(). This prevents the local CPU from context + switching, again preventing grace periods from completing. This + means that all CPUs have executed rcu_barrier_func() before + the first rcu_barrier_callback() can possibly execute, in turn + preventing rcu_barrier_cpu_count from prematurely reaching zero. + + Currently, -rt implementations of RCU keep but a single global + queue for RCU callbacks, and thus do not suffer from this + problem. However, when the -rt RCU eventually does have per-CPU + callback queues, things will have to change. One simple change + is to add an rcu_read_lock() before line 8 of rcu_barrier() + and an rcu_read_unlock() after line 8 of this same function. If + you can think of a better change, please let me know! -- cgit v1.2.3-70-g09d2 From 2884f00b94be73a6a7875bada739bf9bb2f9a1b6 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 26 Nov 2008 17:15:21 +0100 Subject: Document handling of bad memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document how to deal with bad memory reported with memtest. 
Signed-off-by: Jan-Simon Möller Signed-off-by: Pavel Machek Signed-off-by: Jonathan Corbet --- Documentation/bad_memory.txt | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 Documentation/bad_memory.txt (limited to 'Documentation') diff --git a/Documentation/bad_memory.txt b/Documentation/bad_memory.txt new file mode 100644 index 00000000000..df841621320 --- /dev/null +++ b/Documentation/bad_memory.txt @@ -0,0 +1,45 @@ +March 2008 +Jan-Simon Moeller, dl9pf@gmx.de + + +How to deal with bad memory e.g. reported by memtest86+ ? +######################################################### + +There are three possibilities I know of: + +1) Reinsert/swap the memory modules + +2) Buy new modules (best!) or try to exchange the memory + if you have spare-parts + +3) Use BadRAM or memmap + +This Howto is about number 3) . + + +BadRAM +###### +BadRAM is actively developed and available as a kernel patch +here: http://rick.vanrein.org/linux/badram/ + +For more details see the BadRAM documentation. + +memmap +###### + +memmap is already in the kernel and usable as kernel-parameter at +boot-time. Its syntax is slightly strange and you may need to +calculate the values by yourself! + +Syntax to exclude a memory area (see kernel-parameters.txt for details): +memmap=<size>$<address>
+ +Example: memtest86+ reported errors here at addresses 0x18691458, 0x18698424 and + some others. All had 0x1869xxxx in common, so I chose a pattern of + 0x18690000,0xffff0000. + +With the numbers of the example above: +memmap=64K$0x18690000 + or +memmap=0x10000$0x18690000 + -- cgit v1.2.3-70-g09d2 From 8e26e1d7bce73acf6f995a4d252610e46ee831a5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 9 Dec 2008 14:41:44 +0200 Subject: UBI: document UBI ioctls Update the ioctl-number.txt file, add UBI and DVB there (because they use the same ioctl numbers). Signed-off-by: Artem Bityutskiy --- Documentation/ioctl/ioctl-number.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Documentation') diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index b880ce5dbd3..82469917443 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -97,6 +97,7 @@ Code Seq# Include File Comments 'M' all linux/soundcard.h 'N' 00-1F drivers/usb/scanner.h +'O' 00-02 include/mtd/ubi-user.h UBI 'P' all linux/soundcard.h 'Q' all linux/soundcard.h 'R' 00-1F linux/random.h @@ -142,6 +143,9 @@ Code Seq# Include File Comments 'n' 00-7F linux/ncp_fs.h 'n' E0-FF video/matrox.h matroxfb 'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2 +'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlap) +'o' 40-41 include/mtd/ubi-user.h UBI +'o' 01-A1 include/linux/dvb/*.h DVB 'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this) 'p' 00-3F linux/mc146818rtc.h conflict! 'p' 40-7F linux/nvram.h -- cgit v1.2.3-70-g09d2 From 3b11ce7f542e415c90267b4482d4611410b468e6 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 17 Dec 2008 15:21:39 -0800 Subject: x86: use possible_cpus=NUM to extend the possible cpus allowed Impact: add new boot parameter Use possible_cpus=NUM kernel parameter to extend the number of possible cpus. The ability to HOTPLUG ON cpus that are "possible" but not "present" is dealt with in a later patch. 
Signed-off-by: Mike Travis --- Documentation/cpu-hotplug.txt | 17 +++++++++-------- arch/x86/kernel/apic.c | 20 ++++++++++++-------- arch/x86/kernel/smpboot.c | 25 +++++++++++++++++++++---- 3 files changed, 42 insertions(+), 20 deletions(-) (limited to 'Documentation') diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 94bbc27ddd4..9d620c153b0 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -50,16 +50,17 @@ additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets cpu_possible_map = cpu_present_map + additional_cpus (*) Option valid only for following architectures -- x86_64, ia64 +- ia64 -ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT -to determine the number of potentially hot-pluggable cpus. The implementation -should only rely on this to count the # of cpus, but *MUST* not rely on the -apicid values in those tables for disabled apics. In the event BIOS doesn't -mark such hot-pluggable cpus as disabled entries, one could use this -parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map. +ia64 uses the number of disabled local apics in ACPI tables MADT to +determine the number of potentially hot-pluggable cpus. The implementation +should only rely on this to count the # of cpus, but *MUST* not rely +on the apicid values in those tables for disabled apics. In the event +BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could +use this parameter "additional_cpus=x" to represent those cpus in the +cpu_possible_map. -possible_cpus=n [s390 only] use this to set hotpluggable cpus. +possible_cpus=n [s390,x86_64] use this to set hotpluggable cpus. This option sets possible_cpus bits in cpu_possible_map. Thus keeping the numbers of bits set constant even if the machine gets rebooted. 
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 93cf2d13f33..f7a32a3beb2 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1819,28 +1819,32 @@ void disconnect_bsp_APIC(int virt_wire_setup) void __cpuinit generic_processor_info(int apicid, int version) { int cpu; - cpumask_t tmp_map; /* * Validate version */ if (version == 0x0) { pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); + "fixing up to 0x10. (tell your hw vendor)\n", + version); version = 0x10; } apic_version[apicid] = version; - if (num_processors >= NR_CPUS) { - pr_warning("WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); + if (num_processors >= nr_cpu_ids) { + int max = nr_cpu_ids; + int thiscpu = max + disabled_cpus; + + pr_warning( + "ACPI: NR_CPUS/possible_cpus limit of %i reached." + " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); + + disabled_cpus++; return; } num_processors++; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); + cpu = cpumask_next_zero(-1, cpu_present_mask); physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index be946678804..1a9941b1115 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1252,6 +1252,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } +static int __initdata setup_possible_cpus = -1; +static int __init _setup_possible_cpus(char *str) +{ + get_option(&str, &setup_possible_cpus); + return 0; +} +early_param("possible_cpus", _setup_possible_cpus); + + /* * cpu_possible_map should be static, it cannot change as cpu's * are onlined, or offlined. 
The reason is per-cpu data-structures @@ -1264,7 +1273,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) * * Three ways to find out the number of additional hotplug CPUs: * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with additional_cpus=NUM + * - The user can overwrite it with possible_cpus=NUM * - Otherwise don't reserve additional CPUs. * We do this because additional CPUs waste a lot of memory. * -AK @@ -1277,9 +1286,17 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; - possible = num_processors + disabled_cpus; - if (possible > NR_CPUS) - possible = NR_CPUS; + if (setup_possible_cpus == -1) + possible = num_processors + disabled_cpus; + else + possible = setup_possible_cpus; + + if (possible > CONFIG_NR_CPUS) { + printk(KERN_WARNING + "%d Processors exceeds NR_CPUS limit of %d\n", + possible, CONFIG_NR_CPUS); + possible = CONFIG_NR_CPUS; + } printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); -- cgit v1.2.3-70-g09d2 From 64db4cfff99c04cd5f550357edcc8780f96b54a2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 18 Dec 2008 21:55:32 +0100 Subject: "Tree RCU": scalable classic RCU implementation This patch fixes a long-standing performance bug in classic RCU that results in massive internal-to-RCU lock contention on systems with more than a few hundred CPUs. Although this patch creates a separate flavor of RCU for ease of review and patch maintenance, it is intended to replace classic RCU. This patch still handles stress better than does mainline, so I am still calling it ready for inclusion. This patch is against the -tip tree. Nevertheless, experience on an actual 1000+ CPU machine would still be most welcome. Most of the changes noted below were found while creating an rcutiny (which should permit ejecting the current rcuclassic) and while doing detailed line-by-line documentation. 
Updates from v9 (http://lkml.org/lkml/2008/12/2/334): o Fixes from remainder of line-by-line code walkthrough, including comment spelling, initialization, undesirable narrowing due to type conversion, removing redundant memory barriers, removing redundant local-variable initialization, and removing redundant local variables. I do not believe that any of these fixes address the CPU-hotplug issues that Andi Kleen was seeing, but please do give it a whirl in case the machine is smarter than I am. A writeup from the walkthrough may be found at the following URL, in case you are suffering from terminal insomnia or masochism: http://www.kernel.org/pub/linux/kernel/people/paulmck/tmp/rcutree-walkthrough.2008.12.16a.pdf o Made rcutree tracing use seq_file, as suggested some time ago by Lai Jiangshan. o Added a .csv variant of the rcudata debugfs trace file, to allow people having thousands of CPUs to drop the data into a spreadsheet. Tested with oocalc and gnumeric. Updated documentation to suit. Updates from v8 (http://lkml.org/lkml/2008/11/15/139): o Fix a theoretical race between grace-period initialization and force_quiescent_state() that could occur if more than three jiffies were required to carry out the grace-period initialization. Which it might, if you had enough CPUs. o Apply Ingo's printk-standardization patch. o Substitute local variables for repeated accesses to global variables. o Fix comment misspellings and redundant (but harmless) increments of ->n_rcu_pending (this latter after having explicitly added it). o Apply checkpatch fixes. Updates from v7 (http://lkml.org/lkml/2008/10/10/291): o Fixed a number of problems noted by Gautham Shenoy, including the cpu-stall-detection bug that he was having difficulty convincing me was real. ;-) o Changed cpu-stall detection to wait for ten seconds rather than three in order to reduce false positive, as suggested by Ingo Molnar. o Produced a design document (http://lwn.net/Articles/305782/). 
The act of writing this document uncovered a number of both theoretical and "here and now" bugs as noted below. o Fix dynticks_nesting accounting confusion, simplify WARN_ON() condition, fix kerneldoc comments, and add memory barriers in dynticks interface functions. o Add more data to tracing. o Remove unused "rcu_barrier" field from rcu_data structure. o Count calls to rcu_pending() from scheduling-clock interrupt to use as a surrogate timebase should jiffies stop counting. o Fix a theoretical race between force_quiescent_state() and grace-period initialization. Yes, initialization does have to go on for some jiffies for this race to occur, but given enough CPUs... Updates from v6 (http://lkml.org/lkml/2008/9/23/448): o Fix a number of checkpatch.pl complaints. o Apply review comments from Ingo Molnar and Lai Jiangshan on the stall-detection code. o Fix several bugs in !CONFIG_SMP builds. o Fix a misspelled config-parameter name so that RCU now announces at boot time if stall detection is configured. o Run tests on numerous combinations of configurations parameters, which after the fixes above, now build and run correctly. Updates from v5 (http://lkml.org/lkml/2008/9/15/92, bad subject line): o Fix a compiler error in the !CONFIG_FANOUT_EXACT case (blew a changeset some time ago, and finally got around to retesting this option). o Fix some tracing bugs in rcupreempt that caused incorrect totals to be printed. o I now test with a more brutal random-selection online/offline script (attached). Probably more brutal than it needs to be on the people reading it as well, but so it goes. o A number of optimizations and usability improvements: o Make rcu_pending() ignore the grace-period timeout when there is no grace period in progress. o Make force_quiescent_state() avoid going for a global lock in the case where there is no grace period in progress. o Rearrange struct fields to improve struct layout. 
o Make call_rcu() initiate a grace period if RCU was idle, rather than waiting for the next scheduling clock interrupt. o Invoke rcu_irq_enter() and rcu_irq_exit() only when idle, as suggested by Andi Kleen. I still don't completely trust this change, and might back it out. o Make CONFIG_RCU_TRACE be the single config variable manipulated for all forms of RCU, instead of the prior confusion. o Document tracing files and formats for both rcupreempt and rcutree. Updates from v4 for those missing v5 given its bad subject line: o Separated dynticks interface so that NMIs and irqs call separate functions, greatly simplifying it. In particular, this code no longer requires a proof of correctness. ;-) o Separated dynticks state out into its own per-CPU structure, avoiding the duplicated accounting. o The case where a dynticks-idle CPU runs an irq handler that invokes call_rcu() is now correctly handled, forcing that CPU out of dynticks-idle mode. o Review comments have been applied (thank you all!!!). For but one example, fixed the dynticks-ordering issue that Manfred pointed out, saving me much debugging. ;-) o Adjusted rcuclassic and rcupreempt to handle dynticks changes. Attached is an updated patch to Classic RCU that applies a hierarchy, greatly reducing the contention on the top-level lock for large machines. This passes 10-hour concurrent rcutorture and online-offline testing on 128-CPU ppc64 without dynticks enabled, and exposes some timekeeping bugs in presence of dynticks (exciting working on a system where "sleep 1" hangs until interrupted...), which were fixed in the 2.6.27 kernel. It is getting more reliable than mainline by some measures, so the next version will be against -tip for inclusion. See also Manfred Spraul's recent patches (or his earlier work from 2004 at http://marc.info/?l=linux-kernel&m=108546384711797&w=2). We will converge onto a common patch in the fullness of time, but are currently exploring different regions of the design space. 
That said, I have already gratefully stolen quite a few of Manfred's ideas. This patch provides CONFIG_RCU_FANOUT, which controls the bushiness of the RCU hierarchy. Defaults to 32 on 32-bit machines and 64 on 64-bit machines. If CONFIG_NR_CPUS is less than CONFIG_RCU_FANOUT, there is no hierarchy. By default, the RCU initialization code will adjust CONFIG_RCU_FANOUT to balance the hierarchy, so strongly NUMA architectures may choose to set CONFIG_RCU_FANOUT_EXACT to disable this balancing, allowing the hierarchy to be exactly aligned to the underlying hardware. Up to two levels of hierarchy are permitted (in addition to the root node), allowing up to 16,384 CPUs on 32-bit systems and up to 262,144 CPUs on 64-bit systems. I just know that I am going to regret saying this, but this seems more than sufficient for the foreseeable future. (Some architectures might wish to set CONFIG_RCU_FANOUT=4, which would limit such architectures to 64 CPUs. If this becomes a real problem, additional levels can be added, but I doubt that it will make a significant difference on real hardware.) In the common case, a given CPU will manipulate its private rcu_data structure and the rcu_node structure that it shares with its immediate neighbors. This can reduce both lock and memory contention by multiple orders of magnitude, which should eliminate the need for the strange manipulations that are reported to be required when running Linux on very large systems. Some shortcomings: o More bugs will probably surface as a result of an ongoing line-by-line code inspection. Patches will be provided as required. o There are probably hangs, rcutorture failures, &c. Seems quite stable on a 128-CPU machine, but that is kind of small compared to 4096 CPUs. However, seems to do better than mainline. Patches will be provided as required. o The memory footprint of this version is several KB larger than rcuclassic. 
A separate UP-only rcutiny patch will be provided, which will reduce the memory footprint significantly, even compared to the old rcuclassic. One such patch passes light testing, and has a memory footprint smaller even than rcuclassic. Initial reaction from various embedded guys was "it is not worth it", so am putting it aside. Credits: o Manfred Spraul for ideas, review comments, and bugs spotted, as well as some good friendly competition. ;-) o Josh Triplett, Ingo Molnar, Peter Zijlstra, Mathieu Desnoyers, Lai Jiangshan, Andi Kleen, Andy Whitcroft, and Andrew Morton for reviews and comments. o Thomas Gleixner for much-needed help with some timer issues (see patches below). o Jon M. Tollefson, Tim Pepper, Andrew Theurer, Jose R. Santos, Andy Whitcroft, Darrick Wong, Nishanth Aravamudan, Anton Blanchard, Dave Kleikamp, and Nathan Lynch for keeping machines alive despite my heavy abuse^Wtesting. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- Documentation/RCU/00-INDEX | 2 + Documentation/RCU/trace.txt | 413 +++++++++ arch/powerpc/platforms/pseries/rtasd.c | 4 + include/linux/hardirq.h | 14 +- include/linux/rcupdate.h | 10 +- include/linux/rcutree.h | 329 +++++++ init/Kconfig | 18 +- kernel/Kconfig.preempt | 62 +- kernel/Makefile | 6 +- kernel/rcupreempt.c | 10 + kernel/rcupreempt_trace.c | 10 +- kernel/rcutree.c | 1535 ++++++++++++++++++++++++++++++++ kernel/rcutree_trace.c | 271 ++++++ kernel/softirq.c | 5 +- lib/Kconfig.debug | 13 + 15 files changed, 2671 insertions(+), 31 deletions(-) create mode 100644 Documentation/RCU/trace.txt create mode 100644 include/linux/rcutree.h create mode 100644 kernel/rcutree.c create mode 100644 kernel/rcutree_trace.c (limited to 'Documentation') diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX index 461481dfb7c..7dc0695a8f9 100644 --- a/Documentation/RCU/00-INDEX +++ b/Documentation/RCU/00-INDEX @@ -16,6 +16,8 @@ RTFP.txt - List of RCU papers (bibliography) going back to 1980. 
torture.txt - RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST) +trace.txt + - CONFIG_RCU_TRACE debugfs files and formats UP.txt - RCU on Uniprocessor Systems whatisRCU.txt diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt new file mode 100644 index 00000000000..068848240a8 --- /dev/null +++ b/Documentation/RCU/trace.txt @@ -0,0 +1,413 @@ +CONFIG_RCU_TRACE debugfs Files and Formats + + +The rcupreempt and rcutree implementations of RCU provide debugfs trace +output that summarizes counters and state. This information is useful for +debugging RCU itself, and can sometimes also help to debug abuses of RCU. +Note that the rcuclassic implementation of RCU does not provide debugfs +trace output. + +The following sections describe the debugfs files and formats for +preemptable RCU (rcupreempt) and hierarchical RCU (rcutree). + + +Preemptable RCU debugfs Files and Formats + +This implementation of RCU provides three debugfs files under the +top-level directory RCU: rcu/rcuctrs (which displays the per-CPU +counters used by preemptable RCU), rcu/rcugp (which displays grace-period +counters), and rcu/rcustats (which displays internal counters for debugging RCU). + +The output of "cat rcu/rcuctrs" looks as follows: + +CPU last cur F M + 0 5 -5 0 0 + 1 -1 0 0 0 + 2 0 1 0 0 + 3 0 1 0 0 + 4 0 1 0 0 + 5 0 1 0 0 + 6 0 2 0 0 + 7 0 -1 0 0 + 8 0 1 0 0 +ggp = 26226, state = waitzero + +The per-CPU fields are as follows: + +o "CPU" gives the CPU number. Offline CPUs are not displayed. + +o "last" gives the value of the counter that is being decremented + for the current grace period phase. In the example above, + the counters sum to 4, indicating that there are still four + RCU read-side critical sections still running that started + before the last counter flip. + +o "cur" gives the value of the counter that is currently being + both incremented (by rcu_read_lock()) and decremented (by + rcu_read_unlock()).
In the example above, the counters sum to + 1, indicating that there is only one RCU read-side critical section + still running that started after the last counter flip. + +o "F" indicates whether RCU is waiting for this CPU to acknowledge + a counter flip. In the above example, RCU is not waiting on any, + which is consistent with the state being "waitzero" rather than + "waitack". + +o "M" indicates whether RCU is waiting for this CPU to execute a + memory barrier. In the above example, RCU is not waiting on any, + which is consistent with the state being "waitzero" rather than + "waitmb". + +o "ggp" is the global grace-period counter. + +o "state" is the RCU state, which can be one of the following: + + o "idle": there is no grace period in progress. + + o "waitack": RCU just incremented the global grace-period + counter, which has the effect of reversing the roles of + the "last" and "cur" counters above, and is waiting for + all the CPUs to acknowledge the flip. Once the flip has + been acknowledged, CPUs will no longer be incrementing + what are now the "last" counters, so that their sum will + decrease monotonically down to zero. + + o "waitzero": RCU is waiting for the sum of the "last" counters + to decrease to zero. + + o "waitmb": RCU is waiting for each CPU to execute a memory + barrier, which ensures that instructions from a given CPU's + last RCU read-side critical section cannot be reordered + with instructions following the memory-barrier instruction. + +The output of "cat rcu/rcugp" looks as follows: + +oldggp=48870 newggp=48873 + +Note that reading from this file provokes a synchronize_rcu(). The +"oldggp" value is that of "ggp" from rcu/rcuctrs above, taken before +executing the synchronize_rcu(), and the "newggp" value is also the +"ggp" value, but taken after the synchronize_rcu() command returns. 
+ + +The output of "cat rcu/rcustats" looks as follows: + +na=1337955 nl=40 wa=1337915 wl=44 da=1337871 dl=0 dr=1337871 di=1337871 +1=50989 e1=6138 i1=49722 ie1=82 g1=49640 a1=315203 ae1=265563 a2=49640 +z1=1401244 ze1=1351605 z2=49639 m1=5661253 me1=5611614 m2=49639 + +These are counters tracking internal preemptable-RCU events; however, +some of them may be useful for debugging algorithms using RCU. In +particular, the "nl", "wl", and "dl" values track the number of RCU +callbacks in various states. The fields are as follows: + +o "na" is the total number of RCU callbacks that have been enqueued + since boot. + +o "nl" is the number of RCU callbacks waiting for the previous + grace period to end so that they can start waiting on the next + grace period. + +o "wa" is the total number of RCU callbacks that have started waiting + for a grace period since boot. "na" should be roughly equal to + "nl" plus "wa". + +o "wl" is the number of RCU callbacks currently waiting for their + grace period to end. + +o "da" is the total number of RCU callbacks whose grace periods + have completed since boot. "wa" should be roughly equal to + "wl" plus "da". + +o "dr" is the total number of RCU callbacks that have been removed + from the list of callbacks ready to invoke. "dr" should be roughly + equal to "da". + +o "di" is the total number of RCU callbacks that have been invoked + since boot. "di" should be roughly equal to "da", though some + early versions of preemptable RCU had a bug so that only the + last CPU's count of invocations was displayed, rather than the + sum of all CPUs' counts. + +o "1" is the number of calls to rcu_try_flip(). This should be + roughly equal to the sum of "e1", "i1", "a1", "z1", and "m1" + described below. In other words, the number of times that + the state machine is visited should be equal to the sum of the + number of times that each state is visited plus the number of + times that the state-machine lock acquisition failed.
+ +o "e1" is the number of times that rcu_try_flip() was unable to + acquire the fliplock. + +o "i1" is the number of calls to rcu_try_flip_idle(). + +o "ie1" is the number of times rcu_try_flip_idle() exited early + due to the calling CPU having no work for RCU. + +o "g1" is the number of times that rcu_try_flip_idle() decided + to start a new grace period. "i1" should be roughly equal to + "ie1" plus "g1". + +o "a1" is the number of calls to rcu_try_flip_waitack(). + +o "ae1" is the number of times that rcu_try_flip_waitack() found + that at least one CPU had not yet acknowledged the new grace period + (AKA "counter flip"). + +o "a2" is the number of times rcu_try_flip_waitack() found that + all CPUs had acknowledged. "a1" should be roughly equal to + "ae1" plus "a2". (This particular output was collected on + a 128-CPU machine, hence the smaller-than-usual fraction of + calls to rcu_try_flip_waitack() finding all CPUs having already + acknowledged.) + +o "z1" is the number of calls to rcu_try_flip_waitzero(). + +o "ze1" is the number of times that rcu_try_flip_waitzero() found + that not all of the old RCU read-side critical sections had + completed. + +o "z2" is the number of times that rcu_try_flip_waitzero() finds + the sum of the counters equal to zero, in other words, that + all of the old RCU read-side critical sections had completed. + The value of "z1" should be roughly equal to "ze1" plus + "z2". + +o "m1" is the number of calls to rcu_try_flip_waitmb(). + +o "me1" is the number of times that rcu_try_flip_waitmb() finds + that at least one CPU has not yet executed a memory barrier. + +o "m2" is the number of times that rcu_try_flip_waitmb() finds that + all CPUs have executed a memory barrier.
+ + +Hierarchical RCU debugfs Files and Formats + +This implementation of RCU provides three debugfs files under the +top-level directory RCU: rcu/rcudata (which displays fields in struct +rcu_data), rcu/rcugp (which displays grace-period counters), and +rcu/rcuhier (which displays the struct rcu_node hierarchy). + +The output of "cat rcu/rcudata" looks as follows: + +rcu: + 0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10 + 1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10 + 2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10 + 3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10 + 4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10 + 5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10 + 6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10 + 7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10 +rcu_bh: + 0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10 + 1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10 + 2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10 + 3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10 + 4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 + 5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 + 6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 + 7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 + +The first section lists the rcu_data structures for 
rcu, the second for +rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. +The fields are as follows: + +o The number at the beginning of each line is the CPU number. + CPU numbers followed by an exclamation mark are offline, + but have been online at least once since boot. There will be + no output for CPUs that have never been online, which can be + a good thing in the surprisingly common case where NR_CPUS is + substantially larger than the number of actual CPUs. + +o "c" is the count of grace periods that this CPU believes have + completed. CPUs in dynticks idle mode may lag quite a ways + behind, for example, CPU 4 under "rcu" above, which has slept + through the past 25 RCU grace periods. It is not unusual to + see CPUs lagging by thousands of grace periods. + +o "g" is the count of grace periods that this CPU believes have + started. Again, CPUs in dynticks idle mode may lag behind. + If the "c" and "g" values are equal, this CPU has already + reported a quiescent state for the last RCU grace period that + it is aware of, otherwise, the CPU believes that it owes RCU a + quiescent state. + +o "pq" indicates that this CPU has passed through a quiescent state + for the current grace period. It is possible for "pq" to be + "1" and "c" different than "g", which indicates that although + the CPU has passed through a quiescent state, either (1) this + CPU has not yet reported that fact, (2) some other CPU has not + yet reported for this grace period, or (3) both. + +o "pqc" indicates which grace period the last-observed quiescent + state for this CPU corresponds to. This is important for handling + the race between CPU 0 reporting an extended dynticks-idle + quiescent state for CPU 1 and CPU 1 suddenly waking up and + reporting its own quiescent state. If CPU 1 was the last CPU + for the current grace period, then the CPU that loses this race + will attempt to incorrectly mark CPU 1 as having checked in for + the next grace period!
+ +o "qp" indicates that RCU still expects a quiescent state from + this CPU. + +o "rpfq" is the number of rcu_pending() calls on this CPU required + to induce this CPU to invoke force_quiescent_state(). + +o "rp" is the low-order four hex digits of the count of how many times + rcu_pending() has been invoked on this CPU. + +o "dt" is the current value of the dyntick counter that is incremented + when entering or leaving dynticks idle state, either by the + scheduler or by irq. The number after the "/" is the interrupt + nesting depth when in dyntick-idle state, or one greater than + the interrupt-nesting depth otherwise. + + This field is displayed only for CONFIG_NO_HZ kernels. + +o "dn" is the current value of the dyntick counter that is incremented + when entering or leaving dynticks idle state via NMI. If both + the "dt" and "dn" values are even, then this CPU is in dynticks + idle mode and may be ignored by RCU. If either of these two + counters is odd, then RCU must be alert to the possibility of + an RCU read-side critical section running on this CPU. + + This field is displayed only for CONFIG_NO_HZ kernels. + +o "df" is the number of times that some other CPU has forced a + quiescent state on behalf of this CPU due to this CPU being in + dynticks-idle state. + + This field is displayed only for CONFIG_NO_HZ kernels. + +o "of" is the number of times that some other CPU has forced a + quiescent state on behalf of this CPU due to this CPU being + offline. In a perfect world, this might never happen, but it + turns out that offlining and onlining a CPU can take several grace + periods, and so there is likely to be an extended period of time + when RCU believes that the CPU is online when it really is not. + Please note that erring in the other direction (RCU believing a + CPU is offline when it is really alive and kicking) is a fatal + error, so it makes sense to err conservatively.
+ +o "ri" is the number of times that RCU has seen fit to send a + reschedule IPI to this CPU in order to get it to report a + quiescent state. + +o "ql" is the number of RCU callbacks currently residing on + this CPU. This is the total number of callbacks, regardless + of what state they are in (new, waiting for grace period to + start, waiting for grace period to end, ready to invoke). + +o "b" is the batch limit for this CPU. If more than this number + of RCU callbacks is ready to invoke, then the remainder will + be deferred. + + +The output of "cat rcu/rcugp" looks as follows: + +rcu: completed=33062 gpnum=33063 +rcu_bh: completed=464 gpnum=464 + +Again, this output is for both "rcu" and "rcu_bh". The fields are +taken from the rcu_state structure, and are as follows: + +o "completed" is the number of grace periods that have completed. + It is comparable to the "c" field from rcu/rcudata in that a + CPU whose "c" field matches the value of "completed" is aware + that the corresponding RCU grace period has completed. + +o "gpnum" is the number of grace periods that have started. It is + comparable to the "g" field from rcu/rcudata in that a CPU + whose "g" field matches the value of "gpnum" is aware that the + corresponding RCU grace period has started. + + If these two fields are equal (as they are for "rcu_bh" above), + then there is no grace period in progress, in other words, RCU + is idle. On the other hand, if the two fields differ (as they + do for "rcu" above), then an RCU grace period is in progress. 
+ + +The output of "cat rcu/rcuhier" looks as follows, with very long lines: + +c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 +1/1 0:127 ^0 +3/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 +3/3f 0:5 ^0 2/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 +rcu_bh: +c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 +0/1 0:127 ^0 +0/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 +0/3f 0:5 ^0 0/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 + +This is once again split into "rcu" and "rcu_bh" portions. The fields are +as follows: + +o "c" is exactly the same as "completed" under rcu/rcugp. + +o "g" is exactly the same as "gpnum" under rcu/rcugp. + +o "s" is the "signaled" state that drives force_quiescent_state()'s + state machine. + +o "jfq" is the number of jiffies remaining for this grace period + before force_quiescent_state() is invoked to help push things + along. Note that CPUs in dyntick-idle mode throughout the grace + period will not report on their own, but rather must be checked by + some other CPU via force_quiescent_state(). + +o "j" is the low-order four hex digits of the jiffies counter. + Yes, Paul did run into a number of problems that turned out to + be due to the jiffies counter no longer counting. Why do you ask? + +o "nfqs" is the number of calls to force_quiescent_state() since + boot. + +o "nfqsng" is the number of useless calls to force_quiescent_state(), + where there wasn't actually a grace period active. This can + happen due to races.
The number in parentheses is the difference + between "nfqs" and "nfqsng", or the number of times that + force_quiescent_state() actually did some real work. + +o "fqlh" is the number of calls to force_quiescent_state() that + exited immediately (without even being counted in nfqs above) + due to contention on ->fqslock. + +o Each element of the form "1/1 0:127 ^0" represents one struct + rcu_node. Each line represents one level of the hierarchy, from + root to leaves. It is best to think of the rcu_data structures + as forming yet another level after the leaves. Note that there + might be either one, two, or three levels of rcu_node structures, + depending on the relationship between CONFIG_RCU_FANOUT and + CONFIG_NR_CPUS. + + o The numbers separated by the "/" are the qsmask followed + by the qsmaskinit. The qsmask will have one bit + set for each entity in the next lower level that + has not yet checked in for the current grace period. + The qsmaskinit will have one bit for each entity that is + currently expected to check in during each grace period. + The value of qsmaskinit is assigned to that of qsmask + at the beginning of each grace period. + + For example, for "rcu", the qsmask of the first entry + of the lowest level is 0x14, meaning that we are still + waiting for CPUs 2 and 4 to check in for the current + grace period. + + o The numbers separated by the ":" are the range of CPUs + served by this struct rcu_node. This can be helpful + in working out how the hierarchy is wired together. + + For example, the first entry at the lowest level shows + "0:5", indicating that it covers CPUs 0 through 5. + + o The number after the "^" indicates the bit in the + next higher level rcu_node structure that this + rcu_node structure corresponds to. + + For example, the first entry at the lowest level shows + "^0", indicating that it corresponds to bit zero in + the first entry at the middle level. 
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index f4e55be2eea..afad9f5ac0a 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -208,6 +208,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) break; case ERR_TYPE_KERNEL_PANIC: default: + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } @@ -227,6 +228,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) /* Check to see if we need to or have stopped logging */ if (fatal || !logging_enabled) { logging_enabled = 0; + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } @@ -249,11 +251,13 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) else rtas_log_start += 1; + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); wake_up_interruptible(&rtas_log_wait); break; case ERR_TYPE_KERNEL_PANIC: default: + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 181006cc94a..9b70b923169 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -118,13 +118,17 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif -#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ) +#if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); +extern void rcu_nmi_enter(void); +extern void rcu_nmi_exit(void); #else # define rcu_irq_enter() do { } while (0) # define rcu_irq_exit() do { } while (0) -#endif /* CONFIG_PREEMPT_RCU */ +# define rcu_nmi_enter() do { } while (0) +# define rcu_nmi_exit() do { } while (0) +#endif /* #if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) */ /* * It is safe to do non-atomic ops on 
->hardirq_context, @@ -134,7 +138,6 @@ extern void rcu_irq_exit(void); */ #define __irq_enter() \ do { \ - rcu_irq_enter(); \ account_system_vtime(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ @@ -153,7 +156,6 @@ extern void irq_enter(void); trace_hardirq_exit(); \ account_system_vtime(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ - rcu_irq_exit(); \ } while (0) /* @@ -161,7 +163,7 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() do { lockdep_off(); rcu_nmi_enter(); __irq_enter(); } while (0) +#define nmi_exit() do { __irq_exit(); rcu_nmi_exit(); lockdep_on(); } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 86f1f5e43e3..bfd289aff57 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,11 +52,15 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; -#ifdef CONFIG_CLASSIC_RCU +#if defined(CONFIG_CLASSIC_RCU) #include -#else /* #ifdef CONFIG_CLASSIC_RCU */ +#elif defined(CONFIG_TREE_RCU) +#include +#elif defined(CONFIG_PREEMPT_RCU) #include -#endif /* #else #ifdef CONFIG_CLASSIC_RCU */ +#else +#error "Unknown RCU implementation specified to kernel configuration" +#endif /* #else #if defined(CONFIG_CLASSIC_RCU) */ #define RCU_HEAD_INIT { .next = NULL, .func = NULL } #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h new file mode 100644 index 00000000000..d4368b7975c --- /dev/null +++ b/include/linux/rcutree.h @@ -0,0 +1,329 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the 
License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Dipankar Sarma + * Paul E. McKenney Hierarchical algorithm + * + * Based on the original work by Paul McKenney + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ + +#ifndef __LINUX_RCUTREE_H +#define __LINUX_RCUTREE_H + +#include +#include +#include +#include +#include +#include + +/* + * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. + * In theory, it should be possible to add more levels straightforwardly. + * In practice, this has not been tested, so there is probably some + * bug somewhere. 
+ */ +#define MAX_RCU_LVLS 3 +#define RCU_FANOUT (CONFIG_RCU_FANOUT) +#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) +#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) + +#if NR_CPUS <= RCU_FANOUT +# define NUM_RCU_LVLS 1 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (NR_CPUS) +# define NUM_RCU_LVL_2 0 +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_SQ +# define NUM_RCU_LVLS 2 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT) +# define NUM_RCU_LVL_2 (NR_CPUS) +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_CUBE +# define NUM_RCU_LVLS 3 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ) +# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT)) +# define NUM_RCU_LVL_3 NR_CPUS +#else +# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" +#endif /* #if (NR_CPUS) <= RCU_FANOUT */ + +#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) +#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) + +/* + * Dynticks per-CPU state. + */ +struct rcu_dynticks { + int dynticks_nesting; /* Track nesting level, sort of. */ + int dynticks; /* Even value for dynticks-idle, else odd. */ + int dynticks_nmi; /* Even value for either dynticks-idle or */ + /* not in nmi handler, else odd. So this */ + /* remains even for nmi from irq handler. */ +}; + +/* + * Definition for node within the RCU grace-period-detection hierarchy. + */ +struct rcu_node { + spinlock_t lock; + unsigned long qsmask; /* CPUs or groups that need to switch in */ + /* order for current grace period to proceed.*/ + unsigned long qsmaskinit; + /* Per-GP initialization for qsmask. */ + unsigned long grpmask; /* Mask to apply to parent qsmask. */ + int grplo; /* lowest-numbered CPU or group here. */ + int grphi; /* highest-numbered CPU or group here. */ + u8 grpnum; /* CPU/group number for next level up. */ + u8 level; /* root is at level 0. 
*/ + struct rcu_node *parent; +} ____cacheline_internodealigned_in_smp; + +/* Index values for nxttail array in struct rcu_data. */ +#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ +#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ +#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ +#define RCU_NEXT_TAIL 3 +#define RCU_NEXT_SIZE 4 + +/* Per-CPU data for read-copy update. */ +struct rcu_data { + /* 1) quiescent-state and grace-period handling : */ + long completed; /* Track rsp->completed gp number */ + /* in order to detect GP end. */ + long gpnum; /* Highest gp number that this CPU */ + /* is aware of having started. */ + long passed_quiesc_completed; + /* Value of completed at time of qs. */ + bool passed_quiesc; /* User-mode/idle loop etc. */ + bool qs_pending; /* Core waits for quiesc state. */ + bool beenonline; /* CPU online at least once. */ + struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ + unsigned long grpmask; /* Mask to apply to leaf qsmask. */ + + /* 2) batch handling */ + /* + * If nxtlist is not NULL, it is partitioned as follows. + * Any of the partitions might be empty, in which case the + * pointer to that partition will be equal to the pointer for + * the following partition. When the list is empty, all of + * the nxttail elements point to nxtlist, which is NULL. + * + * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]): + * Entries that might have arrived after current GP ended + * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): + * Entries known to have arrived before current GP ended + * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): + * Entries that batch # <= ->completed - 1: waiting for current GP + * [nxtlist, *nxttail[RCU_DONE_TAIL]): + * Entries that batch # <= ->completed + * The grace period for these entries has completed, and + * the other grace-period-completed entries may be moved + * here temporarily in rcu_process_callbacks(). 
+ */ + struct rcu_head *nxtlist; + struct rcu_head **nxttail[RCU_NEXT_SIZE]; + long qlen; /* # of queued callbacks */ + long blimit; /* Upper limit on a processed batch */ + +#ifdef CONFIG_NO_HZ + /* 3) dynticks interface. */ + struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ + int dynticks_snap; /* Per-GP tracking for dynticks. */ + int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ +#endif /* #ifdef CONFIG_NO_HZ */ + + /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ +#ifdef CONFIG_NO_HZ + unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ +#endif /* #ifdef CONFIG_NO_HZ */ + unsigned long offline_fqs; /* Kicked due to being offline. */ + unsigned long resched_ipi; /* Sent a resched IPI. */ + + /* 5) state to allow this CPU to force_quiescent_state on others */ + long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rcu_pending_force_qs; /* when to force quiescent states. */ + + int cpu; +}; + +/* Values for signaled field in struct rcu_state. */ +#define RCU_GP_INIT 0 /* Grace period being initialized. */ +#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ +#define RCU_FORCE_QS 2 /* Need to force quiescent state. */ +#ifdef CONFIG_NO_HZ +#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK +#else /* #ifdef CONFIG_NO_HZ */ +#define RCU_SIGNAL_INIT RCU_FORCE_QS +#endif /* #else #ifdef CONFIG_NO_HZ */ + +#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ +#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ + /* to take at least one */ + /* scheduling clock irq */ + /* before ratting on them. */ + +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * RCU global state, including node hierarchy. This hierarchy is + * represented in "heap" form in a dense array. 
The root (first level) + * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second + * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), + * and the third level in ->node[m+1] and following (->node[m+1] referenced + * by ->level[2]). The number of levels is determined by the number of + * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" + * consisting of a single rcu_node. + */ +struct rcu_state { + struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ + struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ + u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ + u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ + struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ + + /* The following fields are guarded by the root rcu_node's lock. */ + + u8 signaled ____cacheline_internodealigned_in_smp; + /* Force QS state. */ + long gpnum; /* Current gp number. */ + long completed; /* # of last completed gp. */ + spinlock_t onofflock; /* exclude on/offline and */ + /* starting new GP. */ + spinlock_t fqslock; /* Only one task forcing */ + /* quiescent states. */ + unsigned long jiffies_force_qs; /* Time at which to invoke */ + /* force_quiescent_state(). */ + unsigned long n_force_qs; /* Number of calls to */ + /* force_quiescent_state(). */ + unsigned long n_force_qs_lh; /* ~Number of calls leaving */ + /* due to lock unavailable. */ + unsigned long n_force_qs_ngp; /* Number of calls leaving */ + /* due to no GP active. */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + unsigned long gp_start; /* Time at which GP started, */ + /* but in jiffies. */ + unsigned long jiffies_stall; /* Time at which to check */ + /* for CPU stalls. */ +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ +#ifdef CONFIG_NO_HZ + long dynticks_completed; /* Value of completed @ snap. 
*/ +#endif /* #ifdef CONFIG_NO_HZ */ +}; + +extern struct rcu_state rcu_state; +DECLARE_PER_CPU(struct rcu_data, rcu_data); + +extern struct rcu_state rcu_bh_state; +DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); + +/* + * Increment the quiescent state counter. + * The counter is a bit degenerated: We do not need to know + * how many quiescent states passed, just if there was at least + * one since the start of the grace period. Thus just a flag. + */ +static inline void rcu_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + rdp->passed_quiesc = 1; + rdp->passed_quiesc_completed = rdp->completed; +} +static inline void rcu_bh_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); + rdp->passed_quiesc = 1; + rdp->passed_quiesc_completed = rdp->completed; +} + +extern int rcu_pending(int cpu); +extern int rcu_needs_cpu(int cpu); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern struct lockdep_map rcu_lock_map; +# define rcu_read_acquire() \ + lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) +#else +# define rcu_read_acquire() do { } while (0) +# define rcu_read_release() do { } while (0) +#endif + +static inline void __rcu_read_lock(void) +{ + preempt_disable(); + __acquire(RCU); + rcu_read_acquire(); +} +static inline void __rcu_read_unlock(void) +{ + rcu_read_release(); + __release(RCU); + preempt_enable(); +} +static inline void __rcu_read_lock_bh(void) +{ + local_bh_disable(); + __acquire(RCU_BH); + rcu_read_acquire(); +} +static inline void __rcu_read_unlock_bh(void) +{ + rcu_read_release(); + __release(RCU_BH); + local_bh_enable(); +} + +#define __synchronize_sched() synchronize_rcu() + +#define call_rcu_sched(head, func) call_rcu(head, func) + +static inline void rcu_init_sched(void) +{ +} + +extern void __rcu_init(void); +extern void rcu_check_callbacks(int cpu, int user); +extern void rcu_restart_cpu(int cpu); + +extern long 
rcu_batches_completed(void); +extern long rcu_batches_completed_bh(void); + +#ifdef CONFIG_NO_HZ +void rcu_enter_nohz(void); +void rcu_exit_nohz(void); +#else /* CONFIG_NO_HZ */ +static inline void rcu_enter_nohz(void) +{ +} +static inline void rcu_exit_nohz(void) +{ +} +#endif /* CONFIG_NO_HZ */ + +#endif /* __LINUX_RCUTREE_H */ diff --git a/init/Kconfig b/init/Kconfig index f763762d544..9dd7958a71f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -928,10 +928,16 @@ source "block/Kconfig" config PREEMPT_NOTIFIERS bool -config CLASSIC_RCU - def_bool !PREEMPT_RCU +config TREE_RCU_TRACE + def_bool RCU_TRACE && TREE_RCU + select DEBUG_FS help - This option selects the classic RCU implementation that is - designed for best read-side performance on non-realtime - systems. Classic RCU is the default. Note that the - PREEMPT_RCU symbol is used to select/deselect this option. + This option provides tracing for the TREE_RCU implementation, + permitting Makefile to trivially select kernel/rcutree_trace.c. + +config PREEMPT_RCU_TRACE + def_bool RCU_TRACE && PREEMPT_RCU + select DEBUG_FS + help + This option provides tracing for the PREEMPT_RCU implementation, + permitting Makefile to trivially select kernel/rcupreempt_trace.c. diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 9fdba03dc1f..463f29743ea 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -52,10 +52,29 @@ config PREEMPT endchoice +choice + prompt "RCU Implementation" + default CLASSIC_RCU + +config CLASSIC_RCU + bool "Classic RCU" + help + This option selects the classic RCU implementation that is + designed for best read-side performance on non-realtime + systems. + + Select this option if you are unsure. + +config TREE_RCU + bool "Tree-based hierarchical RCU" + help + This option selects the RCU implementation that is + designed for very large SMP system with hundreds or + thousands of CPUs. 
+ config PREEMPT_RCU bool "Preemptible RCU" depends on PREEMPT - default n help This option reduces the latency of the kernel by making certain RCU sections preemptible. Normally RCU code is non-preemptible, if @@ -64,16 +83,47 @@ config PREEMPT_RCU now-naive assumptions about each RCU read-side critical section remaining on a given CPU through its execution. - Say N if you are unsure. +endchoice config RCU_TRACE - bool "Enable tracing for RCU - currently stats in debugfs" - depends on PREEMPT_RCU - select DEBUG_FS - default y + bool "Enable tracing for RCU" + depends on TREE_RCU || PREEMPT_RCU help This option provides tracing in RCU which presents stats in debugfs for debugging RCU implementation. Say Y here if you want to enable RCU tracing Say N if you are unsure. + +config RCU_FANOUT + int "Tree-based hierarchical RCU fanout value" + range 2 64 if 64BIT + range 2 32 if !64BIT + depends on TREE_RCU + default 64 if 64BIT + default 32 if !64BIT + help + This option controls the fanout of hierarchical implementations + of RCU, allowing RCU to work efficiently on machines with + large numbers of CPUs. This value must be at least the cube + root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit + systems and up to 262,144 for 64-bit systems. + + Select a specific number if testing RCU itself. + Take the default if unsure. + +config RCU_FANOUT_EXACT + bool "Disable tree-based hierarchical RCU auto-balancing" + depends on TREE_RCU + default n + help + This option forces use of the exact RCU_FANOUT value specified, + regardless of imbalances in the hierarchy. This is useful for + testing RCU itself, and might one day be useful on systems with + strong NUMA behavior. + + Without RCU_FANOUT_EXACT, the code will balance the hierarchy. + + Say n if unsure. 
+ + diff --git a/kernel/Makefile b/kernel/Makefile index 19fad003b19..b4fdbbff5ec 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -74,10 +74,10 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o +obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o -ifeq ($(CONFIG_PREEMPT_RCU),y) -obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o -endif +obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o +obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 59236e8b9da..04982659875 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -551,6 +551,16 @@ void rcu_irq_exit(void) } } +void rcu_nmi_enter(void) +{ + rcu_irq_enter(); +} + +void rcu_nmi_exit(void) +{ + rcu_irq_exit(); +} + static void dyntick_save_progress_counter(int cpu) { struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c index 35c2d3360ec..7c2665cac17 100644 --- a/kernel/rcupreempt_trace.c +++ b/kernel/rcupreempt_trace.c @@ -149,12 +149,12 @@ static void rcupreempt_trace_sum(struct rcupreempt_trace *sp) sp->done_length += cp->done_length; sp->done_add += cp->done_add; sp->done_remove += cp->done_remove; - atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked)); + atomic_add(atomic_read(&cp->done_invoked), &sp->done_invoked); sp->rcu_check_callbacks += cp->rcu_check_callbacks; - atomic_set(&sp->rcu_try_flip_1, - atomic_read(&cp->rcu_try_flip_1)); - atomic_set(&sp->rcu_try_flip_e1, - atomic_read(&cp->rcu_try_flip_e1)); + atomic_add(atomic_read(&cp->rcu_try_flip_1), + &sp->rcu_try_flip_1); + atomic_add(atomic_read(&cp->rcu_try_flip_e1), + &sp->rcu_try_flip_e1); sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1; 
sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1; sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1; diff --git a/kernel/rcutree.c b/kernel/rcutree.c new file mode 100644 index 00000000000..a342b032112 --- /dev/null +++ b/kernel/rcutree.c @@ -0,0 +1,1535 @@ +/* + * Read-Copy Update mechanism for mutual exclusion + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Authors: Dipankar Sarma + * Manfred Spraul + * Paul E. McKenney Hierarchical version + * + * Based on the original work by Paul McKenney + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key rcu_lock_key; +struct lockdep_map rcu_lock_map = + STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); +EXPORT_SYMBOL_GPL(rcu_lock_map); +#endif + +/* Data structures. */ + +#define RCU_STATE_INITIALIZER(name) { \ + .level = { &name.node[0] }, \ + .levelcnt = { \ + NUM_RCU_LVL_0, /* root of hierarchy. 
*/ \ + NUM_RCU_LVL_1, \ + NUM_RCU_LVL_2, \ + NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \ + }, \ + .signaled = RCU_SIGNAL_INIT, \ + .gpnum = -300, \ + .completed = -300, \ + .onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \ + .fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \ + .n_force_qs = 0, \ + .n_force_qs_ngp = 0, \ +} + +struct rcu_state rcu_state = RCU_STATE_INITIALIZER(rcu_state); +DEFINE_PER_CPU(struct rcu_data, rcu_data); + +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); +DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); + +#ifdef CONFIG_NO_HZ +DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks); +#endif /* #ifdef CONFIG_NO_HZ */ + +static int blimit = 10; /* Maximum callbacks per softirq. */ +static int qhimark = 10000; /* If this many pending, ignore blimit. */ +static int qlowmark = 100; /* Once only this many pending, use blimit. */ + +static void force_quiescent_state(struct rcu_state *rsp, int relaxed); + +/* + * Return the number of RCU batches processed thus far for debug & stats. + */ +long rcu_batches_completed(void) +{ + return rcu_state.completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed); + +/* + * Return the number of RCU BH batches processed thus far for debug & stats. + */ +long rcu_batches_completed_bh(void) +{ + return rcu_bh_state.completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); + +/* + * Does the CPU have callbacks ready to be invoked? + */ +static int +cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) +{ + return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]; +} + +/* + * Does the current CPU require a yet-as-unscheduled grace period? + */ +static int +cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) +{ + /* ACCESS_ONCE() because we are accessing outside of lock. */ + return *rdp->nxttail[RCU_DONE_TAIL] && + ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum); +} + +/* + * Return the root node of the specified rcu_state structure. 
+ */ +static struct rcu_node *rcu_get_root(struct rcu_state *rsp) +{ + return &rsp->node[0]; +} + +#ifdef CONFIG_SMP + +/* + * If the specified CPU is offline, tell the caller that it is in + * a quiescent state. Otherwise, whack it with a reschedule IPI. + * Grace periods can end up waiting on an offline CPU when that + * CPU is in the process of coming online -- it will be added to the + * rcu_node bitmasks before it actually makes it online. The same thing + * can happen while a CPU is in the process of coming online. Because this + * race is quite rare, we check for it after detecting that the grace + * period has been delayed rather than checking each and every CPU + * each and every time we start a new grace period. + */ +static int rcu_implicit_offline_qs(struct rcu_data *rdp) +{ + /* + * If the CPU is offline, it is in a quiescent state. We can + * trust its state not to change because interrupts are disabled. + */ + if (cpu_is_offline(rdp->cpu)) { + rdp->offline_fqs++; + return 1; + } + + /* The CPU is online, so send it a reschedule IPI. */ + if (rdp->cpu != smp_processor_id()) + smp_send_reschedule(rdp->cpu); + else + set_need_resched(); + rdp->resched_ipi++; + return 0; +} + +#endif /* #ifdef CONFIG_SMP */ + +#ifdef CONFIG_NO_HZ +static DEFINE_RATELIMIT_STATE(rcu_rs, 10 * HZ, 5); + +/** + * rcu_enter_nohz - inform RCU that current CPU is entering nohz + * + * Enter nohz mode, in other words, -leave- the mode in which RCU + * read-side critical sections can occur. (Though RCU read-side + * critical sections can occur in irq handlers in nohz mode, a possibility + * handled by rcu_irq_enter() and rcu_irq_exit()). 
+ */ +void rcu_enter_nohz(void) +{ + unsigned long flags; + struct rcu_dynticks *rdtp; + + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ + local_irq_save(flags); + rdtp = &__get_cpu_var(rcu_dynticks); + rdtp->dynticks++; + rdtp->dynticks_nesting--; + WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); + local_irq_restore(flags); +} + +/* + * rcu_exit_nohz - inform RCU that current CPU is leaving nohz + * + * Exit nohz mode, in other words, -enter- the mode in which RCU + * read-side critical sections normally occur. + */ +void rcu_exit_nohz(void) +{ + unsigned long flags; + struct rcu_dynticks *rdtp; + + local_irq_save(flags); + rdtp = &__get_cpu_var(rcu_dynticks); + rdtp->dynticks++; + rdtp->dynticks_nesting++; + WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs); + local_irq_restore(flags); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ +} + +/** + * rcu_nmi_enter - inform RCU of entry to NMI context + * + * If the CPU was idle with dynamic ticks active, and there is no + * irq handler running, this updates rdtp->dynticks_nmi to let the + * RCU grace-period handling know that the CPU is active. + */ +void rcu_nmi_enter(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (rdtp->dynticks & 0x1) + return; + rdtp->dynticks_nmi++; + WARN_ON_RATELIMIT(!(rdtp->dynticks_nmi & 0x1), &rcu_rs); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ +} + +/** + * rcu_nmi_exit - inform RCU of exit from NMI context + * + * If the CPU was idle with dynamic ticks active, and there is no + * irq handler running, this updates rdtp->dynticks_nmi to let the + * RCU grace-period handling know that the CPU is no longer active. 
+ */ +void rcu_nmi_exit(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (rdtp->dynticks & 0x1) + return; + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ + rdtp->dynticks_nmi++; + WARN_ON_RATELIMIT(rdtp->dynticks_nmi & 0x1, &rcu_rs); +} + +/** + * rcu_irq_enter - inform RCU of entry to hard irq context + * + * If the CPU was idle with dynamic ticks active, this updates the + * rdtp->dynticks to let the RCU handling know that the CPU is active. + */ +void rcu_irq_enter(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (rdtp->dynticks_nesting++) + return; + rdtp->dynticks++; + WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ +} + +/** + * rcu_irq_exit - inform RCU of exit from hard irq context + * + * If the CPU was idle with dynamic ticks active, update rdtp->dynticks + * to let the RCU handling know that the CPU is going back to idle + * with no ticks. + */ +void rcu_irq_exit(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (--rdtp->dynticks_nesting) + return; + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ + rdtp->dynticks++; + WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); + + /* If the interrupt queued a callback, get out of dyntick mode. */ + if (__get_cpu_var(rcu_data).nxtlist || + __get_cpu_var(rcu_bh_data).nxtlist) + set_need_resched(); +} + +/* + * Record the specified "completed" value, which is later used to validate + * dynticks counter manipulations. Specify "rsp->completed - 1" to + * unconditionally invalidate any future dynticks manipulations (which is + * useful at the beginning of a grace period). + */ +static void dyntick_record_completed(struct rcu_state *rsp, long comp) +{ + rsp->dynticks_completed = comp; +} + +#ifdef CONFIG_SMP + +/* + * Recall the previously recorded value of the completion for dynticks.
+ */ +static long dyntick_recall_completed(struct rcu_state *rsp) +{ + return rsp->dynticks_completed; +} + +/* + * Snapshot the specified CPU's dynticks counter so that we can later + * credit them with an implicit quiescent state. Return 1 if this CPU + * is already in a quiescent state courtesy of dynticks idle mode. + */ +static int dyntick_save_progress_counter(struct rcu_data *rdp) +{ + int ret; + int snap; + int snap_nmi; + + snap = rdp->dynticks->dynticks; + snap_nmi = rdp->dynticks->dynticks_nmi; + smp_mb(); /* Order sampling of snap with end of grace period. */ + rdp->dynticks_snap = snap; + rdp->dynticks_nmi_snap = snap_nmi; + ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0); + if (ret) + rdp->dynticks_fqs++; + return ret; +} + +/* + * Return true if the specified CPU has passed through a quiescent + * state by virtue of being in or having passed through an dynticks + * idle state since the last call to dyntick_save_progress_counter() + * for this same CPU. + */ +static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) +{ + long curr; + long curr_nmi; + long snap; + long snap_nmi; + + curr = rdp->dynticks->dynticks; + snap = rdp->dynticks_snap; + curr_nmi = rdp->dynticks->dynticks_nmi; + snap_nmi = rdp->dynticks_nmi_snap; + smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + + /* + * If the CPU passed through or entered a dynticks idle phase with + * no active irq/NMI handlers, then we can safely pretend that the CPU + * already acknowledged the request to pass through a quiescent + * state. Either way, that CPU cannot possibly be in an RCU + * read-side critical section that started before the beginning + * of the current RCU grace period. + */ + if ((curr != snap || (curr & 0x1) == 0) && + (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) { + rdp->dynticks_fqs++; + return 1; + } + + /* Go check for the CPU being offline. 
*/ + return rcu_implicit_offline_qs(rdp); +} + +#endif /* #ifdef CONFIG_SMP */ + +#else /* #ifdef CONFIG_NO_HZ */ + +static void dyntick_record_completed(struct rcu_state *rsp, long comp) +{ +} + +#ifdef CONFIG_SMP + +/* + * If there are no dynticks, then the only way that a CPU can passively + * be in a quiescent state is to be offline. Unlike dynticks idle, which + * is a point in time during the prior (already finished) grace period, + * an offline CPU is always in a quiescent state, and thus can be + * unconditionally applied. So just return the current value of completed. + */ +static long dyntick_recall_completed(struct rcu_state *rsp) +{ + return rsp->completed; +} + +static int dyntick_save_progress_counter(struct rcu_data *rdp) +{ + return 0; +} + +static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) +{ + return rcu_implicit_offline_qs(rdp); +} + +#endif /* #ifdef CONFIG_SMP */ + +#endif /* #else #ifdef CONFIG_NO_HZ */ + +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + +static void record_gp_stall_check_time(struct rcu_state *rsp) +{ + rsp->gp_start = jiffies; + rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; +} + +static void print_other_cpu_stall(struct rcu_state *rsp) +{ + int cpu; + long delta; + unsigned long flags; + struct rcu_node *rnp = rcu_get_root(rsp); + struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; + struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES]; + + /* Only let one CPU complain about others per time interval. */ + + spin_lock_irqsave(&rnp->lock, flags); + delta = jiffies - rsp->jiffies_stall; + if (delta < RCU_STALL_RAT_DELAY || rsp->gpnum == rsp->completed) { + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; + spin_unlock_irqrestore(&rnp->lock, flags); + + /* OK, time to rat on our buddy... 
*/ + + printk(KERN_ERR "INFO: RCU detected CPU stalls:"); + for (; rnp_cur < rnp_end; rnp_cur++) { + if (rnp_cur->qsmask == 0) + continue; + for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++) + if (rnp_cur->qsmask & (1UL << cpu)) + printk(" %d", rnp_cur->grplo + cpu); + } + printk(" (detected by %d, t=%ld jiffies)\n", + smp_processor_id(), (long)(jiffies - rsp->gp_start)); + force_quiescent_state(rsp, 0); /* Kick them all. */ +} + +static void print_cpu_stall(struct rcu_state *rsp) +{ + unsigned long flags; + struct rcu_node *rnp = rcu_get_root(rsp); + + printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n", + smp_processor_id(), jiffies - rsp->gp_start); + dump_stack(); + spin_lock_irqsave(&rnp->lock, flags); + if ((long)(jiffies - rsp->jiffies_stall) >= 0) + rsp->jiffies_stall = + jiffies + RCU_SECONDS_TILL_STALL_RECHECK; + spin_unlock_irqrestore(&rnp->lock, flags); + set_need_resched(); /* kick ourselves to get things going. */ +} + +static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) +{ + long delta; + struct rcu_node *rnp; + + delta = jiffies - rsp->jiffies_stall; + rnp = rdp->mynode; + if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { + + /* We haven't checked in, so go dump stack. */ + print_cpu_stall(rsp); + + } else if (rsp->gpnum != rsp->completed && + delta >= RCU_STALL_RAT_DELAY) { + + /* They had two time units to dump stack, so complain. */ + print_other_cpu_stall(rsp); + } +} + +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +static void record_gp_stall_check_time(struct rcu_state *rsp) +{ +} + +static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * Update CPU-local rcu_data state to record the newly noticed grace period. + * This is used both when we started the grace period and when we notice + * that someone else started the grace period. 
+ */ +static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) +{ + rdp->qs_pending = 1; + rdp->passed_quiesc = 0; + rdp->gpnum = rsp->gpnum; + rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + + RCU_JIFFIES_TILL_FORCE_QS; +} + +/* + * Did someone else start a new RCU grace period since we last + * checked? Update local state appropriately if so. Must be called + * on the CPU corresponding to rdp. + */ +static int +check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) +{ + unsigned long flags; + int ret = 0; + + local_irq_save(flags); + if (rdp->gpnum != rsp->gpnum) { + note_new_gpnum(rsp, rdp); + ret = 1; + } + local_irq_restore(flags); + return ret; +} + +/* + * Start a new RCU grace period if warranted, re-initializing the hierarchy + * in preparation for detecting the next grace period. The caller must hold + * the root node's ->lock, which is released before return. Hard irqs must + * be disabled. + */ +static void +rcu_start_gp(struct rcu_state *rsp, unsigned long flags) + __releases(rcu_get_root(rsp)->lock) +{ + struct rcu_data *rdp = rsp->rda[smp_processor_id()]; + struct rcu_node *rnp = rcu_get_root(rsp); + struct rcu_node *rnp_cur; + struct rcu_node *rnp_end; + + if (!cpu_needs_another_gp(rsp, rdp)) { + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + + /* Advance to a new grace period and initialize state. */ + rsp->gpnum++; + rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ + rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; + rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + + RCU_JIFFIES_TILL_FORCE_QS; + record_gp_stall_check_time(rsp); + dyntick_record_completed(rsp, rsp->completed - 1); + note_new_gpnum(rsp, rdp); + + /* + * Because we are first, we know that all our callbacks will + * be covered by this upcoming grace period, even the ones + * that were registered arbitrarily recently. 
+ */ + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Special-case the common single-level case. */ + if (NUM_RCU_NODES == 1) { + rnp->qsmask = rnp->qsmaskinit; + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + + spin_unlock(&rnp->lock); /* leave irqs disabled. */ + + + /* Exclude any concurrent CPU-hotplug operations. */ + spin_lock(&rsp->onofflock); /* irqs already disabled. */ + + /* + * Set the quiescent-state-needed bits in all the non-leaf RCU + * nodes for all currently online CPUs. This operation relies + * on the layout of the hierarchy within the rsp->node[] array. + * Note that other CPUs will access only the leaves of the + * hierarchy, which still indicate that no grace period is in + * progress. In addition, we have excluded CPU-hotplug operations. + * + * We therefore do not need to hold any locks. Any required + * memory barriers will be supplied by the locks guarding the + * leaf rcu_nodes in the hierarchy. + */ + + rnp_end = rsp->level[NUM_RCU_LVLS - 1]; + for (rnp_cur = &rsp->node[0]; rnp_cur < rnp_end; rnp_cur++) + rnp_cur->qsmask = rnp_cur->qsmaskinit; + + /* + * Now set up the leaf nodes. Here we must be careful. First, + * we need to hold the lock in order to exclude other CPUs, which + * might be contending for the leaf nodes' locks. Second, as + * soon as we initialize a given leaf node, its CPUs might run + * up the rest of the hierarchy. We must therefore acquire locks + * for each node that we touch during this stage. (But we still + * are excluding CPU-hotplug operations.) + * + * Note that the grace period cannot complete until we finish + * the initialization process, as there will be at least one + * qsmask bit set in the root node until that time, namely the + * one corresponding to this CPU. 
+ */ + rnp_end = &rsp->node[NUM_RCU_NODES]; + rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; + for (; rnp_cur < rnp_end; rnp_cur++) { + spin_lock(&rnp_cur->lock); /* irqs already disabled. */ + rnp_cur->qsmask = rnp_cur->qsmaskinit; + spin_unlock(&rnp_cur->lock); /* irqs already disabled. */ + } + + rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ + spin_unlock_irqrestore(&rsp->onofflock, flags); +} + +/* + * Advance this CPU's callbacks, but only if the current grace period + * has ended. This may be called only from the CPU to whom the rdp + * belongs. + */ +static void +rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) +{ + long completed_snap; + unsigned long flags; + + local_irq_save(flags); + completed_snap = ACCESS_ONCE(rsp->completed); /* outside of lock. */ + + /* Did another grace period end? */ + if (rdp->completed != completed_snap) { + + /* Advance callbacks. No harm if list empty. */ + rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Remember that we saw this grace-period completion. */ + rdp->completed = completed_snap; + } + local_irq_restore(flags); +} + +/* + * Similar to cpu_quiet(), for which it is a helper function. Allows + * a group of CPUs to be quieted at one go, though all the CPUs in the + * group must be represented by the same leaf rcu_node structure. + * That structure's lock must be held upon entry, and it is released + * before return. + */ +static void +cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, + unsigned long flags) + __releases(rnp->lock) +{ + /* Walk up the rcu_node hierarchy. */ + for (;;) { + if (!(rnp->qsmask & mask)) { + + /* Our bit has already been cleared, so done. 
*/ + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + rnp->qsmask &= ~mask; + if (rnp->qsmask != 0) { + + /* Other bits still set at this level, so done. */ + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + mask = rnp->grpmask; + if (rnp->parent == NULL) { + + /* No more levels. Exit loop holding root lock. */ + + break; + } + spin_unlock_irqrestore(&rnp->lock, flags); + rnp = rnp->parent; + spin_lock_irqsave(&rnp->lock, flags); + } + + /* + * Get here if we are the last CPU to pass through a quiescent + * state for this grace period. Clean up and let rcu_start_gp() + * start up the next grace period if one is needed. Note that + * we still hold rnp->lock, as required by rcu_start_gp(), which + * will release it. + */ + rsp->completed = rsp->gpnum; + rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); + rcu_start_gp(rsp, flags); /* releases rnp->lock. */ +} + +/* + * Record a quiescent state for the specified CPU, which must either be + * the current CPU or an offline CPU. The lastcomp argument is used to + * make sure we are still in the grace period of interest. We don't want + * to end the current grace period based on quiescent states detected in + * an earlier grace period! + */ +static void +cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) +{ + unsigned long flags; + unsigned long mask; + struct rcu_node *rnp; + + rnp = rdp->mynode; + spin_lock_irqsave(&rnp->lock, flags); + if (lastcomp != ACCESS_ONCE(rsp->completed)) { + + /* + * Someone beat us to it for this grace period, so leave. + * The race with GP start is resolved by the fact that we + * hold the leaf rcu_node lock, so that the per-CPU bits + * cannot yet be initialized -- so we would simply find our + * CPU's bit already cleared in cpu_quiet_msk() if this race + * occurred. + */ + rdp->passed_quiesc = 0; /* try again later! 
*/ + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + mask = rdp->grpmask; + if ((rnp->qsmask & mask) == 0) { + spin_unlock_irqrestore(&rnp->lock, flags); + } else { + rdp->qs_pending = 0; + + /* + * This GP can't end until cpu checks in, so all of our + * callbacks can be processed during the next GP. + */ + rdp = rsp->rda[smp_processor_id()]; + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */ + } +} + +/* + * Check to see if there is a new grace period of which this CPU + * is not yet aware, and if so, set up local rcu_data state for it. + * Otherwise, see if this CPU has just passed through its first + * quiescent state for this grace period, and record that fact if so. + */ +static void +rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) +{ + /* If there is now a new grace period, record and return. */ + if (check_for_new_grace_period(rsp, rdp)) + return; + + /* + * Does this CPU still need to do its part for current grace period? + * If no, return and let the other CPUs do their part as well. + */ + if (!rdp->qs_pending) + return; + + /* + * Was there a quiescent state since the beginning of the grace + * period? If no, then exit and wait for the next call. + */ + if (!rdp->passed_quiesc) + return; + + /* Tell RCU we are done (but cpu_quiet() will be the judge of that). */ + cpu_quiet(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); +} + +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy + * and move all callbacks from the outgoing CPU to the current one. + */ +static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) +{ + int i; + unsigned long flags; + long lastcomp; + unsigned long mask; + struct rcu_data *rdp = rsp->rda[cpu]; + struct rcu_data *rdp_me; + struct rcu_node *rnp; + + /* Exclude any attempts to start a new grace period. 
*/ + spin_lock_irqsave(&rsp->onofflock, flags); + + /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ + rnp = rdp->mynode; + mask = rdp->grpmask; /* rnp->grplo is constant. */ + do { + spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->qsmaskinit &= ~mask; + if (rnp->qsmaskinit != 0) { + spin_unlock(&rnp->lock); /* irqs already disabled. */ + break; + } + mask = rnp->grpmask; + spin_unlock(&rnp->lock); /* irqs already disabled. */ + rnp = rnp->parent; + } while (rnp != NULL); + lastcomp = rsp->completed; + + spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ + + /* Being offline is a quiescent state, so go record it. */ + cpu_quiet(cpu, rsp, rdp, lastcomp); + + /* + * Move callbacks from the outgoing CPU to the running CPU. + * Note that the outgoing CPU is now quiescent, so it is now + * (uncharacteristically) safe to access its rcu_data structure. + * Note also that we must carefully retain the order of the + * outgoing CPU's callbacks in order for rcu_barrier() to work + * correctly. Finally, note that we start all the callbacks + * afresh, even those that have passed through a grace period + * and are therefore ready to invoke. The theory is that hotplug + * events are rare, and that if they are frequent enough to + * indefinitely delay callbacks, you have far worse things to + * be worrying about. + */ + rdp_me = rsp->rda[smp_processor_id()]; + if (rdp->nxtlist != NULL) { + *rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; + rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + rdp->nxtlist = NULL; + for (i = 0; i < RCU_NEXT_SIZE; i++) + rdp->nxttail[i] = &rdp->nxtlist; + rdp_me->qlen += rdp->qlen; + rdp->qlen = 0; + } + local_irq_restore(flags); +} + +/* + * Remove the specified CPU from the RCU hierarchy and move any pending + * callbacks that it might have to the current CPU. This code assumes + * that at least one CPU in the system will remain running at all times. 
+ * Any attempt to offline -all- CPUs is likely to strand RCU callbacks. + */ +static void rcu_offline_cpu(int cpu) +{ + __rcu_offline_cpu(cpu, &rcu_state); + __rcu_offline_cpu(cpu, &rcu_bh_state); +} + +#else /* #ifdef CONFIG_HOTPLUG_CPU */ + +static void rcu_offline_cpu(int cpu) +{ +} + +#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ + +/* + * Invoke any RCU callbacks that have made it to the end of their grace + * period. Throttle as specified by rdp->blimit. + */ +static void rcu_do_batch(struct rcu_data *rdp) +{ + unsigned long flags; + struct rcu_head *next, *list, **tail; + int count; + + /* If no callbacks are ready, just return. */ + if (!cpu_has_callbacks_ready_to_invoke(rdp)) + return; + + /* + * Extract the list of ready callbacks, disabling irqs to prevent + * races with call_rcu() from interrupt handlers. + */ + local_irq_save(flags); + list = rdp->nxtlist; + rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; + *rdp->nxttail[RCU_DONE_TAIL] = NULL; + tail = rdp->nxttail[RCU_DONE_TAIL]; + for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) + if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) + rdp->nxttail[count] = &rdp->nxtlist; + local_irq_restore(flags); + + /* Invoke callbacks. */ + count = 0; + while (list) { + next = list->next; + prefetch(next); + list->func(list); + list = next; + if (++count >= rdp->blimit) + break; + } + + local_irq_save(flags); + + /* Update count, and requeue any remaining callbacks. */ + rdp->qlen -= count; + if (list != NULL) { + *tail = rdp->nxtlist; + rdp->nxtlist = list; + for (count = 0; count < RCU_NEXT_SIZE; count++) + if (&rdp->nxtlist == rdp->nxttail[count]) + rdp->nxttail[count] = tail; + else + break; + } + + /* Reinstate batch limit if we have worked down the excess. */ + if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) + rdp->blimit = blimit; + + local_irq_restore(flags); + + /* Re-raise the RCU softirq if there are callbacks remaining. 
*/ + if (cpu_has_callbacks_ready_to_invoke(rdp)) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Check to see if this CPU is in a non-context-switch quiescent state + * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). + * Also schedule the RCU softirq handler. + * + * This function must be called with hardirqs disabled. It is normally + * invoked from the scheduling-clock interrupt. If rcu_pending returns + * false, there is no point in invoking rcu_check_callbacks(). + */ +void rcu_check_callbacks(int cpu, int user) +{ + if (user || + (idle_cpu(cpu) && !in_softirq() && + hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + + /* + * Get here if this CPU took its interrupt from user + * mode or from the idle loop, and if this is not a + * nested interrupt. In this case, the CPU is in + * a quiescent state, so count it. + * + * No memory barrier is required here because both + * rcu_qsctr_inc() and rcu_bh_qsctr_inc() reference + * only CPU-local variables that other CPUs neither + * access nor modify, at least not while the corresponding + * CPU is online. + */ + + rcu_qsctr_inc(cpu); + rcu_bh_qsctr_inc(cpu); + + } else if (!in_softirq()) { + + /* + * Get here if this CPU did not take its interrupt from + * softirq, in other words, if it is not interrupting + * a rcu_bh read-side critical section. This is an _bh + * critical section, so count it. + */ + + rcu_bh_qsctr_inc(cpu); + } + raise_softirq(RCU_SOFTIRQ); +} + +#ifdef CONFIG_SMP + +/* + * Scan the leaf rcu_node structures, processing dyntick state for any that + * have not yet encountered a quiescent state, using the function specified. + * Returns 1 if the current grace period ends while scanning (possibly + * because we made it end). 
+ */ +static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, + int (*f)(struct rcu_data *)) +{ + unsigned long bit; + int cpu; + unsigned long flags; + unsigned long mask; + struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; + struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES]; + + for (; rnp_cur < rnp_end; rnp_cur++) { + mask = 0; + spin_lock_irqsave(&rnp_cur->lock, flags); + if (rsp->completed != lastcomp) { + spin_unlock_irqrestore(&rnp_cur->lock, flags); + return 1; + } + if (rnp_cur->qsmask == 0) { + spin_unlock_irqrestore(&rnp_cur->lock, flags); + continue; + } + cpu = rnp_cur->grplo; + bit = 1; + for (; cpu <= rnp_cur->grphi; cpu++, bit <<= 1) { + if ((rnp_cur->qsmask & bit) != 0 && f(rsp->rda[cpu])) + mask |= bit; + } + if (mask != 0 && rsp->completed == lastcomp) { + + /* cpu_quiet_msk() releases rnp_cur->lock. */ + cpu_quiet_msk(mask, rsp, rnp_cur, flags); + continue; + } + spin_unlock_irqrestore(&rnp_cur->lock, flags); + } + return 0; +} + +/* + * Force quiescent states on reluctant CPUs, and also detect which + * CPUs are in dyntick-idle mode. + */ +static void force_quiescent_state(struct rcu_state *rsp, int relaxed) +{ + unsigned long flags; + long lastcomp; + struct rcu_data *rdp = rsp->rda[smp_processor_id()]; + struct rcu_node *rnp = rcu_get_root(rsp); + u8 signaled; + + if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum)) + return; /* No grace period in progress, nothing to force. */ + if (!spin_trylock_irqsave(&rsp->fqslock, flags)) { + rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ + return; /* Someone else is already on the job. */ + } + if (relaxed && + (long)(rsp->jiffies_force_qs - jiffies) >= 0 && + (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0) + goto unlock_ret; /* no emergency and done recently. 
*/ + rsp->n_force_qs++; + spin_lock(&rnp->lock); + lastcomp = rsp->completed; + signaled = rsp->signaled; + rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; + rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + + RCU_JIFFIES_TILL_FORCE_QS; + if (lastcomp == rsp->gpnum) { + rsp->n_force_qs_ngp++; + spin_unlock(&rnp->lock); + goto unlock_ret; /* no GP in progress, time updated. */ + } + spin_unlock(&rnp->lock); + switch (signaled) { + case RCU_GP_INIT: + + break; /* grace period still initializing, ignore. */ + + case RCU_SAVE_DYNTICK: + + if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) + break; /* So gcc recognizes the dead code. */ + + /* Record dyntick-idle state. */ + if (rcu_process_dyntick(rsp, lastcomp, + dyntick_save_progress_counter)) + goto unlock_ret; + + /* Update state, record completion counter. */ + spin_lock(&rnp->lock); + if (lastcomp == rsp->completed) { + rsp->signaled = RCU_FORCE_QS; + dyntick_record_completed(rsp, lastcomp); + } + spin_unlock(&rnp->lock); + break; + + case RCU_FORCE_QS: + + /* Check dyntick-idle state, send IPI to laggards. */ + if (rcu_process_dyntick(rsp, dyntick_recall_completed(rsp), + rcu_implicit_dynticks_qs)) + goto unlock_ret; + + /* Leave state in case more forcing is required. */ + + break; + } +unlock_ret: + spin_unlock_irqrestore(&rsp->fqslock, flags); +} + +#else /* #ifdef CONFIG_SMP */ + +static void force_quiescent_state(struct rcu_state *rsp, int relaxed) +{ + set_need_resched(); +} + +#endif /* #else #ifdef CONFIG_SMP */ + +/* + * This does the RCU processing work from softirq context for the + * specified rcu_state and rcu_data structures. This may be called + * only from the CPU to whom the rdp belongs. + */ +static void +__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) +{ + unsigned long flags; + + /* + * If an RCU GP has gone long enough, go check for dyntick + * idle CPUs and, if needed, send resched IPIs. 
+ */ + if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || + (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0) + force_quiescent_state(rsp, 1); + + /* + * Advance callbacks in response to end of earlier grace + * period that some other CPU ended. + */ + rcu_process_gp_end(rsp, rdp); + + /* Update RCU state based on any recent quiescent states. */ + rcu_check_quiescent_state(rsp, rdp); + + /* Does this CPU require a not-yet-started grace period? */ + if (cpu_needs_another_gp(rsp, rdp)) { + spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); + rcu_start_gp(rsp, flags); /* releases above lock */ + } + + /* If there are callbacks ready, invoke them. */ + rcu_do_batch(rdp); +} + +/* + * Do softirq processing for the current CPU. + */ +static void rcu_process_callbacks(struct softirq_action *unused) +{ + /* + * Memory references from any prior RCU read-side critical sections + * executed by the interrupted code must be seen before any RCU + * grace-period manipulations below. + */ + smp_mb(); /* See above block comment. */ + + __rcu_process_callbacks(&rcu_state, &__get_cpu_var(rcu_data)); + __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + + /* + * Memory references from any later RCU read-side critical sections + * executed by the interrupted code must be seen after any RCU + * grace-period manipulations above. + */ + smp_mb(); /* See above block comment. */ +} + +static void +__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), + struct rcu_state *rsp) +{ + unsigned long flags; + struct rcu_data *rdp; + + head->func = func; + head->next = NULL; + + smp_mb(); /* Ensure RCU update seen before callback registry. */ + + /* + * Opportunistically note grace-period endings and beginnings. + * Note that we might see a beginning right after we see an + * end, but never vice versa, since this CPU has to pass through + * a quiescent state betweentimes. 
+ */ + local_irq_save(flags); + rdp = rsp->rda[smp_processor_id()]; + rcu_process_gp_end(rsp, rdp); + check_for_new_grace_period(rsp, rdp); + + /* Add the callback to our list. */ + *rdp->nxttail[RCU_NEXT_TAIL] = head; + rdp->nxttail[RCU_NEXT_TAIL] = &head->next; + + /* Start a new grace period if one not already started. */ + if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum)) { + unsigned long nestflag; + struct rcu_node *rnp_root = rcu_get_root(rsp); + + spin_lock_irqsave(&rnp_root->lock, nestflag); + rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ + } + + /* Force the grace period if too many callbacks or too long waiting. */ + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = LONG_MAX; + force_quiescent_state(rsp, 0); + } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || + (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0) + force_quiescent_state(rsp, 1); + local_irq_restore(flags); +} + +/* + * Queue an RCU callback for invocation after a grace period. + */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_state); +} +EXPORT_SYMBOL_GPL(call_rcu); + +/* + * Queue an RCU for invocation after a quicker grace period. + */ +void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_bh_state); +} +EXPORT_SYMBOL_GPL(call_rcu_bh); + +/* + * Check to see if there is any immediate RCU-related work to be done + * by the current CPU, for the specified type of RCU, returning 1 if so. + * The checks are in order of increasing expense: checks that can be + * carried out against CPU-local state are performed first. However, + * we must check for CPU stalls first, else we might not get a chance. + */ +static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) +{ + rdp->n_rcu_pending++; + + /* Check for CPU stalls, if enabled. 
*/ + check_cpu_stall(rsp, rdp); + + /* Is the RCU core waiting for a quiescent state from this CPU? */ + if (rdp->qs_pending) + return 1; + + /* Does this CPU have callbacks ready to invoke? */ + if (cpu_has_callbacks_ready_to_invoke(rdp)) + return 1; + + /* Has RCU gone idle with this CPU needing another grace period? */ + if (cpu_needs_another_gp(rsp, rdp)) + return 1; + + /* Has another RCU grace period completed? */ + if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */ + return 1; + + /* Has a new RCU grace period started? */ + if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */ + return 1; + + /* Has an RCU GP gone long enough to send resched IPIs &c? */ + if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && + ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || + (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)) + return 1; + + /* nothing to do */ + return 0; +} + +/* + * Check to see if there is any immediate RCU-related work to be done + * by the current CPU, returning 1 if so. This function is part of the + * RCU implementation; it is -not- an exported member of the RCU API. + */ +int rcu_pending(int cpu) +{ + return __rcu_pending(&rcu_state, &per_cpu(rcu_data, cpu)) || + __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)); +} + +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API. + */ +int rcu_needs_cpu(int cpu) +{ + /* RCU callbacks either ready or pending? */ + return per_cpu(rcu_data, cpu).nxtlist || + per_cpu(rcu_bh_data, cpu).nxtlist; +} + +/* + * Initialize a CPU's per-CPU RCU data. We take this "scorched earth" + * approach so that we don't have to worry about how long the CPU has + * been gone, or whether it ever was online previously. 
We do trust the + * ->mynode field, as it is constant for a given struct rcu_data and + * initialized during early boot. + * + * Note that only one online or offline event can be happening at a given + * time. Note also that we can accept some slop in the rsp->completed + * access due to the fact that this CPU cannot possibly have any RCU + * callbacks in flight yet. + */ +static void +rcu_init_percpu_data(int cpu, struct rcu_state *rsp) +{ + unsigned long flags; + int i; + long lastcomp; + unsigned long mask; + struct rcu_data *rdp = rsp->rda[cpu]; + struct rcu_node *rnp = rcu_get_root(rsp); + + /* Set up local state, ensuring consistent view of global state. */ + spin_lock_irqsave(&rnp->lock, flags); + lastcomp = rsp->completed; + rdp->completed = lastcomp; + rdp->gpnum = lastcomp; + rdp->passed_quiesc = 0; /* We could be racing with new GP, */ + rdp->qs_pending = 1; /* so set up to respond to current GP. */ + rdp->beenonline = 1; /* We have now been online. */ + rdp->passed_quiesc_completed = lastcomp - 1; + rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); + rdp->nxtlist = NULL; + for (i = 0; i < RCU_NEXT_SIZE; i++) + rdp->nxttail[i] = &rdp->nxtlist; + rdp->qlen = 0; + rdp->blimit = blimit; +#ifdef CONFIG_NO_HZ + rdp->dynticks = &per_cpu(rcu_dynticks, cpu); +#endif /* #ifdef CONFIG_NO_HZ */ + rdp->cpu = cpu; + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + + /* + * A new grace period might start here. If so, we won't be part + * of it, but that is OK, as we are currently in a quiescent state. + */ + + /* Exclude any attempts to start a new GP on large systems. */ + spin_lock(&rsp->onofflock); /* irqs already disabled. */ + + /* Add CPU to rcu_node bitmasks. */ + rnp = rdp->mynode; + mask = rdp->grpmask; + do { + /* Exclude any attempts to start a new GP on small systems. */ + spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->qsmaskinit |= mask; + mask = rnp->grpmask; + spin_unlock(&rnp->lock); /* irqs already disabled. 
*/ + rnp = rnp->parent; + } while (rnp != NULL && !(rnp->qsmaskinit & mask)); + + spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ + + /* + * A new grace period might start here. If so, we will be part of + * it, and its gpnum will be greater than ours, so we will + * participate. It is also possible for the gpnum to have been + * incremented before this function was called, and the bitmasks + * to not be filled out until now, in which case we will also + * participate due to our gpnum being behind. + */ + + /* Since it is coming online, the CPU is in a quiescent state. */ + cpu_quiet(cpu, rsp, rdp, lastcomp); + local_irq_restore(flags); +} + +static void __cpuinit rcu_online_cpu(int cpu) +{ +#ifdef CONFIG_NO_HZ + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + rdtp->dynticks_nesting = 1; + rdtp->dynticks |= 1; /* need consecutive #s even for hotplug. */ + rdtp->dynticks_nmi = (rdtp->dynticks_nmi + 1) & ~0x1; +#endif /* #ifdef CONFIG_NO_HZ */ + rcu_init_percpu_data(cpu, &rcu_state); + rcu_init_percpu_data(cpu, &rcu_bh_state); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); +} + +/* + * Handle CPU online/offline notification events. + */ +static int __cpuinit rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + rcu_online_cpu(cpu); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + rcu_offline_cpu(cpu); + break; + default: + break; + } + return NOTIFY_OK; +} + +/* + * Compute the per-level fanout, either using the exact fanout specified + * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. 
+ */ +#ifdef CONFIG_RCU_FANOUT_EXACT +static void __init rcu_init_levelspread(struct rcu_state *rsp) +{ + int i; + + for (i = NUM_RCU_LVLS - 1; i >= 0; i--) + rsp->levelspread[i] = CONFIG_RCU_FANOUT; +} +#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ +static void __init rcu_init_levelspread(struct rcu_state *rsp) +{ + int ccur; + int cprv; + int i; + + cprv = NR_CPUS; + for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + ccur = rsp->levelcnt[i]; + rsp->levelspread[i] = (cprv + ccur - 1) / ccur; + cprv = ccur; + } +} +#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */ + +/* + * Helper function for rcu_init() that initializes one rcu_state structure. + */ +static void __init rcu_init_one(struct rcu_state *rsp) +{ + int cpustride = 1; + int i; + int j; + struct rcu_node *rnp; + + /* Initialize the level-tracking arrays. */ + + for (i = 1; i < NUM_RCU_LVLS; i++) + rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; + rcu_init_levelspread(rsp); + + /* Initialize the elements themselves, starting from the leaves. */ + + for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + cpustride *= rsp->levelspread[i]; + rnp = rsp->level[i]; + for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { + spin_lock_init(&rnp->lock); + rnp->qsmask = 0; + rnp->qsmaskinit = 0; + rnp->grplo = j * cpustride; + rnp->grphi = (j + 1) * cpustride - 1; + if (rnp->grphi >= NR_CPUS) + rnp->grphi = NR_CPUS - 1; + if (i == 0) { + rnp->grpnum = 0; + rnp->grpmask = 0; + rnp->parent = NULL; + } else { + rnp->grpnum = j % rsp->levelspread[i - 1]; + rnp->grpmask = 1UL << rnp->grpnum; + rnp->parent = rsp->level[i - 1] + + j / rsp->levelspread[i - 1]; + } + rnp->level = i; + } + } +} + +/* + * Helper macro for __rcu_init(). To be used nowhere else! + * Assigns leaf node pointers into each CPU's rcu_data structure. 
+ */ +#define RCU_DATA_PTR_INIT(rsp, rcu_data) \ +do { \ + rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \ + j = 0; \ + for_each_possible_cpu(i) { \ + if (i > rnp[j].grphi) \ + j++; \ + per_cpu(rcu_data, i).mynode = &rnp[j]; \ + (rsp)->rda[i] = &per_cpu(rcu_data, i); \ + } \ +} while (0) + +static struct notifier_block __cpuinitdata rcu_nb = { + .notifier_call = rcu_cpu_notify, +}; + +void __init __rcu_init(void) +{ + int i; /* All used by RCU_DATA_PTR_INIT(). */ + int j; + struct rcu_node *rnp; + + printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n"); +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + rcu_init_one(&rcu_state); + RCU_DATA_PTR_INIT(&rcu_state, rcu_data); + rcu_init_one(&rcu_bh_state); + RCU_DATA_PTR_INIT(&rcu_bh_state, rcu_bh_data); + + for_each_online_cpu(i) + rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i); + /* Register notifier for non-boot CPUs */ + register_cpu_notifier(&rcu_nb); + printk(KERN_WARNING "Experimental hierarchical RCU init done.\n"); +} + +module_param(blimit, int, 0); +module_param(qhimark, int, 0); +module_param(qlowmark, int, 0); diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c new file mode 100644 index 00000000000..d6db3e83782 --- /dev/null +++ b/kernel/rcutree_trace.c @@ -0,0 +1,271 @@ +/* + * Read-Copy Update tracing for classic implementation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Papers: http://www.rdrop.com/users/paulmck/RCU + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) +{ + if (!rdp->beenonline) + return; + seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x", + rdp->cpu, + cpu_is_offline(rdp->cpu) ? '!' : ' ', + rdp->completed, rdp->gpnum, + rdp->passed_quiesc, rdp->passed_quiesc_completed, + rdp->qs_pending, + rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending, + (int)(rdp->n_rcu_pending & 0xffff)); +#ifdef CONFIG_NO_HZ + seq_printf(m, " dt=%d/%d dn=%d df=%lu", + rdp->dynticks->dynticks, + rdp->dynticks->dynticks_nesting, + rdp->dynticks->dynticks_nmi, + rdp->dynticks_fqs); +#endif /* #ifdef CONFIG_NO_HZ */ + seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); + seq_printf(m, " ql=%ld b=%ld\n", rdp->qlen, rdp->blimit); +} + +#define PRINT_RCU_DATA(name, func, m) \ + do { \ + int _p_r_d_i; \ + \ + for_each_possible_cpu(_p_r_d_i) \ + func(m, &per_cpu(name, _p_r_d_i)); \ + } while (0) + +static int show_rcudata(struct seq_file *m, void *unused) +{ + seq_puts(m, "rcu:\n"); + PRINT_RCU_DATA(rcu_data, print_one_rcu_data, m); + seq_puts(m, "rcu_bh:\n"); + PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m); + return 0; +} + +static int rcudata_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcudata, NULL); +} + +static struct file_operations rcudata_fops = { + .owner = THIS_MODULE, + .open = rcudata_open, + 
.read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) +{ + if (!rdp->beenonline) + return; + seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld", + rdp->cpu, + cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"", + rdp->completed, rdp->gpnum, + rdp->passed_quiesc, rdp->passed_quiesc_completed, + rdp->qs_pending, + rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending, + rdp->n_rcu_pending); +#ifdef CONFIG_NO_HZ + seq_printf(m, ",%d,%d,%d,%lu", + rdp->dynticks->dynticks, + rdp->dynticks->dynticks_nesting, + rdp->dynticks->dynticks_nmi, + rdp->dynticks_fqs); +#endif /* #ifdef CONFIG_NO_HZ */ + seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); + seq_printf(m, ",%ld,%ld\n", rdp->qlen, rdp->blimit); +} + +static int show_rcudata_csv(struct seq_file *m, void *unused) +{ + seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",\"rpfq\",\"rp\","); +#ifdef CONFIG_NO_HZ + seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); +#endif /* #ifdef CONFIG_NO_HZ */ + seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n"); + seq_puts(m, "\"rcu:\"\n"); + PRINT_RCU_DATA(rcu_data, print_one_rcu_data_csv, m); + seq_puts(m, "\"rcu_bh:\"\n"); + PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m); + return 0; +} + +static int rcudata_csv_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcudata_csv, NULL); +} + +static struct file_operations rcudata_csv_fops = { + .owner = THIS_MODULE, + .open = rcudata_csv_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) +{ + int level = 0; + struct rcu_node *rnp; + + seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x " + "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", + rsp->completed, rsp->gpnum, rsp->signaled, + (long)(rsp->jiffies_force_qs - jiffies), + (int)(jiffies & 0xffff), + rsp->n_force_qs, rsp->n_force_qs_ngp, + 
rsp->n_force_qs - rsp->n_force_qs_ngp, + rsp->n_force_qs_lh); + for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { + if (rnp->level != level) { + seq_puts(m, "\n"); + level = rnp->level; + } + seq_printf(m, "%lx/%lx %d:%d ^%d ", + rnp->qsmask, rnp->qsmaskinit, + rnp->grplo, rnp->grphi, rnp->grpnum); + } + seq_puts(m, "\n"); +} + +static int show_rcuhier(struct seq_file *m, void *unused) +{ + seq_puts(m, "rcu:\n"); + print_one_rcu_state(m, &rcu_state); + seq_puts(m, "rcu_bh:\n"); + print_one_rcu_state(m, &rcu_bh_state); + return 0; +} + +static int rcuhier_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcuhier, NULL); +} + +static struct file_operations rcuhier_fops = { + .owner = THIS_MODULE, + .open = rcuhier_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int show_rcugp(struct seq_file *m, void *unused) +{ + seq_printf(m, "rcu: completed=%ld gpnum=%ld\n", + rcu_state.completed, rcu_state.gpnum); + seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n", + rcu_bh_state.completed, rcu_bh_state.gpnum); + return 0; +} + +static int rcugp_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcugp, NULL); +} + +static struct file_operations rcugp_fops = { + .owner = THIS_MODULE, + .open = rcugp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir; +static int __init rcuclassic_trace_init(void) +{ + rcudir = debugfs_create_dir("rcu", NULL); + if (!rcudir) + goto out; + + datadir = debugfs_create_file("rcudata", 0444, rcudir, + NULL, &rcudata_fops); + if (!datadir) + goto free_out; + + datadir_csv = debugfs_create_file("rcudata.csv", 0444, rcudir, + NULL, &rcudata_csv_fops); + if (!datadir_csv) + goto free_out; + + gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops); + if (!gpdir) + goto free_out; + + hierdir = 
debugfs_create_file("rcuhier", 0444, rcudir, + NULL, &rcuhier_fops); + if (!hierdir) + goto free_out; + return 0; +free_out: + if (datadir) + debugfs_remove(datadir); + if (datadir_csv) + debugfs_remove(datadir_csv); + if (gpdir) + debugfs_remove(gpdir); + debugfs_remove(rcudir); +out: + return 1; +} + +static void __exit rcuclassic_trace_cleanup(void) +{ + debugfs_remove(datadir); + debugfs_remove(datadir_csv); + debugfs_remove(gpdir); + debugfs_remove(hierdir); + debugfs_remove(rcudir); +} + + +module_init(rcuclassic_trace_init); +module_exit(rcuclassic_trace_cleanup); + +MODULE_AUTHOR("Paul E. McKenney"); +MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation"); +MODULE_LICENSE("GPL"); diff --git a/kernel/softirq.c b/kernel/softirq.c index e7c69a720d6..80d323e6f61 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -269,6 +269,7 @@ void irq_enter(void) { int cpu = smp_processor_id(); + rcu_irq_enter(); if (idle_cpu(cpu) && !in_interrupt()) { __irq_enter(); tick_check_idle(cpu); @@ -295,9 +296,9 @@ void irq_exit(void) #ifdef CONFIG_NO_HZ /* Make sure that timer wheel updates are propagated */ - if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) - tick_nohz_stop_sched_tick(0); rcu_irq_exit(); + if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) + tick_nohz_stop_sched_tick(0); #endif preempt_enable_no_resched(); } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b0f239e443b..465d822f3f5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -619,6 +619,19 @@ config RCU_CPU_STALL_DETECTOR Say N if you are unsure. +config RCU_CPU_STALL_DETECTOR + bool "Check for stalled CPUs delaying RCU grace periods" + depends on CLASSIC_RCU || TREE_RCU + default n + help + This option causes RCU to printk information on which + CPUs are delaying the current grace period, but only when + the grace period extends for excessive time periods. + + Say Y if you want RCU to perform such checks. 
+ + Say N if you are unsure. + config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL -- cgit v1.2.3-70-g09d2 From d62720ade82c5e5b8f9585e5ed02c89573ebf111 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 17 Dec 2008 14:14:30 -0800 Subject: sysfs: add documentation to cputopology.txt for system cpumasks Add information to cputopology.txt explaining the output of various system cpumask's. Signed-off-by: Mike Travis Signed-off-by: Rusty Russell Acked-by: Greg Kroah-Hartman --- Documentation/cputopology.txt | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'Documentation') diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index bd699da2466..45932ec21ce 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -31,3 +31,51 @@ not defined by include/asm-XXX/topology.h: 2) core_id: 0 3) thread_siblings: just the given CPU 4) core_siblings: just the given CPU + +Additionally, cpu topology information is provided under +/sys/devices/system/cpu and includes these files. The internal +source for the output is in brackets ("[]"). + + kernel_max: the maximum cpu index allowed by the kernel configuration. + [NR_CPUS-1] + + offline: cpus that are not online because they have been + HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit + of cpus allowed by the kernel configuration (kernel_max + above). [~cpu_online_mask + cpus >= NR_CPUS] + + online: cpus that are online and being scheduled [cpu_online_mask] + + possible: cpus that have been allocated resources and can be + brought online if they are present. [cpu_possible_mask] + + present: cpus that have been identified as being present in the + system. [cpu_present_mask] + +The format for the above output is compatible with cpulist_parse() +[see <linux/cpumask.h>]. Some examples follow.
+ +In this example, there are 64 cpus in the system but cpus 32-63 exceed +the kernel max which is limited to 0..31 by the NR_CPUS config option +being 32. Note also that cpus 2 and 4-31 are not online but could be +brought online as they are both present and possible. + + kernel_max: 31 + offline: 2,4-31,32-63 + online: 0-1,3 + possible: 0-31 + present: 0-31 + +In this example, the NR_CPUS config option is 128, but the kernel was +started with possible_cpus=144. There are 4 cpus in the system and cpu2 +was manually taken offline (and is the only cpu that can be brought +online.) + + kernel_max: 127 + offline: 2,4-127,128-143 + online: 0-1,3 + possible: 0-127 + present: 0-3 + +See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter +as well as more information on the various cpumask's. -- cgit v1.2.3-70-g09d2 From ba84ed9546e91348fdf3ff2bff859b0ee53b407a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 26 Oct 2008 20:56:30 +0100 Subject: ACPI hibernate: Introduce new kernel parameter acpi_sleep=s4_nonvs On some machines it may be necessary to disable the saving/restoring of the ACPI NVS memory region during hibernation/resume. For this purpose, introduce new ACPI kernel command line option acpi_sleep=s4_nonvs. Based on a patch by Zhang Rui. Signed-off-by: Rafael J. Wysocki Acked-by: Nigel Cunningham Acked-by: Pavel Machek Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 5 ++++- arch/x86/kernel/acpi/sleep.c | 2 ++ drivers/acpi/sleep/main.c | 18 ++++++++++++++++-- include/linux/acpi.h | 1 + 4 files changed, 23 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e0f346d201e..1d089eeff3c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -149,7 +149,8 @@ and is between 256 and 4096 characters. 
It is defined in the file default: 0 acpi_sleep= [HW,ACPI] Sleep options - Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, old_ordering } + Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, + old_ordering, s4_nonvs } See Documentation/power/video.txt for s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep as soon as the kernel's real-mode entry point is called. @@ -159,6 +160,8 @@ and is between 256 and 4096 characters. It is defined in the file control method, wrt putting devices into low power states, to be enforced (the ACPI 2.0 ordering of _PTS is used by default). + s4_nonvs prevents the kernel from saving/restoring the + ACPI NVS memory during hibernation. acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode Format: { level | edge | high | low } diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 806b4e9051b..707c1f6f95f 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -159,6 +159,8 @@ static int __init acpi_sleep_setup(char *str) #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); + if (strncmp(str, "s4_nonvs", 8) == 0) + acpi_s4_no_nvs(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index 45a8015e421..bef41fd4c87 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -101,6 +101,19 @@ void __init acpi_old_suspend_ordering(void) * cases. */ static bool set_sci_en_on_resume; +/* + * The ACPI specification wants us to save NVS memory regions during hibernation + * and to restore them during the subsequent resume. However, it is not certain + * if this mechanism is going to work on all machines, so we allow the user to + * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line + * option. 
+ */ +static bool s4_no_nvs; + +void __init acpi_s4_no_nvs(void) +{ + s4_no_nvs = true; +} /** * acpi_pm_disable_gpes - Disable the GPEs. @@ -396,7 +409,7 @@ static int acpi_hibernation_begin(void) { int error; - error = hibernate_nvs_alloc(); + error = s4_no_nvs ? 0 : hibernate_nvs_alloc(); if (!error) { acpi_target_sleep_state = ACPI_STATE_S4; acpi_sleep_tts_switch(acpi_target_sleep_state); @@ -494,7 +507,8 @@ static int acpi_hibernation_begin_old(void) error = acpi_sleep_prepare(ACPI_STATE_S4); if (!error) { - error = hibernate_nvs_alloc(); + if (!s4_no_nvs) + error = hibernate_nvs_alloc(); if (!error) acpi_target_sleep_state = ACPI_STATE_S4; } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fba8051fb29..dfa0a5356c5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -270,6 +270,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, #ifdef CONFIG_PM_SLEEP void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); +void __init acpi_s4_no_nvs(void); #endif /* CONFIG_PM_SLEEP */ #else /* CONFIG_ACPI */ -- cgit v1.2.3-70-g09d2 From ada9cfdd158abb8169873dc8e5ae39b1ec6ffa8c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 19 Dec 2008 10:57:32 -0800 Subject: doc: fix kernel-parameters.txt formatting Spell out "wrt". I suspect plenty of people won't know what that means. Fix a '}' that should be a ']'. Reformat long lines into shorter lines. Signed-off-by: Randy Dunlap Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 40 ++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1d089eeff3c..350e71960a9 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -151,15 +151,16 @@ and is between 256 and 4096 characters. 
It is defined in the file acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, old_ordering, s4_nonvs } - See Documentation/power/video.txt for s3_bios and s3_mode. + See Documentation/power/video.txt for information on + s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep as soon as the kernel's real-mode entry point is called. s4_nohwsig prevents ACPI hardware signature from being used during resume from hibernation. old_ordering causes the ACPI 1.0 ordering of the _PTS - control method, wrt putting devices into low power - states, to be enforced (the ACPI 2.0 ordering of _PTS is - used by default). + control method, with respect to putting devices into + low power states, to be enforced (the ACPI 2.0 ordering + of _PTS is used by default). s4_nonvs prevents the kernel from saving/restoring the ACPI NVS memory during hibernation. @@ -196,7 +197,7 @@ and is between 256 and 4096 characters. It is defined in the file acpi_skip_timer_override [HW,ACPI] Recognize and ignore IRQ0/pin2 Interrupt Override. For broken nForce2 BIOS resulting in XT-PIC timer. - acpi_use_timer_override [HW,ACPI} + acpi_use_timer_override [HW,ACPI] Use timer override. For some broken Nvidia NF5 boards that require a timer override, but don't have HPET @@ -860,17 +861,19 @@ and is between 256 and 4096 characters. It is defined in the file See Documentation/ide/ide.txt. idle= [X86] - Format: idle=poll or idle=mwait, idle=halt, idle=nomwait - Poll forces a polling idle loop that can slightly improves the performance - of waking up a idle CPU, but will use a lot of power and make the system - run hot. Not recommended. - idle=mwait. On systems which support MONITOR/MWAIT but the kernel chose - to not use it because it doesn't save as much power as a normal idle - loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same - as idle=poll. - idle=halt. Halt is forced to be used for CPU idle. 
+ Format: idle=poll, idle=mwait, idle=halt, idle=nomwait + Poll forces a polling idle loop that can slightly + improve the performance of waking up a idle CPU, but + will use a lot of power and make the system run hot. + Not recommended. + idle=mwait: On systems which support MONITOR/MWAIT but + the kernel chose to not use it because it doesn't save + as much power as a normal idle loop, use the + MONITOR/MWAIT idle loop anyways. Performance should be + the same as idle=poll. + idle=halt: Halt is forced to be used for CPU idle. In such case C2/C3 won't be used again. - idle=nomwait. Disable mwait for CPU C-states + idle=nomwait: Disable mwait for CPU C-states ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem Claim all unknown PCI IDE storage controllers. @@ -1052,8 +1055,8 @@ and is between 256 and 4096 characters. It is defined in the file lapic [X86-32,APIC] Enable the local APIC even if BIOS disabled it. - lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer in - C2 power state. + lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer + in C2 power state. libata.dma= [LIBATA] DMA control libata.dma=0 Disable all PATA and SATA DMA @@ -2241,7 +2244,8 @@ and is between 256 and 4096 characters. It is defined in the file thermal.psv= [HW,ACPI] -1: disable all passive trip points - : override all passive trip points to this value + : override all passive trip points to this + value thermal.tzp= [HW,ACPI] Specify global default ACPI thermal zone polling rate -- cgit v1.2.3-70-g09d2 From 42364690992e592c05f85c76fda4055820b48c1b Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Mon, 24 Nov 2008 10:46:29 +0100 Subject: Documentation: remove reference to ll_rw_blk.c and moved drivers/block/elevator.c The drivers/block/ll_rw_block.c has been split and organized in the block/ directory, and also drivers/block/elevator.c has been moved to the block/ directory. 
Update Documentation/block/biodoc.txt accordingly Signed-off-by: Nikanth Karthikesan Signed-off-by: Jens Axboe --- Documentation/block/biodoc.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 4dbb8be1c99..3c5434c83da 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -914,7 +914,7 @@ I/O scheduler, a.k.a. elevator, is implemented in two layers. Generic dispatch queue and specific I/O schedulers. Unless stated otherwise, elevator is used to refer to both parts and I/O scheduler to specific I/O schedulers. -Block layer implements generic dispatch queue in ll_rw_blk.c and elevator.c. +Block layer implements generic dispatch queue in block/*.c. The generic dispatch queue is responsible for properly ordering barrier requests, requeueing, handling non-fs requests and all other subtleties. @@ -926,8 +926,8 @@ be built inside the kernel. Each queue can choose different one and can also change to another one dynamically. A block layer call to the i/o scheduler follows the convention elv_xxx(). This -calls elevator_xxx_fn in the elevator switch (drivers/block/elevator.c). Oh, -xxx and xxx might not match exactly, but use your imagination. If an elevator +calls elevator_xxx_fn in the elevator switch (block/elevator.c). Oh, xxx +and xxx might not match exactly, but use your imagination. If an elevator doesn't implement a function, the switch does nothing or some minimal house keeping work. 
-- cgit v1.2.3-70-g09d2 From 5bfb4093be6ac7b6c06c8e6461d85241654acc61 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Thu, 25 Dec 2008 17:19:02 +0800 Subject: [ARM] pxa: add document on the MFP design and how to use it Signed-off-by: Eric Miao --- Documentation/arm/pxa/mfp.txt | 286 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 286 insertions(+) create mode 100644 Documentation/arm/pxa/mfp.txt (limited to 'Documentation') diff --git a/Documentation/arm/pxa/mfp.txt b/Documentation/arm/pxa/mfp.txt new file mode 100644 index 00000000000..a179e5bc02c --- /dev/null +++ b/Documentation/arm/pxa/mfp.txt @@ -0,0 +1,286 @@ + MFP Configuration for PXA2xx/PXA3xx Processors + + Eric Miao + +MFP stands for Multi-Function Pin, which is the pin-mux logic on PXA3xx and +later PXA series processors. This document describes the existing MFP API, +and how board/platform driver authors could make use of it. + + Basic Concept +=============== + +Unlike the GPIO alternate function settings on PXA25x and PXA27x, a new MFP +mechanism is introduced from PXA3xx to completely move the pin-mux functions +out of the GPIO controller. In addition to pin-mux configurations, the MFP +also controls the low power state, driving strength, pull-up/down and event +detection of each pin. Below is a diagram of internal connections between +the MFP logic and the remaining SoC peripherals: + + +--------+ + | |--(GPIO19)--+ + | GPIO | | + | |--(GPIO...) 
| + +--------+ | + | +---------+ + +--------+ +------>| | + | PWM2 |--(PWM_OUT)-------->| MFP | + +--------+ +------>| |-------> to external PAD + | +---->| | + +--------+ | | +-->| | + | SSP2 |---(TXD)----+ | | +---------+ + +--------+ | | + | | + +--------+ | | + | Keypad |--(MKOUT4)----+ | + +--------+ | + | + +--------+ | + | UART2 |---(TXD)--------+ + +--------+ + +NOTE: the external pad is named as MFP_PIN_GPIO19, it doesn't necessarily +mean it's dedicated for GPIO19, only as a hint that internally this pin +can be routed from GPIO19 of the GPIO controller. + +To better understand the change from PXA25x/PXA27x GPIO alternate function +to this new MFP mechanism, here are several key points: + + 1. GPIO controller on PXA3xx is now a dedicated controller, same as other + internal controllers like PWM, SSP and UART, with 128 internal signals + which can be routed to external through one or more MFPs (e.g. GPIO<0> + can be routed through either MFP_PIN_GPIO0 or MFP_PIN_GPIO0_2, + see arch/arm/mach-pxa/include/mach/mfp-pxa300.h) + + 2. Alternate function configuration is removed from this GPIO controller, + the remaining functions are pure GPIO-specific, i.e. + + - GPIO signal level control + - GPIO direction control + - GPIO level change detection + + 3. Low power state for each pin is now controlled by MFP, this means the + PGSRx registers on PXA2xx are now useless on PXA3xx + + 4. Wakeup detection is now controlled by MFP, PWER does not control the + wakeup from GPIO(s) any more, depending on the sleeping state, ADxER + (as defined in pxa3xx-regs.h) controls the wakeup from MFP + +NOTE: with such a clear separation of MFP and GPIO, by GPIO we normally +mean it is a GPIO signal, and by MFP or pin xxx, we mean a physical +pad (or ball). + + MFP API Usage +=============== + +For board code writers, here are some guidelines: + +1.
include ONE of the following header files in your .c: + + - #include <mach/mfp-pxa25x.h> + - #include <mach/mfp-pxa27x.h> + - #include <mach/mfp-pxa300.h> + - #include <mach/mfp-pxa320.h> + - #include <mach/mfp-pxa930.h> + + NOTE: only one file in your .c, depending on the processors used, + because pin configuration definitions may conflict in these files (i.e. + same name, different meaning and settings on different processors). E.g. + for zylonite platform, which supports both PXA300/PXA310 and PXA320, two + separate files are introduced: zylonite_pxa300.c and zylonite_pxa320.c + (in addition to handling MFP configuration differences, they also handle + the other differences between the two combinations). + + NOTE: PXA300 and PXA310 are almost identical in pin configurations (with + PXA310 supporting some additional ones), thus the difference is actually + covered in a single mfp-pxa300.h. + +2. prepare an array for the initial pin configurations, e.g.: + + static unsigned long mainstone_pin_config[] __initdata = { + /* Chip Select */ + GPIO15_nCS_1, + + /* LCD - 16bpp Active TFT */ + GPIOxx_TFT_LCD_16BPP, + GPIO16_PWM0_OUT, /* Backlight */ + + /* MMC */ + GPIO32_MMC_CLK, + GPIO112_MMC_CMD, + GPIO92_MMC_DAT_0, + GPIO109_MMC_DAT_1, + GPIO110_MMC_DAT_2, + GPIO111_MMC_DAT_3, + + ... + + /* GPIO */ + GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH, + }; + + a) once the pin configurations are passed to pxa{2xx,3xx}_mfp_config(), + and written to the actual registers, they are useless and may be discarded, + adding '__initdata' will help save some additional bytes here. + + b) when there is only one possible pin configuration for a component, + some simplified definitions can be used, e.g.
GPIOxx_TFT_LCD_16BPP on + PXA25x and PXA27x processors + + c) if by board design, a pin can be configured to wake up the system + from low power state, it can be 'OR'ed with any of: + + WAKEUP_ON_EDGE_BOTH + WAKEUP_ON_EDGE_RISE + WAKEUP_ON_EDGE_FALL + WAKEUP_ON_LEVEL_HIGH - specifically for enabling of keypad GPIOs, + + to indicate that this pin has the capability of wake-up the system, + and on which edge(s). This, however, doesn't necessarily mean the + pin _will_ wakeup the system, it will only when set_irq_wake() is + invoked with the corresponding GPIO IRQ (GPIO_IRQ(xx) or gpio_to_irq()) + and eventually calls gpio_set_wake() for the actual register setting. + + d) although PXA3xx MFP supports edge detection on each pin, the + internal logic will only wakeup the system when those specific bits + in ADxER registers are set, which can be well mapped to the + corresponding peripheral, thus set_irq_wake() can be called with + the peripheral IRQ to enable the wakeup. + + + MFP on PXA3xx +=============== + +Every external I/O pad on PXA3xx (excluding those for special purpose) has +one MFP logic associated, and is controlled by one MFP register (MFPR). 
+ +The MFPR has the following bit definitions (for PXA300/PXA310/PXA320): + + 31 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 + +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + | RESERVED |PS|PU|PD| DRIVE |SS|SD|SO|EC|EF|ER|--| AF_SEL | + +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + + Bit 3: RESERVED + Bit 4: EDGE_RISE_EN - enable detection of rising edge on this pin + Bit 5: EDGE_FALL_EN - enable detection of falling edge on this pin + Bit 6: EDGE_CLEAR - disable edge detection on this pin + Bit 7: SLEEP_OE_N - enable outputs during low power modes + Bit 8: SLEEP_DATA - output data on the pin during low power modes + Bit 9: SLEEP_SEL - selection control for low power modes signals + Bit 13: PULLDOWN_EN - enable the internal pull-down resistor on this pin + Bit 14: PULLUP_EN - enable the internal pull-up resistor on this pin + Bit 15: PULL_SEL - pull state controlled by selected alternate function + (0) or by PULL{UP,DOWN}_EN bits (1) + + Bit 0 - 2: AF_SEL - alternate function selection, 8 possibilities, from 0-7 + Bit 10-12: DRIVE - drive strength and slew rate + 0b000 - fast 1mA + 0b001 - fast 2mA + 0b010 - fast 3mA + 0b011 - fast 4mA + 0b100 - slow 6mA + 0b101 - fast 6mA + 0b110 - slow 10mA + 0b111 - fast 10mA + + MFP Design for PXA2xx/PXA3xx +============================== + +Due to the difference of pin-mux handling between PXA2xx and PXA3xx, a unified +MFP API is introduced to cover both series of processors. + +The basic idea of this design is to introduce definitions for all possible pin +configurations, these definitions are processor and platform independent, and +the actual API is invoked to convert these definitions into register settings and +make them effective thereafter. + + Files Involved + -------------- + + - arch/arm/mach-pxa/include/mach/mfp.h + + for + 1. Unified pin definitions - enum constants for all configurable pins + 2.
processor-neutral bit definitions for a possible MFP configuration + + - arch/arm/mach-pxa/include/mach/mfp-pxa3xx.h + + for PXA3xx specific MFPR register bit definitions and PXA3xx common pin + configurations + + - arch/arm/mach-pxa/include/mach/mfp-pxa2xx.h + + for PXA2xx specific definitions and PXA25x/PXA27x common pin configurations + + - arch/arm/mach-pxa/include/mach/mfp-pxa25x.h + arch/arm/mach-pxa/include/mach/mfp-pxa27x.h + arch/arm/mach-pxa/include/mach/mfp-pxa300.h + arch/arm/mach-pxa/include/mach/mfp-pxa320.h + arch/arm/mach-pxa/include/mach/mfp-pxa930.h + + for processor specific definitions + + - arch/arm/mach-pxa/mfp-pxa3xx.c + - arch/arm/mach-pxa/mfp-pxa2xx.c + + for implementation of the pin configuration to take effect for the actual + processor. + + Pin Configuration + ----------------- + + The following comments are copied from mfp.h (see the actual source code + for most updated info) + + /* + * a possible MFP configuration is represented by a 32-bit integer + * + * bit 0.. 
9 - MFP Pin Number (1024 Pins Maximum) + * bit 10..12 - Alternate Function Selection + * bit 13..15 - Drive Strength + * bit 16..18 - Low Power Mode State + * bit 19..20 - Low Power Mode Edge Detection + * bit 21..22 - Run Mode Pull State + * + * to facilitate the definition, the following macros are provided + * + * MFP_CFG_DEFAULT - default MFP configuration value, with + * alternate function = 0, + * drive strength = fast 3mA (MFP_DS03X) + * low power mode = default + * edge detection = none + * + * MFP_CFG - default MFPR value with alternate function + * MFP_CFG_DRV - default MFPR value with alternate function and + * pin drive strength + * MFP_CFG_LPM - default MFPR value with alternate function and + * low power mode + * MFP_CFG_X - default MFPR value with alternate function, + * pin drive strength and low power mode + */ + + Examples of pin configurations are: + + #define GPIO94_SSP3_RXD MFP_CFG_X(GPIO94, AF1, DS08X, FLOAT) + + which reads GPIO94 can be configured as SSP3_RXD, with alternate function + selection of 1, driving strength of 0b101, and a float state in low power + modes. + + NOTE: this is the default setting of this pin being configured as SSP3_RXD + which can be modified a bit in board code, though it is not recommended to + do so, simply because this default setting is usually carefully encoded, + and is supposed to work in most cases. + + Register Settings + ----------------- + + Register settings on PXA3xx for a pin configuration are actually very + straight-forward, most bits can be converted directly into MFPR value + in an easier way. Two sets of MFPR values are calculated: the run-time + ones and the low power mode ones, to allow different settings. + + The conversion from a generic pin configuration to the actual register + settings on PXA2xx is a bit complicated: many registers are involved, + including GAFRx, GPDRx, PGSRx, PWER, PKWR, PFER and PRER. Please see + mfp-pxa2xx.c for how the conversion is made.
-- cgit v1.2.3-70-g09d2 From 77e196752bdd76a0c58ab082658d28c6a90fa40e Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Tue, 16 Dec 2008 11:54:34 +0800 Subject: [ARM] pxafb: allow video memory size to be configurable The amount of video memory size is decided according to the following order: 1. x x by default, which is the backward compatible way 2. size specified in platform data 3. size specified in module parameter 'options' string or specified in kernel boot command line (see updated Documentation/fb/pxafb.txt) And now since the memory is allocated from system memory, the pxafb_mmap can be removed and the default fb_mmap() should be working all right. Also, since we now have introduced the 'struct pxafb_dma_buff' for DMA descriptors and palettes, the allocation can be separated cleanly. NOTE: the LCD DMA actually supports chained transfer (i.e. page-based transfers), to simplify the logic and keep the performance (with less TLB misses when accessing from memory mapped user space), the memory is allocated by alloc_pages_*() to ensures it's physical contiguous. Signed-off-by: Eric Miao Signed-off-by: Eric Miao --- Documentation/fb/pxafb.txt | 8 +- arch/arm/mach-pxa/include/mach/pxafb.h | 1 + drivers/video/pxafb.c | 131 ++++++++++++++------------------- drivers/video/pxafb.h | 17 +---- 4 files changed, 65 insertions(+), 92 deletions(-) (limited to 'Documentation') diff --git a/Documentation/fb/pxafb.txt b/Documentation/fb/pxafb.txt index db9b8500b43..ad94b5ca009 100644 --- a/Documentation/fb/pxafb.txt +++ b/Documentation/fb/pxafb.txt @@ -5,9 +5,13 @@ The driver supports the following options, either via options= when modular or video=pxafb: when built in. 
For example: - modprobe pxafb options=mode:640x480-8,passive + modprobe pxafb options=vmem:2M,mode:640x480-8,passive or on the kernel command line - video=pxafb:mode:640x480-8,passive + video=pxafb:vmem:2M,mode:640x480-8,passive + +vmem: VIDEO_MEM_SIZE + Amount of video memory to allocate (can be suffixed with K or M + for kilobytes or megabytes) mode:XRESxYRES[-BPP] XRES == LCCR1_PPL + 1 diff --git a/arch/arm/mach-pxa/include/mach/pxafb.h b/arch/arm/mach-pxa/include/mach/pxafb.h index 4201a889ff4..6932720ba04 100644 --- a/arch/arm/mach-pxa/include/mach/pxafb.h +++ b/arch/arm/mach-pxa/include/mach/pxafb.h @@ -113,6 +113,7 @@ struct pxafb_mach_info { unsigned int num_modes; unsigned int lcd_conn; + unsigned long video_mem_size; u_int fixed_modes:1, cmap_inverse:1, diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c index ab689597f25..25bf4b8b6b5 100644 --- a/drivers/video/pxafb.c +++ b/drivers/video/pxafb.c @@ -72,6 +72,8 @@ static int pxafb_activate_var(struct fb_var_screeninfo *var, struct pxafb_info *); static void set_ctrlr_state(struct pxafb_info *fbi, u_int state); +static unsigned long video_mem_size = 0; + static inline unsigned long lcd_readl(struct pxafb_info *fbi, unsigned int off) { @@ -498,20 +500,6 @@ static int pxafb_blank(int blank, struct fb_info *info) return 0; } -static int pxafb_mmap(struct fb_info *info, - struct vm_area_struct *vma) -{ - struct pxafb_info *fbi = (struct pxafb_info *)info; - unsigned long off = vma->vm_pgoff << PAGE_SHIFT; - - if (off < info->fix.smem_len) { - vma->vm_pgoff += fbi->video_offset / PAGE_SIZE; - return dma_mmap_writecombine(fbi->dev, vma, fbi->map_cpu, - fbi->map_dma, fbi->map_size); - } - return -EINVAL; -} - static struct fb_ops pxafb_ops = { .owner = THIS_MODULE, .fb_check_var = pxafb_check_var, @@ -521,7 +509,6 @@ static struct fb_ops pxafb_ops = { .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, .fb_blank = pxafb_blank, - .fb_mmap = pxafb_mmap, }; /* @@ -614,7 +601,7 @@ static int 
setup_frame_dma(struct pxafb_info *fbi, int dma, int pal, dma_desc = &fbi->dma_buff->dma_desc[dma]; dma_desc_off = offsetof(struct pxafb_dma_buff, dma_desc[dma]); - dma_desc->fsadr = fbi->screen_dma + offset; + dma_desc->fsadr = fbi->video_mem_phys + offset; dma_desc->fidr = 0; dma_desc->ldcmd = size; @@ -1267,69 +1254,30 @@ static int pxafb_resume(struct platform_device *dev) #define pxafb_resume NULL #endif -/* - * pxafb_map_video_memory(): - * Allocates the DRAM memory for the frame buffer. This buffer is - * remapped into a non-cached, non-buffered, memory region to - * allow palette and pixel writes to occur without flushing the - * cache. Once this area is remapped, all virtual memory - * access to the video memory should occur at the new region. - */ -static int __devinit pxafb_map_video_memory(struct pxafb_info *fbi) +static int __devinit pxafb_init_video_memory(struct pxafb_info *fbi) { - /* - * We reserve one page for the palette, plus the size - * of the framebuffer. - */ - fbi->video_offset = PAGE_ALIGN(sizeof(struct pxafb_dma_buff)); - fbi->map_size = PAGE_ALIGN(fbi->fb.fix.smem_len + fbi->video_offset); - fbi->map_cpu = dma_alloc_writecombine(fbi->dev, fbi->map_size, - &fbi->map_dma, GFP_KERNEL); - - if (fbi->map_cpu) { - /* prevent initial garbage on screen */ - memset(fbi->map_cpu, 0, fbi->map_size); - fbi->fb.screen_base = fbi->map_cpu + fbi->video_offset; - fbi->screen_dma = fbi->map_dma + fbi->video_offset; - - /* - * FIXME: this is actually the wrong thing to place in - * smem_start. But fbdev suffers from the problem that - * it needs an API which doesn't exist (in this case, - * dma_writecombine_mmap) - */ - fbi->fb.fix.smem_start = fbi->screen_dma; - fbi->palette_size = fbi->fb.var.bits_per_pixel == 8 ? 
256 : 16; - - fbi->dma_buff = (void *) fbi->map_cpu; - fbi->dma_buff_phys = fbi->map_dma; - fbi->palette_cpu = (u16 *) fbi->dma_buff->palette; + int size = PAGE_ALIGN(fbi->video_mem_size); - pr_debug("pxafb: palette_mem_size = 0x%08x\n", fbi->palette_size*sizeof(u16)); - } - - return fbi->map_cpu ? 0 : -ENOMEM; -} + fbi->video_mem = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); + if (fbi->video_mem == NULL) + return -ENOMEM; -static void pxafb_decode_mode_info(struct pxafb_info *fbi, - struct pxafb_mode_info *modes, - unsigned int num_modes) -{ - unsigned int i, smemlen; + fbi->video_mem_phys = virt_to_phys(fbi->video_mem); + fbi->video_mem_size = size; - pxafb_setmode(&fbi->fb.var, &modes[0]); + fbi->fb.fix.smem_start = fbi->video_mem_phys; + fbi->fb.fix.smem_len = fbi->video_mem_size; + fbi->fb.screen_base = fbi->video_mem; - for (i = 0; i < num_modes; i++) { - smemlen = modes[i].xres * modes[i].yres * modes[i].bpp / 8; - if (smemlen > fbi->fb.fix.smem_len) - fbi->fb.fix.smem_len = smemlen; - } + return fbi->video_mem ? 0 : -ENOMEM; } static void pxafb_decode_mach_info(struct pxafb_info *fbi, struct pxafb_mach_info *inf) { unsigned int lcd_conn = inf->lcd_conn; + struct pxafb_mode_info *m; + int i; fbi->cmap_inverse = inf->cmap_inverse; fbi->cmap_static = inf->cmap_static; @@ -1371,7 +1319,22 @@ static void pxafb_decode_mach_info(struct pxafb_info *fbi, fbi->lccr3 |= (lcd_conn & LCD_PCLK_EDGE_FALL) ? LCCR3_PCP : 0; decode_mode: - pxafb_decode_mode_info(fbi, inf->modes, inf->num_modes); + pxafb_setmode(&fbi->fb.var, &inf->modes[0]); + + /* decide video memory size as follows: + * 1. default to mode of maximum resolution + * 2. allow platform to override + * 3. 
allow module parameter to override + */ + for (i = 0, m = &inf->modes[0]; i < inf->num_modes; i++, m++) + fbi->video_mem_size = max_t(size_t, fbi->video_mem_size, + m->xres * m->yres * m->bpp / 8); + + if (inf->video_mem_size > fbi->video_mem_size) + fbi->video_mem_size = inf->video_mem_size; + + if (video_mem_size > fbi->video_mem_size) + fbi->video_mem_size = video_mem_size; } static struct pxafb_info * __devinit pxafb_init_fbinfo(struct device *dev) @@ -1499,7 +1462,9 @@ static int __devinit parse_opt(struct device *dev, char *this_opt) s[0] = '\0'; - if (!strncmp(this_opt, "mode:", 5)) { + if (!strncmp(this_opt, "vmem:", 5)) { + video_mem_size = memparse(this_opt + 5, NULL); + } else if (!strncmp(this_opt, "mode:", 5)) { return parse_opt_mode(dev, this_opt); } else if (!strncmp(this_opt, "pixclock:", 9)) { mode->pixclock = simple_strtoul(this_opt+9, NULL, 0); @@ -1736,12 +1701,20 @@ static int __devinit pxafb_probe(struct platform_device *dev) goto failed_free_res; } - /* Initialize video memory */ - ret = pxafb_map_video_memory(fbi); + fbi->dma_buff_size = PAGE_ALIGN(sizeof(struct pxafb_dma_buff)); + fbi->dma_buff = dma_alloc_coherent(fbi->dev, fbi->dma_buff_size, + &fbi->dma_buff_phys, GFP_KERNEL); + if (fbi->dma_buff == NULL) { + dev_err(&dev->dev, "failed to allocate memory for DMA\n"); + ret = -ENOMEM; + goto failed_free_io; + } + + ret = pxafb_init_video_memory(fbi); if (ret) { dev_err(&dev->dev, "Failed to allocate video RAM: %d\n", ret); ret = -ENOMEM; - goto failed_free_io; + goto failed_free_dma; } irq = platform_get_irq(dev, 0); @@ -1811,8 +1784,10 @@ failed_free_cmap: failed_free_irq: free_irq(irq, fbi); failed_free_mem: - dma_free_writecombine(&dev->dev, fbi->map_size, - fbi->map_cpu, fbi->map_dma); + free_pages_exact(fbi->video_mem, fbi->video_mem_size); +failed_free_dma: + dma_free_coherent(&dev->dev, fbi->dma_buff_size, + fbi->dma_buff, fbi->dma_buff_phys); failed_free_io: iounmap(fbi->mmio_base); failed_free_res: @@ -1847,8 +1822,10 @@ static 
int __devexit pxafb_remove(struct platform_device *dev) irq = platform_get_irq(dev, 0); free_irq(irq, fbi); - dma_free_writecombine(&dev->dev, fbi->map_size, - fbi->map_cpu, fbi->map_dma); + free_pages_exact(fbi->video_mem, fbi->video_mem_size); + + dma_free_writecombine(&dev->dev, fbi->dma_buff_size, + fbi->dma_buff, fbi->dma_buff_phys); iounmap(fbi->mmio_base); diff --git a/drivers/video/pxafb.h b/drivers/video/pxafb.h index d8eb93fa03a..0981938682e 100644 --- a/drivers/video/pxafb.h +++ b/drivers/video/pxafb.h @@ -69,24 +69,15 @@ struct pxafb_info { void __iomem *mmio_base; struct pxafb_dma_buff *dma_buff; + size_t dma_buff_size; dma_addr_t dma_buff_phys; dma_addr_t fdadr[DMA_MAX]; - /* - * These are the addresses we mapped - * the framebuffer memory region to. - */ - /* raw memory addresses */ - dma_addr_t map_dma; /* physical */ - u_char * map_cpu; /* virtual */ - u_int map_size; - - /* addresses of pieces placed in raw buffer */ - u_char * screen_cpu; /* virtual address of frame buffer */ - dma_addr_t screen_dma; /* physical address of frame buffer */ + void __iomem *video_mem; /* virtual address of frame buffer */ + unsigned long video_mem_phys; /* physical address of frame buffer */ + size_t video_mem_size; /* size of the frame buffer */ u16 * palette_cpu; /* virtual address of palette memory */ u_int palette_size; - ssize_t video_offset; u_int lccr0; u_int lccr3; -- cgit v1.2.3-70-g09d2 From 198fc108ee4c2cd3f08954eae6a819c81c03214b Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Tue, 23 Dec 2008 17:49:43 +0800 Subject: [ARM] pxafb: add support for overlay1 and overlay2 as framebuffer devices PXA27x and later processors support overlay1 and overlay2 on-top of the base framebuffer (although under-neath the base is also possible). They support palette and no-palette RGB formats, as well as YUV formats (only available on overlay2). These overlays have dedicated DMA channels and behave in a similar way as a framebuffer. 
This heavily simplified and re-structured work is based on the original pxafb_overlay.c (which is pending for mainline merge for a long time). The major problems with this pxafb_overlay.c are (if you are interested in the history): 1. heavily redundant (the control logics for overlay1 and overlay2 are actually identical except for some small operations, which are now abstracted into a 'pxafb_layer_ops' structure) 2. a lot of useless and un-tested code (two workarounds which are now fixed on mature silicons) 3. cursorfb is actually useless, hardware cursor should not be used this way, and the code was actually un-tested for a long time. The code in this patch should be self-explanatory, I tried to add minimum comments. As said, this is basically simplified, there are several things still on the pending list: 1. palette mode is un-supported and un-tested (although re-using the palette code of the base framebuffer is actually very easy now with previous clean-up patches) 2. fb_pan_display for overlay(s) is un-supported 3. the base framebuffer can actually be abstracted by 'pxafb_layer' as well, which will help further re-use of the code and keep a better and consistent structure. (This is the reason I named it 'pxafb_layer' instead of 'pxafb_overlay' or something alike) See Documentation/fb/pxafb.txt for additional usage information. 
Signed-off-by: Eric Miao Cc: Rodolfo Giometti Signed-off-by: Eric Miao --- Documentation/fb/pxafb.txt | 84 +++++++ arch/arm/mach-pxa/include/mach/regs-lcd.h | 34 ++- drivers/video/Kconfig | 5 + drivers/video/pxafb.c | 364 +++++++++++++++++++++++++++++- drivers/video/pxafb.h | 45 ++++ 5 files changed, 516 insertions(+), 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/fb/pxafb.txt b/Documentation/fb/pxafb.txt index ad94b5ca009..d143a0a749f 100644 --- a/Documentation/fb/pxafb.txt +++ b/Documentation/fb/pxafb.txt @@ -56,3 +56,87 @@ outputen:POLARITY pixclockpol:POLARITY pixel clock polarity 0 => falling edge, 1 => rising edge + + +Overlay Support for PXA27x and later LCD controllers +==================================================== + + PXA27x and later processors support overlay1 and overlay2 on-top of the + base framebuffer (although under-neath the base is also possible). They + support palette and no-palette RGB formats, as well as YUV formats (only + available on overlay2). These overlays have dedicated DMA channels and + behave in a similar way as a framebuffer. + + However, there are some differences between these overlay framebuffers + and normal framebuffers, as listed below: + + 1. overlay can start at a 32-bit word aligned position within the base + framebuffer, which means they have a start (x, y). This information + is encoded into var->nonstd (no, var->xoffset and var->yoffset are + not for such purpose). + + 2. overlay framebuffer is allocated dynamically according to specified + 'struct fb_var_screeninfo', the amount is decided by: + + var->xres_virtual * var->yres_virtual * bpp + + bpp = 16 -- for RGB565 or RGBT555 + = 24 -- for YUV444 packed + = 24 -- for YUV444 planar + = 16 -- for YUV422 planar (1 pixel = 1 Y + 1/2 Cb + 1/2 Cr) + = 12 -- for YUV420 planar (1 pixel = 1 Y + 1/4 Cb + 1/4 Cr) + + NOTE: + + a. overlay does not support panning in x-direction, thus + var->xres_virtual will always be equal to var->xres + + b. 
line length of overlay(s) must be on a 32-bit word boundary, + for YUV planar modes, it is a requirement for the component + with minimum bits per pixel, e.g. for YUV420, Cr component + for one pixel is actually 2-bits, it means the line length + should be a multiple of 16-pixels + + c. starting horizontal position (XPOS) should start on a 32-bit + word boundary, otherwise the fb_check_var() will just fail. + + d. the rectangle of the overlay should be within the base plane, + otherwise fail + + Applications should follow the sequence below to operate an overlay + framebuffer: + + a. open("/dev/fb[1-2]", ...) + b. ioctl(fd, FBIOGET_VSCREENINFO, ...) + c. modify 'var' with desired parameters: + 1) var->xres and var->yres + 2) larger var->yres_virtual if more memory is required, + usually for double-buffering + 3) var->nonstd for starting (x, y) and color format + 4) var->{red, green, blue, transp} if RGB mode is to be used + d. ioctl(fd, FBIOPUT_VSCREENINFO, ...) + e. ioctl(fd, FBIOGET_FSCREENINFO, ...) + f. mmap + g. ... + + 3. for YUV planar formats, these are actually not supported within the + framebuffer framework, application has to take care of the offsets + and lengths of each component within the framebuffer. + + 4. var->nonstd is used to pass starting (x, y) position and color format, + the detailed bit fields are shown below: + + 31 23 20 10 0 + +-----------------+---+----------+----------+ + | ... unused ... 
|FOR| XPOS | YPOS | + +-----------------+---+----------+----------+ + + FOR - color format, as defined by OVERLAY_FORMAT_* in pxafb.h + 0 - RGB + 1 - YUV444 PACKED + 2 - YUV444 PLANAR + 3 - YUV422 PLANAR + 4 - YUV420 PLANAR + + XPOS - starting horizontal position + YPOS - starting vertical position diff --git a/arch/arm/mach-pxa/include/mach/regs-lcd.h b/arch/arm/mach-pxa/include/mach/regs-lcd.h index aff3b876a7b..f82dcea792d 100644 --- a/arch/arm/mach-pxa/include/mach/regs-lcd.h +++ b/arch/arm/mach-pxa/include/mach/regs-lcd.h @@ -12,7 +12,8 @@ #define LCCR3 (0x00C) /* LCD Controller Control Register 3 */ #define LCCR4 (0x010) /* LCD Controller Control Register 4 */ #define LCCR5 (0x014) /* LCD Controller Control Register 5 */ -#define LCSR (0x038) /* LCD Controller Status Register */ +#define LCSR (0x038) /* LCD Controller Status Register 0 */ +#define LCSR1 (0x034) /* LCD Controller Status Register 1 */ #define LIIDR (0x03C) /* LCD Controller Interrupt ID Register */ #define TMEDRGBR (0x040) /* TMED RGB Seed Register */ #define TMEDCR (0x044) /* TMED Control Register */ @@ -25,6 +26,11 @@ #define FBR5 (0x110) /* DMA Channel 2 Frame Branch Register */ #define FBR6 (0x114) /* DMA Channel 2 Frame Branch Register */ +#define OVL1C1 (0x050) /* Overlay 1 Control Register 1 */ +#define OVL1C2 (0x060) /* Overlay 1 Control Register 2 */ +#define OVL2C1 (0x070) /* Overlay 2 Control Register 1 */ +#define OVL2C2 (0x080) /* Overlay 2 Control Register 2 */ + #define CMDCR (0x100) /* Command Control Register */ #define PRSR (0x104) /* Panel Read Status Register */ @@ -42,16 +48,12 @@ #define LCCR4_PAL_FOR_MASK (3 << 15) #define FDADR0 (0x200) /* DMA Channel 0 Frame Descriptor Address Register */ -#define FSADR0 (0x204) /* DMA Channel 0 Frame Source Address Register */ -#define FIDR0 (0x208) /* DMA Channel 0 Frame ID Register */ -#define LDCMD0 (0x20C) /* DMA Channel 0 Command Register */ #define FDADR1 (0x210) /* DMA Channel 1 Frame Descriptor Address Register */ -#define 
FSADR1 (0x214) /* DMA Channel 1 Frame Source Address Register */ -#define FIDR1 (0x218) /* DMA Channel 1 Frame ID Register */ -#define LDCMD1 (0x21C) /* DMA Channel 1 Command Register */ +#define FDADR2 (0x220) /* DMA Channel 2 Frame Descriptor Address Register */ +#define FDADR3 (0x230) /* DMA Channel 3 Frame Descriptor Address Register */ +#define FDADR4 (0x240) /* DMA Channel 4 Frame Descriptor Address Register */ +#define FDADR5 (0x250) /* DMA Channel 5 Frame Descriptor Address Register */ #define FDADR6 (0x260) /* DMA Channel 6 Frame Descriptor Address Register */ -#define FSADR6 (0x264) /* DMA Channel 6 Frame Source Address Register */ -#define FIDR6 (0x268) /* DMA Channel 6 Frame ID Register */ #define LCCR0_ENB (1 << 0) /* LCD Controller enable */ #define LCCR0_CMS (1 << 1) /* Color/Monochrome Display Select */ @@ -151,8 +153,22 @@ #define LCSR_RD_ST (1 << 11) /* read status */ #define LCSR_CMD_INT (1 << 12) /* command interrupt */ +#define LCSR1_IU(x) (1 << ((x) + 23)) /* Input FIFO underrun */ +#define LCSR1_BS(x) (1 << ((x) + 15)) /* Branch Status */ +#define LCSR1_EOF(x) (1 << ((x) + 7)) /* End of Frame Status */ +#define LCSR1_SOF(x) (1 << ((x) - 1)) /* Start of Frame Status */ + #define LDCMD_PAL (1 << 26) /* instructs DMA to load palette buffer */ +/* overlay control registers */ +#define OVLxC1_PPL(x) ((((x) - 1) & 0x3ff) << 0) /* Pixels Per Line */ +#define OVLxC1_LPO(x) ((((x) - 1) & 0x3ff) << 10) /* Number of Lines */ +#define OVLxC1_BPP(x) (((x) & 0xf) << 20) /* Bits Per Pixel */ +#define OVLxC1_OEN (1 << 31) /* Enable bit for Overlay */ +#define OVLxC2_XPOS(x) (((x) & 0x3ff) << 0) /* Horizontal Position */ +#define OVLxC2_YPOS(x) (((x) & 0x3ff) << 10) /* Vertical Position */ +#define OVL2C2_PFOR(x) (((x) & 0x7) << 20) /* Pixel Format */ + /* smartpanel related */ #define PRSR_DATA(x) ((x) & 0xff) /* Panel Data */ #define PRSR_A0 (1 << 8) /* Read Data Source */ diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 
3f3ce13fef4..486d81ca02a 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1817,6 +1817,11 @@ config FB_PXA If unsure, say N. +config FB_PXA_OVERLAY + bool "Support PXA27x/PXA3xx Overlay(s) as framebuffer" + default n + depends on FB_PXA && (PXA27x || PXA3xx) + config FB_PXA_SMARTPANEL bool "PXA Smartpanel LCD support" default n diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c index 7935706a756..3a41ea10e8e 100644 --- a/drivers/video/pxafb.c +++ b/drivers/video/pxafb.c @@ -20,6 +20,16 @@ * * linux-arm-kernel@lists.arm.linux.org.uk * + * Add support for overlay1 and overlay2 based on pxafb_overlay.c: + * + * Copyright (C) 2004, Intel Corporation + * + * 2003/08/27: + * 2004/03/10: + * 2004/10/28: + * + * Copyright (C) 2006-2008 Marvell International Ltd. + * All Rights Reserved */ #include @@ -72,6 +82,8 @@ static int pxafb_activate_var(struct fb_var_screeninfo *var, struct pxafb_info *); static void set_ctrlr_state(struct pxafb_info *fbi, u_int state); static void setup_base_frame(struct pxafb_info *fbi, int branch); +static int setup_frame_dma(struct pxafb_info *fbi, int dma, int pal, + unsigned long offset, size_t size); static unsigned long video_mem_size = 0; @@ -581,6 +593,330 @@ static struct fb_ops pxafb_ops = { .fb_blank = pxafb_blank, }; +#ifdef CONFIG_FB_PXA_OVERLAY +static void overlay1fb_setup(struct pxafb_layer *ofb) +{ + int size = ofb->fb.fix.line_length * ofb->fb.var.yres_virtual; + unsigned long start = ofb->video_mem_phys; + setup_frame_dma(ofb->fbi, DMA_OV1, PAL_NONE, start, size); +} + +/* Depending on the enable status of overlay1/2, the DMA should be + * updated from FDADRx (when disabled) or FBRx (when enabled). + */ +static void overlay1fb_enable(struct pxafb_layer *ofb) +{ + int enabled = lcd_readl(ofb->fbi, OVL1C1) & OVLxC1_OEN; + uint32_t fdadr1 = ofb->fbi->fdadr[DMA_OV1] | (enabled ? 0x1 : 0); + + lcd_writel(ofb->fbi, enabled ? 
FBR1 : FDADR1, fdadr1); + lcd_writel(ofb->fbi, OVL1C2, ofb->control[1]); + lcd_writel(ofb->fbi, OVL1C1, ofb->control[0] | OVLxC1_OEN); +} + +static void overlay1fb_disable(struct pxafb_layer *ofb) +{ + uint32_t lccr5 = lcd_readl(ofb->fbi, LCCR5); + + lcd_writel(ofb->fbi, OVL1C1, ofb->control[0] & ~OVLxC1_OEN); + + lcd_writel(ofb->fbi, LCSR1, LCSR1_BS(1)); + lcd_writel(ofb->fbi, LCCR5, lccr5 & ~LCSR1_BS(1)); + lcd_writel(ofb->fbi, FBR1, ofb->fbi->fdadr[DMA_OV1] | 0x3); + + if (wait_for_completion_timeout(&ofb->branch_done, 1 * HZ) == 0) + pr_warning("%s: timeout disabling overlay1\n", __func__); + + lcd_writel(ofb->fbi, LCCR5, lccr5); +} + +static void overlay2fb_setup(struct pxafb_layer *ofb) +{ + int size, div = 1, pfor = NONSTD_TO_PFOR(ofb->fb.var.nonstd); + unsigned long start[3] = { ofb->video_mem_phys, 0, 0 }; + + if (pfor == OVERLAY_FORMAT_RGB || pfor == OVERLAY_FORMAT_YUV444_PACKED) { + size = ofb->fb.fix.line_length * ofb->fb.var.yres_virtual; + setup_frame_dma(ofb->fbi, DMA_OV2_Y, -1, start[0], size); + } else { + size = ofb->fb.var.xres_virtual * ofb->fb.var.yres_virtual; + switch (pfor) { + case OVERLAY_FORMAT_YUV444_PLANAR: div = 1; break; + case OVERLAY_FORMAT_YUV422_PLANAR: div = 2; break; + case OVERLAY_FORMAT_YUV420_PLANAR: div = 4; break; + } + start[1] = start[0] + size; + start[2] = start[1] + size / div; + setup_frame_dma(ofb->fbi, DMA_OV2_Y, -1, start[0], size); + setup_frame_dma(ofb->fbi, DMA_OV2_Cb, -1, start[1], size / div); + setup_frame_dma(ofb->fbi, DMA_OV2_Cr, -1, start[2], size / div); + } +} + +static void overlay2fb_enable(struct pxafb_layer *ofb) +{ + int pfor = NONSTD_TO_PFOR(ofb->fb.var.nonstd); + int enabled = lcd_readl(ofb->fbi, OVL2C1) & OVLxC1_OEN; + uint32_t fdadr2 = ofb->fbi->fdadr[DMA_OV2_Y] | (enabled ? 0x1 : 0); + uint32_t fdadr3 = ofb->fbi->fdadr[DMA_OV2_Cb] | (enabled ? 0x1 : 0); + uint32_t fdadr4 = ofb->fbi->fdadr[DMA_OV2_Cr] | (enabled ? 
0x1 : 0); + + if (pfor == OVERLAY_FORMAT_RGB || pfor == OVERLAY_FORMAT_YUV444_PACKED) + lcd_writel(ofb->fbi, enabled ? FBR2 : FDADR2, fdadr2); + else { + lcd_writel(ofb->fbi, enabled ? FBR2 : FDADR2, fdadr2); + lcd_writel(ofb->fbi, enabled ? FBR3 : FDADR3, fdadr3); + lcd_writel(ofb->fbi, enabled ? FBR4 : FDADR4, fdadr4); + } + lcd_writel(ofb->fbi, OVL2C2, ofb->control[1]); + lcd_writel(ofb->fbi, OVL2C1, ofb->control[0] | OVLxC1_OEN); +} + +static void overlay2fb_disable(struct pxafb_layer *ofb) +{ + uint32_t lccr5 = lcd_readl(ofb->fbi, LCCR5); + + lcd_writel(ofb->fbi, OVL2C1, ofb->control[0] & ~OVLxC1_OEN); + + lcd_writel(ofb->fbi, LCSR1, LCSR1_BS(2)); + lcd_writel(ofb->fbi, LCCR5, lccr5 & ~LCSR1_BS(2)); + lcd_writel(ofb->fbi, FBR2, ofb->fbi->fdadr[DMA_OV2_Y] | 0x3); + lcd_writel(ofb->fbi, FBR3, ofb->fbi->fdadr[DMA_OV2_Cb] | 0x3); + lcd_writel(ofb->fbi, FBR4, ofb->fbi->fdadr[DMA_OV2_Cr] | 0x3); + + if (wait_for_completion_timeout(&ofb->branch_done, 1 * HZ) == 0) + pr_warning("%s: timeout disabling overlay2\n", __func__); +} + +static struct pxafb_layer_ops ofb_ops[] = { + [0] = { + .enable = overlay1fb_enable, + .disable = overlay1fb_disable, + .setup = overlay1fb_setup, + }, + [1] = { + .enable = overlay2fb_enable, + .disable = overlay2fb_disable, + .setup = overlay2fb_setup, + }, +}; + +static int overlayfb_open(struct fb_info *info, int user) +{ + struct pxafb_layer *ofb = (struct pxafb_layer *)info; + + /* no support for framebuffer console on overlay */ + if (user == 0) + return -ENODEV; + + /* allow only one user at a time */ + if (atomic_inc_and_test(&ofb->usage)) + return -EBUSY; + + /* unblank the base framebuffer */ + fb_blank(&ofb->fbi->fb, FB_BLANK_UNBLANK); + return 0; +} + +static int overlayfb_release(struct fb_info *info, int user) +{ + struct pxafb_layer *ofb = (struct pxafb_layer*) info; + + atomic_dec(&ofb->usage); + ofb->ops->disable(ofb); + + free_pages_exact(ofb->video_mem, ofb->video_mem_size); + ofb->video_mem = NULL; + ofb->video_mem_size = 
0; + return 0; +} + +static int overlayfb_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct pxafb_layer *ofb = (struct pxafb_layer *)info; + struct fb_var_screeninfo *base_var = &ofb->fbi->fb.var; + int xpos, ypos, pfor, bpp; + + xpos = NONSTD_TO_XPOS(var->nonstd); + ypos = NONSTD_TO_XPOS(var->nonstd); + pfor = NONSTD_TO_PFOR(var->nonstd); + + bpp = pxafb_var_to_bpp(var); + if (bpp < 0) + return -EINVAL; + + /* no support for YUV format on overlay1 */ + if (ofb->id == OVERLAY1 && pfor != 0) + return -EINVAL; + + /* for YUV packed formats, bpp = 'minimum bpp of YUV components' */ + switch (pfor) { + case OVERLAY_FORMAT_RGB: + bpp = pxafb_var_to_bpp(var); + if (bpp < 0) + return -EINVAL; + + pxafb_set_pixfmt(var, var_to_depth(var)); + break; + case OVERLAY_FORMAT_YUV444_PACKED: bpp = 24; break; + case OVERLAY_FORMAT_YUV444_PLANAR: bpp = 8; break; + case OVERLAY_FORMAT_YUV422_PLANAR: bpp = 4; break; + case OVERLAY_FORMAT_YUV420_PLANAR: bpp = 2; break; + default: + return -EINVAL; + } + + /* each line must start at a 32-bit word boundary */ + if ((xpos * bpp) % 32) + return -EINVAL; + + /* xres must align on 32-bit word boundary */ + var->xres = roundup(var->xres * bpp, 32) / bpp; + + if ((xpos + var->xres > base_var->xres) || + (ypos + var->yres > base_var->yres)) + return -EINVAL; + + var->xres_virtual = var->xres; + var->yres_virtual = max(var->yres, var->yres_virtual); + return 0; +} + +static int overlayfb_map_video_memory(struct pxafb_layer *ofb) +{ + struct fb_var_screeninfo *var = &ofb->fb.var; + int pfor = NONSTD_TO_PFOR(var->nonstd); + int size, bpp = 0; + + switch (pfor) { + case OVERLAY_FORMAT_RGB: bpp = var->bits_per_pixel; break; + case OVERLAY_FORMAT_YUV444_PACKED: bpp = 24; break; + case OVERLAY_FORMAT_YUV444_PLANAR: bpp = 24; break; + case OVERLAY_FORMAT_YUV422_PLANAR: bpp = 16; break; + case OVERLAY_FORMAT_YUV420_PLANAR: bpp = 12; break; + } + + ofb->fb.fix.line_length = var->xres_virtual * bpp / 8; + + size = 
PAGE_ALIGN(ofb->fb.fix.line_length * var->yres_virtual); + + /* don't re-allocate if the original video memory is enough */ + if (ofb->video_mem) { + if (ofb->video_mem_size >= size) + return 0; + + free_pages_exact(ofb->video_mem, ofb->video_mem_size); + } + + ofb->video_mem = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); + if (ofb->video_mem == NULL) + return -ENOMEM; + + ofb->video_mem_phys = virt_to_phys(ofb->video_mem); + ofb->video_mem_size = size; + + ofb->fb.fix.smem_start = ofb->video_mem_phys; + ofb->fb.fix.smem_len = ofb->fb.fix.line_length * var->yres_virtual; + ofb->fb.screen_base = ofb->video_mem; + return 0; +} + +static int overlayfb_set_par(struct fb_info *info) +{ + struct pxafb_layer *ofb = (struct pxafb_layer *)info; + struct fb_var_screeninfo *var = &info->var; + int xpos, ypos, pfor, bpp, ret; + + ret = overlayfb_map_video_memory(ofb); + if (ret) + return ret; + + bpp = pxafb_var_to_bpp(var); + xpos = NONSTD_TO_XPOS(var->nonstd); + ypos = NONSTD_TO_XPOS(var->nonstd); + pfor = NONSTD_TO_PFOR(var->nonstd); + + ofb->control[0] = OVLxC1_PPL(var->xres) | OVLxC1_LPO(var->yres) | + OVLxC1_BPP(bpp); + ofb->control[1] = OVLxC2_XPOS(xpos) | OVLxC2_YPOS(ypos); + + if (ofb->id == OVERLAY2) + ofb->control[1] |= OVL2C2_PFOR(pfor); + + ofb->ops->setup(ofb); + ofb->ops->enable(ofb); + return 0; +} + +static struct fb_ops overlay_fb_ops = { + .owner = THIS_MODULE, + .fb_open = overlayfb_open, + .fb_release = overlayfb_release, + .fb_check_var = overlayfb_check_var, + .fb_set_par = overlayfb_set_par, +}; + +static void __devinit init_pxafb_overlay(struct pxafb_info *fbi, + struct pxafb_layer *ofb, int id) +{ + sprintf(ofb->fb.fix.id, "overlay%d", id + 1); + + ofb->fb.fix.type = FB_TYPE_PACKED_PIXELS; + ofb->fb.fix.xpanstep = 0; + ofb->fb.fix.ypanstep = 1; + + ofb->fb.var.activate = FB_ACTIVATE_NOW; + ofb->fb.var.height = -1; + ofb->fb.var.width = -1; + ofb->fb.var.vmode = FB_VMODE_NONINTERLACED; + + ofb->fb.fbops = &overlay_fb_ops; + ofb->fb.flags = 
FBINFO_FLAG_DEFAULT; + ofb->fb.node = -1; + ofb->fb.pseudo_palette = NULL; + + ofb->id = id; + ofb->ops = &ofb_ops[id]; + atomic_set(&ofb->usage, 0); + ofb->fbi = fbi; + init_completion(&ofb->branch_done); +} + +static int __devinit pxafb_overlay_init(struct pxafb_info *fbi) +{ + int i, ret; + + for (i = 0; i < 2; i++) { + init_pxafb_overlay(fbi, &fbi->overlay[i], i); + ret = register_framebuffer(&fbi->overlay[i].fb); + if (ret) { + dev_err(fbi->dev, "failed to register overlay %d\n", i); + return ret; + } + } + + /* mask all IU/BS/EOF/SOF interrupts */ + lcd_writel(fbi, LCCR5, ~0); + + /* place overlay(s) on top of base */ + fbi->lccr0 |= LCCR0_OUC; + pr_info("PXA Overlay driver loaded successfully!\n"); + return 0; +} + +static void __devexit pxafb_overlay_exit(struct pxafb_info *fbi) +{ + int i; + + for (i = 0; i < 2; i++) + unregister_framebuffer(&fbi->overlay[i].fb); +} +#else +static inline void pxafb_overlay_init(struct pxafb_info *fbi) {} +static inline void pxafb_overlay_exit(struct pxafb_info *fbi) {} +#endif /* CONFIG_FB_PXA_OVERLAY */ + /* * Calculate the PCD value from the clock rate (in picoseconds). * We take account of the PPCR clock setting. 
@@ -660,7 +996,7 @@ unsigned long pxafb_get_hsync_time(struct device *dev) EXPORT_SYMBOL(pxafb_get_hsync_time); static int setup_frame_dma(struct pxafb_info *fbi, int dma, int pal, - unsigned int offset, size_t size) + unsigned long start, size_t size) { struct pxafb_dma_descriptor *dma_desc, *pal_desc; unsigned int dma_desc_off, pal_desc_off; @@ -671,7 +1007,7 @@ static int setup_frame_dma(struct pxafb_info *fbi, int dma, int pal, dma_desc = &fbi->dma_buff->dma_desc[dma]; dma_desc_off = offsetof(struct pxafb_dma_buff, dma_desc[dma]); - dma_desc->fsadr = fbi->video_mem_phys + offset; + dma_desc->fsadr = start; dma_desc->fidr = 0; dma_desc->ldcmd = size; @@ -705,14 +1041,14 @@ static void setup_base_frame(struct pxafb_info *fbi, int branch) { struct fb_var_screeninfo *var = &fbi->fb.var; struct fb_fix_screeninfo *fix = &fbi->fb.fix; - unsigned int nbytes, offset; - int dma, pal, bpp = var->bits_per_pixel; + int nbytes, dma, pal, bpp = var->bits_per_pixel; + unsigned long offset; dma = DMA_BASE + (branch ? DMA_MAX : 0); pal = (bpp >= 16) ? PAL_NONE : PAL_BASE + (branch ? 
PAL_MAX : 0); nbytes = fix->line_length * var->yres; - offset = fix->line_length * var->yoffset; + offset = fix->line_length * var->yoffset + fbi->video_mem_phys; if (fbi->lccr0 & LCCR0_SDS) { nbytes = nbytes / 2; @@ -1090,8 +1426,9 @@ static void pxafb_disable_controller(struct pxafb_info *fbi) static irqreturn_t pxafb_handle_irq(int irq, void *dev_id) { struct pxafb_info *fbi = dev_id; - unsigned int lccr0, lcsr = lcd_readl(fbi, LCSR); + unsigned int lccr0, lcsr, lcsr1; + lcsr = lcd_readl(fbi, LCSR); if (lcsr & LCSR_LDD) { lccr0 = lcd_readl(fbi, LCCR0); lcd_writel(fbi, LCCR0, lccr0 | LCCR0_LDM); @@ -1102,8 +1439,18 @@ static irqreturn_t pxafb_handle_irq(int irq, void *dev_id) if (lcsr & LCSR_CMD_INT) complete(&fbi->command_done); #endif - lcd_writel(fbi, LCSR, lcsr); + +#ifdef CONFIG_FB_PXA_OVERLAY + lcsr1 = lcd_readl(fbi, LCSR1); + if (lcsr1 & LCSR1_BS(1)) + complete(&fbi->overlay[0].branch_done); + + if (lcsr1 & LCSR1_BS(2)) + complete(&fbi->overlay[1].branch_done); + + lcd_writel(fbi, LCSR1, lcsr1); +#endif return IRQ_HANDLED; } @@ -1802,6 +2149,8 @@ static int __devinit pxafb_probe(struct platform_device *dev) goto failed_free_cmap; } + pxafb_overlay_init(fbi); + #ifdef CONFIG_CPU_FREQ fbi->freq_transition.notifier_call = pxafb_freq_transition; fbi->freq_policy.notifier_call = pxafb_freq_policy; @@ -1852,6 +2201,7 @@ static int __devexit pxafb_remove(struct platform_device *dev) info = &fbi->fb; + pxafb_overlay_exit(fbi); unregister_framebuffer(info); pxafb_disable_controller(fbi); diff --git a/drivers/video/pxafb.h b/drivers/video/pxafb.h index ae3cbc1ca64..2353521c5c8 100644 --- a/drivers/video/pxafb.h +++ b/drivers/video/pxafb.h @@ -64,6 +64,47 @@ struct pxafb_dma_buff { struct pxafb_dma_descriptor dma_desc[DMA_MAX * 2]; }; +enum { + OVERLAY1, + OVERLAY2, +}; + +enum { + OVERLAY_FORMAT_RGB = 0, + OVERLAY_FORMAT_YUV444_PACKED, + OVERLAY_FORMAT_YUV444_PLANAR, + OVERLAY_FORMAT_YUV422_PLANAR, + OVERLAY_FORMAT_YUV420_PLANAR, +}; + +#define NONSTD_TO_XPOS(x) 
(((x) >> 0) & 0x3ff) +#define NONSTD_TO_YPOS(x) (((x) >> 10) & 0x3ff) +#define NONSTD_TO_PFOR(x) (((x) >> 20) & 0x7) + +struct pxafb_layer; + +struct pxafb_layer_ops { + void (*enable)(struct pxafb_layer *); + void (*disable)(struct pxafb_layer *); + void (*setup)(struct pxafb_layer *); +}; + +struct pxafb_layer { + struct fb_info fb; + int id; + atomic_t usage; + uint32_t control[2]; + + struct pxafb_layer_ops *ops; + + void __iomem *video_mem; + unsigned long video_mem_phys; + size_t video_mem_size; + struct completion branch_done; + + struct pxafb_info *fbi; +}; + struct pxafb_info { struct fb_info fb; struct device *dev; @@ -114,6 +155,10 @@ struct pxafb_info { struct task_struct *smart_thread; #endif +#ifdef CONFIG_FB_PXA_OVERLAY + struct pxafb_layer overlay[2]; +#endif + #ifdef CONFIG_CPU_FREQ struct notifier_block freq_transition; struct notifier_block freq_policy; -- cgit v1.2.3-70-g09d2 From 2b1b945f88537a110f018f6a50b1f01bbb23fb7e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 24 Oct 2008 23:13:57 -0300 Subject: V4L/DVB (9370): Update README.cx88 with the current status README.cx88 were outdated since a long time. Update it with the current status. Cc: Rafael Diniz Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/README.cx88 | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/README.cx88 b/Documentation/video4linux/README.cx88 index 166d5960b1a..35fae23f883 100644 --- a/Documentation/video4linux/README.cx88 +++ b/Documentation/video4linux/README.cx88 @@ -1,4 +1,3 @@ - cx8800 release notes ==================== @@ -10,21 +9,20 @@ current status video - Basically works. - - Some minor image quality glitches. - - For now only capture, overlay support isn't completed yet. + - For now, only capture and read(). Overlay isn't supported. 
audio - The chip specs for the on-chip TV sound decoder are next to useless :-/ - Neverless the builtin TV sound decoder starts working now, - at least for PAL-BG. Other TV norms need other code ... + at least for some standards. FOR ANY REPORTS ON THIS PLEASE MENTION THE TV NORM YOU ARE USING. - Most tuner chips do provide mono sound, which may or may not be useable depending on the board design. With the Hauppauge cards it works, so there is mono sound available as fallback. - audio data dma (i.e. recording without loopback cable to the - sound card) should be possible, but there is no code yet ... + sound card) is supported via cx88-alsa. vbi - Code present. Works for NTSC closed caption. PAL and other -- cgit v1.2.3-70-g09d2 From bc13ae11227b06d2397cea1a8eb22fc2ca64e22f Mon Sep 17 00:00:00 2001 From: Patrice Levesque Date: Sun, 2 Nov 2008 16:37:35 -0300 Subject: V4L/DVB (9529): cx88: add a second PCI ID for ATI TV Wonder Pro There's a second PCI identifier for the ATI TV WONDER PRO card (0x1002:0x00f9). Attached is a patch to kernel 2.6.27 that adds autodetection for this version. 
Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.cx88 | 2 +- drivers/media/video/cx88/cx88-cards.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.cx88 b/Documentation/video4linux/CARDLIST.cx88 index a5227e308f4..12e600d7424 100644 --- a/Documentation/video4linux/CARDLIST.cx88 +++ b/Documentation/video4linux/CARDLIST.cx88 @@ -2,7 +2,7 @@ 1 -> Hauppauge WinTV 34xxx models [0070:3400,0070:3401] 2 -> GDI Black Gold [14c7:0106,14c7:0107] 3 -> PixelView [1554:4811] - 4 -> ATI TV Wonder Pro [1002:00f8] + 4 -> ATI TV Wonder Pro [1002:00f8,1002:00f9] 5 -> Leadtek Winfast 2000XP Expert [107d:6611,107d:6613] 6 -> AverTV Studio 303 (M126) [1461:000b] 7 -> MSI TV-@nywhere Master [1462:8606] diff --git a/drivers/media/video/cx88/cx88-cards.c b/drivers/media/video/cx88/cx88-cards.c index 5bcbb4cc7c2..3ead9498c4d 100644 --- a/drivers/media/video/cx88/cx88-cards.c +++ b/drivers/media/video/cx88/cx88-cards.c @@ -1897,7 +1897,11 @@ static const struct cx88_subid cx88_subids[] = { .subvendor = PCI_VENDOR_ID_ATI, .subdevice = 0x00f8, .card = CX88_BOARD_ATI_WONDER_PRO, - },{ + }, { + .subvendor = PCI_VENDOR_ID_ATI, + .subdevice = 0x00f9, + .card = CX88_BOARD_ATI_WONDER_PRO, + }, { .subvendor = 0x107d, .subdevice = 0x6611, .card = CX88_BOARD_WINFAST2000XP_EXPERT, -- cgit v1.2.3-70-g09d2 From 4b29631db33292d416dc395c56122ea865e7635c Mon Sep 17 00:00:00 2001 From: "Igor M. Liplianin" Date: Sun, 9 Nov 2008 15:25:31 -0300 Subject: V4L/DVB (9533): cx88: Add support for TurboSight TBS8910 DVB-S PCI card The card based on stv0299 or stv0288 demodulators. Signed-off-by: Igor M. 
Liplianin Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.cx88 | 2 ++ drivers/media/video/Makefile | 1 + drivers/media/video/cx88/cx88-cards.c | 34 +++++++++++++++++++++++++++++++++ drivers/media/video/cx88/cx88-dvb.c | 2 ++ drivers/media/video/cx88/cx88.h | 2 ++ firmware/Makefile | 16 ++-------------- sound/i2c/other/tea575x-tuner.c | 22 ++++++++++----------- 7 files changed, 54 insertions(+), 25 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.cx88 b/Documentation/video4linux/CARDLIST.cx88 index 12e600d7424..dc8da94061a 100644 --- a/Documentation/video4linux/CARDLIST.cx88 +++ b/Documentation/video4linux/CARDLIST.cx88 @@ -74,3 +74,5 @@ 73 -> TeVii S420 DVB-S [d420:9022] 74 -> Prolink Pixelview Global Extreme [1554:4976] 75 -> PROF 7300 DVB-S/S2 [B033:3033] + 76 -> SATTRADE ST4200 DVB-S/S2 [b200:4200] + 77 -> TBS 8910 DVB-S [8910:8888] diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile index e5b80137900..53d43f9e017 100644 --- a/drivers/media/video/Makefile +++ b/drivers/media/video/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_VIDEO_IR_I2C) += ir-kbd-i2c.o obj-$(CONFIG_VIDEO_TVAUDIO) += tvaudio.o obj-$(CONFIG_VIDEO_TDA7432) += tda7432.o obj-$(CONFIG_VIDEO_TDA9875) += tda9875.o +obj-$(CONFIG_SOUND_TVMIXER) += tvmixer.o obj-$(CONFIG_VIDEO_SAA6588) += saa6588.o obj-$(CONFIG_VIDEO_SAA5246A) += saa5246a.o diff --git a/drivers/media/video/cx88/cx88-cards.c b/drivers/media/video/cx88/cx88-cards.c index 3ead9498c4d..bbe5b3343ac 100644 --- a/drivers/media/video/cx88/cx88-cards.c +++ b/drivers/media/video/cx88/cx88-cards.c @@ -1847,6 +1847,18 @@ static const struct cx88_board cx88_boards[] = { } }, .mpeg = CX88_MPEG_DVB, }, + [CX88_BOARD_TBS_8910] = { + .name = "TBS 8910 DVB-S", + .tuner_type = UNSET, + .radio_type = UNSET, + .tuner_addr = ADDR_UNSET, + .radio_addr = ADDR_UNSET, + .input = {{ + .type = CX88_VMUX_DVB, + .vmux = 0, + } }, + .mpeg = CX88_MPEG_DVB, + }, [CX88_BOARD_TBS_8920] = 
{ .name = "TBS 8920 DVB-S/S2", .tuner_type = TUNER_ABSENT, @@ -1871,6 +1883,18 @@ static const struct cx88_board cx88_boards[] = { } }, .mpeg = CX88_MPEG_DVB, }, + [CX88_BOARD_SATTRADE_ST4200] = { + .name = "SATTRADE ST4200 DVB-S/S2", + .tuner_type = UNSET, + .radio_type = UNSET, + .tuner_addr = ADDR_UNSET, + .radio_addr = ADDR_UNSET, + .input = {{ + .type = CX88_VMUX_DVB, + .vmux = 0, + } }, + .mpeg = CX88_MPEG_DVB, + }, }; /* ------------------------------------------------------------------ */ @@ -2260,6 +2284,10 @@ static const struct cx88_subid cx88_subids[] = { .subvendor = 0xA044, .subdevice = 0x2011, .card = CX88_BOARD_OMICOM_SS4_PCI, + }, { + .subvendor = 0x8910, + .subdevice = 0x8888, + .card = CX88_BOARD_TBS_8910, }, { .subvendor = 0x8920, .subdevice = 0x8888, @@ -2268,6 +2296,10 @@ static const struct cx88_subid cx88_subids[] = { .subvendor = 0xB033, .subdevice = 0x3033, .card = CX88_BOARD_PROF_7300, + }, { + .subvendor = 0xb200, + .subdevice = 0x4200, + .card = CX88_BOARD_SATTRADE_ST4200, }, }; @@ -2878,8 +2910,10 @@ static void cx88_card_setup(struct cx88_core *core) case CX88_BOARD_TEVII_S420: case CX88_BOARD_TEVII_S460: case CX88_BOARD_OMICOM_SS4_PCI: + case CX88_BOARD_TBS_8910: case CX88_BOARD_TBS_8920: case CX88_BOARD_PROF_7300: + case CX88_BOARD_SATTRADE_ST4200: cx_write(MO_SRST_IO, 0); msleep(100); cx_write(MO_SRST_IO, 1); diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c index 8378d46dc80..bab28112e40 100644 --- a/drivers/media/video/cx88/cx88-dvb.c +++ b/drivers/media/video/cx88/cx88-dvb.c @@ -1042,6 +1042,7 @@ static int dvb_register(struct cx8802_dev *dev) 0x08, ISL6421_DCL, 0x00); } break; + case CX88_BOARD_TBS_8910: case CX88_BOARD_TEVII_S420: fe0->dvb.frontend = dvb_attach(stv0299_attach, &tevii_tuner_sharp_config, @@ -1078,6 +1079,7 @@ static int dvb_register(struct cx8802_dev *dev) case CX88_BOARD_OMICOM_SS4_PCI: case CX88_BOARD_TBS_8920: case CX88_BOARD_PROF_7300: + case CX88_BOARD_SATTRADE_ST4200: 
fe0->dvb.frontend = dvb_attach(cx24116_attach, &hauppauge_hvr4000_config, &core->i2c_adap); diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h index a11599fa1d8..ae1b89a88bc 100644 --- a/drivers/media/video/cx88/cx88.h +++ b/drivers/media/video/cx88/cx88.h @@ -228,6 +228,8 @@ extern struct sram_channel cx88_sram_channels[]; #define CX88_BOARD_TEVII_S420 73 #define CX88_BOARD_PROLINK_PV_GLOBAL_XTREME 74 #define CX88_BOARD_PROF_7300 75 +#define CX88_BOARD_SATTRADE_ST4200 76 +#define CX88_BOARD_TBS_8910 77 enum cx88_itype { CX88_VMUX_COMPOSITE1 = 1, diff --git a/firmware/Makefile b/firmware/Makefile index 4993a4b3d8a..6968388818b 100644 --- a/firmware/Makefile +++ b/firmware/Makefile @@ -150,27 +150,15 @@ $(patsubst %,$(obj)/%.gen.o, $(fw-external-y)): $(obj)/%.gen.o: $(fwdir)/% $(obj)/%: $(obj)/%.ihex | $(objtree)/$(obj)/$$(dir %) $(call cmd,ihex) -# Don't depend on ihex2fw if we're installing and it already exists. -# Putting it after | in the dependencies doesn't seem sufficient when -# we're installing after a cross-compile, because ihex2fw has dependencies -# on stuff like /usr/lib/gcc/ppc64-redhat-linux/4.3.0/include/stddef.h and -# thus wants to be rebuilt. Which it can't be, if the prebuilt kernel tree -# is exported read-only for someone to run 'make install'. -ifeq ($(INSTALL):$(wildcard $(obj)/ihex2fw),install:$(obj)/ihex2fw) -ihex2fw_dep := -else -ihex2fw_dep := $(obj)/ihex2fw -endif - # .HEX is also Intel HEX, but where the offset and length in each record # is actually meaningful, because the firmware has to be loaded in a certain # order rather than as a single binary blob. Thus, we convert them into our # more compact binary representation of ihex records () -$(obj)/%.fw: $(obj)/%.HEX $(ihex2fw_dep) | $(objtree)/$(obj)/$$(dir %) +$(obj)/%.fw: $(obj)/%.HEX $(obj)/ihex2fw | $(objtree)/$(obj)/$$(dir %) $(call cmd,ihex2fw) # .H16 is our own modified form of Intel HEX, with 16-bit length for records. 
-$(obj)/%.fw: $(obj)/%.H16 $(ihex2fw_dep) | $(objtree)/$(obj)/$$(dir %) +$(obj)/%.fw: $(obj)/%.H16 $(obj)/ihex2fw | $(objtree)/$(obj)/$$(dir %) $(call cmd,h16tofw) $(firmware-dirs): diff --git a/sound/i2c/other/tea575x-tuner.c b/sound/i2c/other/tea575x-tuner.c index c13a178383b..549b4eba149 100644 --- a/sound/i2c/other/tea575x-tuner.c +++ b/sound/i2c/other/tea575x-tuner.c @@ -18,7 +18,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - */ + */ #include #include @@ -89,7 +89,7 @@ static int snd_tea575x_ioctl(struct inode *inode, struct file *file, { struct snd_tea575x *tea = video_drvdata(file); void __user *arg = (void __user *)data; - + switch(cmd) { case VIDIOCGCAP: { @@ -110,9 +110,9 @@ static int snd_tea575x_ioctl(struct inode *inode, struct file *file, case VIDIOCGTUNER: { struct video_tuner v; - if (copy_from_user(&v, arg,sizeof(v))!=0) + if (copy_from_user(&v, arg,sizeof(v))!=0) return -EFAULT; - if (v.tuner) /* Only 1 tuner */ + if (v.tuner) /* Only 1 tuner */ return -EINVAL; v.rangelow = (87*16000); v.rangehigh = (108*16000); @@ -144,24 +144,24 @@ static int snd_tea575x_ioctl(struct inode *inode, struct file *file, snd_tea575x_set_freq(tea); return 0; case VIDIOCGAUDIO: - { + { struct video_audio v; memset(&v, 0, sizeof(v)); strcpy(v.name, "Radio"); if(copy_to_user(arg,&v, sizeof(v))) return -EFAULT; - return 0; + return 0; } case VIDIOCSAUDIO: { struct video_audio v; - if(copy_from_user(&v, arg, sizeof(v))) - return -EFAULT; + if(copy_from_user(&v, arg, sizeof(v))) + return -EFAULT; if (tea->ops->mute) tea->ops->mute(tea, (v.flags & VIDEO_AUDIO_MUTE) ? 
1 : 0); - if(v.audio) + if(v.audio) return -EINVAL; return 0; } @@ -240,11 +240,11 @@ static int __init alsa_tea575x_module_init(void) { return 0; } - + static void __exit alsa_tea575x_module_exit(void) { } - + module_init(alsa_tea575x_module_init) module_exit(alsa_tea575x_module_exit) -- cgit v1.2.3-70-g09d2 From cd3cde1271c6c597c16e4c22810449949c675092 Mon Sep 17 00:00:00 2001 From: "Igor M. Liplianin" Date: Sun, 9 Nov 2008 15:26:25 -0300 Subject: V4L/DVB (9534): cx88: Add support for Prof 6200 DVB-S PCI card The card based on stv0299 or stv0288 demodulators. Signed-off-by: Igor M. Liplianin Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.cx88 | 1 + drivers/media/video/cx88/cx88-cards.c | 17 +++++++++++++++++ drivers/media/video/cx88/cx88-dvb.c | 1 + drivers/media/video/cx88/cx88.h | 1 + 4 files changed, 20 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.cx88 b/Documentation/video4linux/CARDLIST.cx88 index dc8da94061a..0d08f1edcf6 100644 --- a/Documentation/video4linux/CARDLIST.cx88 +++ b/Documentation/video4linux/CARDLIST.cx88 @@ -76,3 +76,4 @@ 75 -> PROF 7300 DVB-S/S2 [B033:3033] 76 -> SATTRADE ST4200 DVB-S/S2 [b200:4200] 77 -> TBS 8910 DVB-S [8910:8888] + 78 -> Prof 6200 DVB-S [b022:3022] diff --git a/drivers/media/video/cx88/cx88-cards.c b/drivers/media/video/cx88/cx88-cards.c index bbe5b3343ac..38bf5ee4d4a 100644 --- a/drivers/media/video/cx88/cx88-cards.c +++ b/drivers/media/video/cx88/cx88-cards.c @@ -1871,6 +1871,18 @@ static const struct cx88_board cx88_boards[] = { } }, .mpeg = CX88_MPEG_DVB, }, + [CX88_BOARD_PROF_6200] = { + .name = "Prof 6200 DVB-S", + .tuner_type = UNSET, + .radio_type = UNSET, + .tuner_addr = ADDR_UNSET, + .radio_addr = ADDR_UNSET, + .input = {{ + .type = CX88_VMUX_DVB, + .vmux = 0, + } }, + .mpeg = CX88_MPEG_DVB, + }, [CX88_BOARD_PROF_7300] = { .name = "PROF 7300 DVB-S/S2", .tuner_type = UNSET, @@ -2292,6 +2304,10 @@ static const struct cx88_subid cx88_subids[] 
= { .subvendor = 0x8920, .subdevice = 0x8888, .card = CX88_BOARD_TBS_8920, + }, { + .subvendor = 0xb022, + .subdevice = 0x3022, + .card = CX88_BOARD_PROF_6200, }, { .subvendor = 0xB033, .subdevice = 0x3033, @@ -2912,6 +2928,7 @@ static void cx88_card_setup(struct cx88_core *core) case CX88_BOARD_OMICOM_SS4_PCI: case CX88_BOARD_TBS_8910: case CX88_BOARD_TBS_8920: + case CX88_BOARD_PROF_6200: case CX88_BOARD_PROF_7300: case CX88_BOARD_SATTRADE_ST4200: cx_write(MO_SRST_IO, 0); diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c index bab28112e40..3c328a2f28f 100644 --- a/drivers/media/video/cx88/cx88-dvb.c +++ b/drivers/media/video/cx88/cx88-dvb.c @@ -1042,6 +1042,7 @@ static int dvb_register(struct cx8802_dev *dev) 0x08, ISL6421_DCL, 0x00); } break; + case CX88_BOARD_PROF_6200: case CX88_BOARD_TBS_8910: case CX88_BOARD_TEVII_S420: fe0->dvb.frontend = dvb_attach(stv0299_attach, diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h index ae1b89a88bc..20649b25f7b 100644 --- a/drivers/media/video/cx88/cx88.h +++ b/drivers/media/video/cx88/cx88.h @@ -230,6 +230,7 @@ extern struct sram_channel cx88_sram_channels[]; #define CX88_BOARD_PROF_7300 75 #define CX88_BOARD_SATTRADE_ST4200 76 #define CX88_BOARD_TBS_8910 77 +#define CX88_BOARD_PROF_6200 78 enum cx88_itype { CX88_VMUX_COMPOSITE1 = 1, -- cgit v1.2.3-70-g09d2 From 917118745a610765e98621c4a81d7744806e4954 Mon Sep 17 00:00:00 2001 From: Jean-Francois Moine Date: Tue, 28 Oct 2008 08:00:23 -0300 Subject: V4L/DVB (9553): gspca: Webcam 145f:013a added in pac207. 
Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 1 + drivers/media/video/gspca/pac207.c | 1 + 2 files changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 004818fab04..2a23fe31e0e 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -263,6 +263,7 @@ etoms 102c:6251 Qcam xxxxxx VGA zc3xx 10fd:0128 Typhoon Webshot II USB 300k 0x0128 spca561 10fd:7e50 FlyCam Usb 100 zc3xx 10fd:8050 Typhoon Webshot II USB 300k +pac207 145f:013a Trust WB-1300N spca501 1776:501c Arowana 300K CMOS Camera t613 17a1:0128 TASCORP JPEG Webcam, NGS Cyclops vc032x 17ef:4802 Lenovo Vc0323+MI1310_SOC diff --git a/drivers/media/video/gspca/pac207.c b/drivers/media/video/gspca/pac207.c index 0b0c573d06d..39473e6b914 100644 --- a/drivers/media/video/gspca/pac207.c +++ b/drivers/media/video/gspca/pac207.c @@ -536,6 +536,7 @@ static const __devinitdata struct usb_device_id device_table[] = { {USB_DEVICE(0x093a, 0x2471)}, {USB_DEVICE(0x093a, 0x2472)}, {USB_DEVICE(0x093a, 0x2476)}, + {USB_DEVICE(0x145f, 0x013a)}, {USB_DEVICE(0x2001, 0xf115)}, {} }; -- cgit v1.2.3-70-g09d2 From 864ec0b7a03c8401e6e49f9e480489478ea14cb5 Mon Sep 17 00:00:00 2001 From: Devin Heitmueller Date: Wed, 12 Nov 2008 02:05:28 -0300 Subject: V4L/DVB (9590): Add registration for Pinnacle 80e ATSC tuner Add registration for Pinnacle 80e ATSC tuner Register the em2874 based Pinnacle 80e device. Note that support for this device also requires the new drx-j driver (which is not available yet). Thanks to Ray Lu from Empia for providing the em2874 datasheet. Thanks to Joerg Schindler from Pinnacle for providing sample hardware. Thanks to Rainer Miethling from Pinnacle for providing engineering support.
Signed-off-by: Devin Heitmueller Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.em28xx | 1 + drivers/media/video/em28xx/em28xx-cards.c | 34 +++++++++++++++++++++++++++++++ drivers/media/video/em28xx/em28xx.h | 1 + 3 files changed, 36 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index 187cc48d092..ea537f5ae40 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx @@ -57,3 +57,4 @@ 56 -> Pinnacle Hybrid Pro (2) (em2882) [2304:0226] 57 -> Kworld PlusTV HD Hybrid 330 (em2883) [eb1a:a316] 58 -> Compro VideoMate ForYou/Stereo (em2820/em2840) [185b:2041] + 59 -> Pinnacle PCTV HD Mini (em2874) [2304:023f] diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index 791ab2cc7d8..e4251117cbe 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -1087,6 +1087,20 @@ struct em28xx_board em28xx_boards[] = { .amux = EM28XX_AMUX_LINE_IN, } }, }, + [EM2874_BOARD_PINNACLE_PCTV_80E] = { + .name = "Pinnacle PCTV HD Mini", + .vchannels = 0, + .tuner_type = TUNER_ABSENT, + .has_dvb = 1, + .decoder = EM28XX_NODECODER, +#ifdef DJH_DEBUG + .input = { { + .type = EM28XX_VMUX_TELEVISION, + .vmux = TVP5150_COMPOSITE0, + .amux = EM28XX_AMUX_LINE_IN, + } }, +#endif + }, }; const unsigned int em28xx_bcount = ARRAY_SIZE(em28xx_boards); @@ -1180,6 +1194,8 @@ struct usb_device_id em28xx_id_table [] = { .driver_info = EM2882_BOARD_PINNACLE_HYBRID_PRO }, { USB_DEVICE(0x2304, 0x0227), .driver_info = EM2880_BOARD_PINNACLE_PCTV_HD_PRO }, + { USB_DEVICE(0x2304, 0x023f), + .driver_info = EM2874_BOARD_PINNACLE_PCTV_80E }, { USB_DEVICE(0x0413, 0x6023), .driver_info = EM2800_BOARD_LEADTEK_WINFAST_USBII }, { USB_DEVICE(0x093b, 0xa005), @@ -1255,6 +1271,17 @@ static struct em28xx_reg_seq em2882_terratec_hybrid_xs_digital[] = { { -1, -1, -1, -1}, 
}; +/* Pinnacle PCTV HD Mini (80e) GPIOs + 0-5: not used + 6: demod reset, active low + 7: LED on, active high */ +static struct em28xx_reg_seq em2874_pinnacle_80e_digital[] = { + {EM28XX_R06_I2C_CLK, 0x45, 0xff, 10}, /*400 KHz*/ + {EM2874_R80_GPIO, 0x80, 0xff, 100},/*Demod reset*/ + {EM2874_R80_GPIO, 0xc0, 0xff, 10}, + { -1, -1, -1, -1}, +}; + /* * EEPROM hash table for devices with generic USB IDs */ @@ -1518,6 +1545,13 @@ void em28xx_pre_card_setup(struct em28xx *dev) /* enables audio for that device */ em28xx_write_regs_req(dev, 0x00, 0x08, "\xfd", 1); break; + + case EM2874_BOARD_PINNACLE_PCTV_80E: + /* Set 400 KHz clock */ + em28xx_write_regs(dev, EM28XX_R06_I2C_CLK, "\x45", 1); + + dev->digital_gpio = em2874_pinnacle_80e_digital; + break; } em28xx_gpio_set(dev, dev->tun_analog_gpio); diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h index 7bf08cebb63..1350a9cea7c 100644 --- a/drivers/media/video/em28xx/em28xx.h +++ b/drivers/media/video/em28xx/em28xx.h @@ -97,6 +97,7 @@ #define EM2882_BOARD_PINNACLE_HYBRID_PRO 56 #define EM2883_BOARD_KWORLD_HYBRID_A316 57 #define EM2820_BOARD_COMPRO_VIDEOMATE_FORYOU 58 +#define EM2874_BOARD_PINNACLE_PCTV_80E 59 /* Limits minimum and default number of buffers */ #define EM28XX_MIN_BUF 4 -- cgit v1.2.3-70-g09d2 From 05583625710dfd75880a6cbb68292929d1d4c33c Mon Sep 17 00:00:00 2001 From: Dirk Heer Date: Fri, 21 Nov 2008 19:00:55 -0300 Subject: V4L/DVB (9677): bttv: fix some entries on Phytec boards and add missing ones This patch modifies bttv-cards.c and bttv.h so that the driver supports the VD-011, VD-012, VD-012-X1 and VD-012-X2 framegrabbers from Phytec Messtechnik GmbH.
Signed-off-by: Dirk Heer Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.bttv | 7 ++-- drivers/media/video/bt8xx/bttv-cards.c | 60 +++++++++++++++++++++++++++++---- drivers/media/video/bt8xx/bttv.h | 8 +++-- 3 files changed, 65 insertions(+), 10 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.bttv b/Documentation/video4linux/CARDLIST.bttv index 60ba6683603..0d93fa1ac25 100644 --- a/Documentation/video4linux/CARDLIST.bttv +++ b/Documentation/video4linux/CARDLIST.bttv @@ -104,8 +104,8 @@ 103 -> Grand X-Guard / Trust 814PCI [0304:0102] 104 -> Nebula Electronics DigiTV [0071:0101] 105 -> ProVideo PV143 [aa00:1430,aa00:1431,aa00:1432,aa00:1433,aa03:1433] -106 -> PHYTEC VD-009-X1 MiniDIN (bt878) -107 -> PHYTEC VD-009-X1 Combi (bt878) +106 -> PHYTEC VD-009-X1 VD-011 MiniDIN (bt878) +107 -> PHYTEC VD-009-X1 VD-011 Combi (bt878) 108 -> PHYTEC VD-009 MiniDIN (bt878) 109 -> PHYTEC VD-009 Combi (bt878) 110 -> IVC-100 [ff00:a132] @@ -151,3 +151,6 @@ 150 -> Geovision GV-600 [008a:763c] 151 -> Kozumi KTV-01C 152 -> Encore ENL TV-FM-2 [1000:1801] +153 -> PHYTEC VD-012 (bt878) +154 -> PHYTEC VD-012-X1 (bt878) +155 -> PHYTEC VD-012-X2 (bt878) diff --git a/drivers/media/video/bt8xx/bttv-cards.c b/drivers/media/video/bt8xx/bttv-cards.c index 13742b0bbe3..8629b77666f 100644 --- a/drivers/media/video/bt8xx/bttv-cards.c +++ b/drivers/media/video/bt8xx/bttv-cards.c @@ -2217,9 +2217,9 @@ struct tvcard bttv_tvcards[] = { .tuner_addr = ADDR_UNSET, .radio_addr = ADDR_UNSET, }, - [BTTV_BOARD_VD009X1_MINIDIN] = { + [BTTV_BOARD_VD009X1_VD011_MINIDIN] = { /* M.Klahr@phytec.de */ - .name = "PHYTEC VD-009-X1 MiniDIN (bt878)", + .name = "PHYTEC VD-009-X1 VD-011 MiniDIN (bt878)", .video_inputs = 4, .audio_inputs = 0, .tuner = UNSET, /* card has no tuner */ @@ -2227,14 +2227,14 @@ struct tvcard bttv_tvcards[] = { .gpiomask = 0x00, .muxsel = { 2, 3, 1, 0 }, .gpiomux = { 0, 0, 0, 0 }, /* card has no audio */ - .needs_tvaudio = 1, + 
.needs_tvaudio = 0, .pll = PLL_28, .tuner_type = UNSET, .tuner_addr = ADDR_UNSET, .radio_addr = ADDR_UNSET, }, - [BTTV_BOARD_VD009X1_COMBI] = { - .name = "PHYTEC VD-009-X1 Combi (bt878)", + [BTTV_BOARD_VD009X1_VD011_COMBI] = { + .name = "PHYTEC VD-009-X1 VD-011 Combi (bt878)", .video_inputs = 4, .audio_inputs = 0, .tuner = UNSET, /* card has no tuner */ @@ -2242,7 +2242,7 @@ struct tvcard bttv_tvcards[] = { .gpiomask = 0x00, .muxsel = { 2, 3, 1, 1 }, .gpiomux = { 0, 0, 0, 0 }, /* card has no audio */ - .needs_tvaudio = 1, + .needs_tvaudio = 0, .pll = PLL_28, .tuner_type = UNSET, .tuner_addr = ADDR_UNSET, @@ -3061,6 +3061,54 @@ struct tvcard bttv_tvcards[] = { .pll = PLL_28, .has_radio = 1, .has_remote = 1, + }, + [BTTV_BOARD_VD012] = { + /* D.Heer@Phytec.de */ + .name = "PHYTEC VD-012 (bt878)", + .video_inputs = 4, + .audio_inputs = 0, + .tuner = UNSET, /* card has no tuner */ + .svhs = UNSET, /* card has no s-video */ + .gpiomask = 0x00, + .muxsel = { 0, 2, 3, 1 }, + .gpiomux = { 0, 0, 0, 0 }, /* card has no audio */ + .needs_tvaudio = 0, + .pll = PLL_28, + .tuner_type = UNSET, + .tuner_addr = ADDR_UNSET, + .radio_addr = ADDR_UNSET, + }, + [BTTV_BOARD_VD012_X1] = { + /* D.Heer@Phytec.de */ + .name = "PHYTEC VD-012-X1 (bt878)", + .video_inputs = 4, + .audio_inputs = 0, + .tuner = UNSET, /* card has no tuner */ + .svhs = 3, + .gpiomask = 0x00, + .muxsel = { 2, 3, 1 }, + .gpiomux = { 0, 0, 0, 0 }, /* card has no audio */ + .needs_tvaudio = 0, + .pll = PLL_28, + .tuner_type = UNSET, + .tuner_addr = ADDR_UNSET, + .radio_addr = ADDR_UNSET, + }, + [BTTV_BOARD_VD012_X2] = { + /* D.Heer@Phytec.de */ + .name = "PHYTEC VD-012-X2 (bt878)", + .video_inputs = 4, + .audio_inputs = 0, + .tuner = UNSET, /* card has no tuner */ + .svhs = 3, + .gpiomask = 0x00, + .muxsel = { 3, 2, 1 }, + .gpiomux = { 0, 0, 0, 0 }, /* card has no audio */ + .needs_tvaudio = 0, + .pll = PLL_28, + .tuner_type = UNSET, + .tuner_addr = ADDR_UNSET, + .radio_addr = ADDR_UNSET, } }; diff --git 
a/drivers/media/video/bt8xx/bttv.h b/drivers/media/video/bt8xx/bttv.h index bc2c88499ab..529bf6cf634 100644 --- a/drivers/media/video/bt8xx/bttv.h +++ b/drivers/media/video/bt8xx/bttv.h @@ -130,8 +130,8 @@ #define BTTV_BOARD_XGUARD 0x67 #define BTTV_BOARD_NEBULA_DIGITV 0x68 #define BTTV_BOARD_PV143 0x69 -#define BTTV_BOARD_VD009X1_MINIDIN 0x6a -#define BTTV_BOARD_VD009X1_COMBI 0x6b +#define BTTV_BOARD_VD009X1_VD011_MINIDIN 0x6a +#define BTTV_BOARD_VD009X1_VD011_COMBI 0x6b #define BTTV_BOARD_VD009_MINIDIN 0x6c #define BTTV_BOARD_VD009_COMBI 0x6d #define BTTV_BOARD_IVC100 0x6e @@ -177,6 +177,10 @@ #define BTTV_BOARD_GEOVISION_GV600 0x96 #define BTTV_BOARD_KOZUMI_KTV_01C 0x97 #define BTTV_BOARD_ENLTV_FM_2 0x98 +#define BTTV_BOARD_VD012 0x99 +#define BTTV_BOARD_VD012_X1 0x9a +#define BTTV_BOARD_VD012_X2 0x9b + /* more card-specific defines */ #define PT2254_L_CHANNEL 0x10 -- cgit v1.2.3-70-g09d2 From 6e4ae872c580d6a388a52ddfdfc93b4215af1180 Mon Sep 17 00:00:00 2001 From: Jean-Francois Moine Date: Sat, 22 Nov 2008 05:35:49 -0300 Subject: V4L/DVB (9713): gspca: Add the ov534 webcams in the gspca documentation. 
Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 3 +++ drivers/media/video/gspca/gspca.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 2a23fe31e0e..4c96adc0ba5 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -169,6 +169,8 @@ spca500 06bd:0404 Agfa CL20 spca500 06be:0800 Optimedia sunplus 06d6:0031 Trust 610 LCD PowerC@m Zoom spca506 06e1:a190 ADS Instant VCD +ov534 06f8:3002 Hercules Blog Webcam +ov534 06f8:3003 Hercules Dualpix HD Weblog spca508 0733:0110 ViewQuest VQ110 spca508 0130:0130 Clone Digital Webcam 11043 spca501 0733:0401 Intel Create and Share @@ -263,6 +265,7 @@ etoms 102c:6251 Qcam xxxxxx VGA zc3xx 10fd:0128 Typhoon Webshot II USB 300k 0x0128 spca561 10fd:7e50 FlyCam Usb 100 zc3xx 10fd:8050 Typhoon Webshot II USB 300k +ov534 1415:2000 Sony HD Eye for PS3 (SLEH 00201) pac207 145f:013a Trust WB-1300N spca501 1776:501c Arowana 300K CMOS Camera t613 17a1:0128 TASCORP JPEG Webcam, NGS Cyclops diff --git a/drivers/media/video/gspca/gspca.c b/drivers/media/video/gspca/gspca.c index 0d233c72487..7f8b798deb1 100644 --- a/drivers/media/video/gspca/gspca.c +++ b/drivers/media/video/gspca/gspca.c @@ -1923,6 +1923,9 @@ void gspca_disconnect(struct usb_interface *intf) { struct gspca_dev *gspca_dev = usb_get_intfdata(intf); + gspca_dev->present = 0; + gspca_dev->streaming = 0; + usb_set_intfdata(intf, NULL); /* release the device */ -- cgit v1.2.3-70-g09d2 From 2966af73e70dee461c256b5eb877b2ff757f8c82 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:25:58 -0600 Subject: virtio: use LGUEST_VRING_ALIGN instead of relying on pagesize This doesn't really matter, since lguest is i386 only at the moment, but we could actually choose a different value. (lguest doesn't have a guaranteed ABI).
Signed-off-by: Rusty Russell --- Documentation/lguest/lguest.c | 6 +++--- drivers/lguest/lguest_device.c | 2 +- include/linux/lguest_launcher.h | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 804520633fc..aa2574ca94c 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -1030,7 +1030,7 @@ static void update_device_status(struct device *dev) /* Zero out the virtqueues. */ for (vq = dev->vq; vq; vq = vq->next) { memset(vq->vring.desc, 0, - vring_size(vq->config.num, getpagesize())); + vring_size(vq->config.num, LGUEST_VRING_ALIGN)); lg_last_avail(vq) = 0; } } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { @@ -1211,7 +1211,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, void *p; /* First we need some memory for this virtqueue. */ - pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) + pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1) / getpagesize(); p = get_pages(pages); @@ -1228,7 +1228,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, vq->config.pfn = to_guest_phys(p) / getpagesize(); /* Initialize the vring. */ - vring_init(&vq->vring, num_descs, p, getpagesize()); + vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); /* Append virtqueue to this device's descriptor. 
We use * device_config() to get the end of the device's current virtqueues; diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index a661bbdae3d..f062dc55c57 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -250,7 +250,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, /* Figure out how many pages the ring will take, and map that memory */ lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, DIV_ROUND_UP(vring_size(lvq->config.num, - PAGE_SIZE), + LGUEST_VRING_ALIGN), PAGE_SIZE)); if (!lvq->pages) { err = -ENOMEM; diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index e7217dc58f3..bd0eba76052 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -59,4 +59,8 @@ enum lguest_req LHREQ_IRQ, /* + irq */ LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ }; + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize for historical reasons. */ +#define LGUEST_VRING_ALIGN 4096 #endif /* _LINUX_LGUEST_LAUNCHER */ -- cgit v1.2.3-70-g09d2 From 58a24566449892dda409b9ad92c2e56c76c5670c Mon Sep 17 00:00:00 2001 From: Matias Zabaljauregui Date: Mon, 29 Sep 2008 01:40:07 -0300 Subject: lguest: move the initial guest page table creation code to the host This patch moves the initial guest page table creation code to the host, so the launcher keeps working with PAE enabled configs. 
Signed-off-by: Matias Zabaljauregui Signed-off-by: Rusty Russell --- Documentation/lguest/lguest.c | 60 ++++------------------------------ arch/x86/lguest/i386_head.S | 15 --------- drivers/lguest/lg.h | 2 +- drivers/lguest/lguest_user.c | 13 +++----- drivers/lguest/page_tables.c | 72 +++++++++++++++++++++++++++++++++++++++-- include/linux/lguest_launcher.h | 2 +- 6 files changed, 83 insertions(+), 81 deletions(-) (limited to 'Documentation') diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index aa2574ca94c..f2dbbf3bdea 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -481,51 +481,6 @@ static unsigned long load_initrd(const char *name, unsigned long mem) /* We return the initrd size. */ return len; } - -/* Once we know how much memory we have we can construct simple linear page - * tables which set virtual == physical which will get the Guest far enough - * into the boot to create its own. - * - * We lay them out of the way, just below the initrd (which is why we need to - * know its size here). */ -static unsigned long setup_pagetables(unsigned long mem, - unsigned long initrd_size) -{ - unsigned long *pgdir, *linear; - unsigned int mapped_pages, i, linear_pages; - unsigned int ptes_per_page = getpagesize()/sizeof(void *); - - mapped_pages = mem/getpagesize(); - - /* Each PTE page can map ptes_per_page pages: how many do we need? */ - linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page; - - /* We put the toplevel page directory page at the top of memory. */ - pgdir = from_guest_phys(mem) - initrd_size - getpagesize(); - - /* Now we use the next linear_pages pages as pte pages */ - linear = (void *)pgdir - linear_pages*getpagesize(); - - /* Linear mapping is easy: put every page's address into the mapping in - * order. PAGE_PRESENT contains the flags Present, Writable and - * Executable. 
*/ - for (i = 0; i < mapped_pages; i++) - linear[i] = ((i * getpagesize()) | PAGE_PRESENT); - - /* The top level points to the linear page table pages above. */ - for (i = 0; i < mapped_pages; i += ptes_per_page) { - pgdir[i/ptes_per_page] - = ((to_guest_phys(linear) + i*sizeof(void *)) - | PAGE_PRESENT); - } - - verbose("Linear mapping of %u pages in %u pte pages at %#lx\n", - mapped_pages, linear_pages, to_guest_phys(linear)); - - /* We return the top level (guest-physical) address: the kernel needs - * to know where it is. */ - return to_guest_phys(pgdir); -} /*:*/ /* Simple routine to roll all the commandline arguments together with spaces @@ -548,13 +503,13 @@ static void concat(char *dst, char *args[]) /*L:185 This is where we actually tell the kernel to initialize the Guest. We * saw the arguments it expects when we looked at initialize() in lguest_user.c: - * the base of Guest "physical" memory, the top physical page to allow, the - * top level pagetable and the entry point for the Guest. */ -static int tell_kernel(unsigned long pgdir, unsigned long start) + * the base of Guest "physical" memory, the top physical page to allow and the + * entry point for the Guest. */ +static int tell_kernel(unsigned long start) { unsigned long args[] = { LHREQ_INITIALIZE, (unsigned long)guest_base, - guest_limit / getpagesize(), pgdir, start }; + guest_limit / getpagesize(), start }; int fd; verbose("Guest: %p - %p (%#lx)\n", @@ -1941,7 +1896,7 @@ int main(int argc, char *argv[]) { /* Memory, top-level pagetable, code startpoint and size of the * (optional) initrd. */ - unsigned long mem = 0, pgdir, start, initrd_size = 0; + unsigned long mem = 0, start, initrd_size = 0; /* Two temporaries and the /dev/lguest file descriptor. */ int i, c, lguest_fd; /* The boot information for the Guest. */ @@ -2040,9 +1995,6 @@ int main(int argc, char *argv[]) boot->hdr.type_of_loader = 0xFF; } - /* Set up the initial linear pagetables, starting below the initrd. 
*/ - pgdir = setup_pagetables(mem, initrd_size); - /* The Linux boot header contains an "E820" memory map: ours is a * simple, single region. */ boot->e820_entries = 1; @@ -2064,7 +2016,7 @@ int main(int argc, char *argv[]) /* We tell the kernel to initialize the Guest: this returns the open * /dev/lguest file descriptor. */ - lguest_fd = tell_kernel(pgdir, start); + lguest_fd = tell_kernel(start); /* We clone off a thread, which wakes the Launcher whenever one of the * input file descriptors needs attention. We call this the Waker, and diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 5c7cef34c9e..10b9bd35a8f 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -30,21 +30,6 @@ ENTRY(lguest_entry) movl $lguest_data - __PAGE_OFFSET, %edx int $LGUEST_TRAP_ENTRY - /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl - * instruction uses %esi implicitly as the source for the copy we're - * about to do. */ - movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi - - /* Copy first 32 entries of page directory to __PAGE_OFFSET entries. - * This means the first 128M of kernel memory will be mapped at - * PAGE_OFFSET where the kernel expects to run. This will get it far - * enough through boot to switch to its own pagetables. */ - movl $32, %ecx - movl %esi, %edi - addl $((__PAGE_OFFSET >> 22) * 4), %edi - rep - movsl - /* Set up the initial stack so we can run C code. 
*/ movl $(init_thread_union+THREAD_SIZE),%esp diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 5faefeaf679..f2c641e0bdd 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -164,7 +164,7 @@ void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt); void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt); /* page_tables.c: */ -int init_guest_pagetable(struct lguest *lg, unsigned long pgtable); +int init_guest_pagetable(struct lguest *lg); void free_guest_pagetable(struct lguest *lg); void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable); void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i); diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index e73a000473c..34bc017b8b3 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -146,7 +146,7 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) return 0; } -/*L:020 The initialization write supplies 4 pointer sized (32 or 64 bit) +/*L:020 The initialization write supplies 3 pointer sized (32 or 64 bit) * values (in addition to the LHREQ_INITIALIZE value). These are: * * base: The start of the Guest-physical memory inside the Launcher memory. @@ -155,9 +155,6 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) * allowed to access. The Guest memory lives inside the Launcher, so it sets * this to ensure the Guest can only reach its own memory. * - * pgdir: The (Guest-physical) address of the top of the initial Guest - * pagetables (which are set up by the Launcher). - * * start: The first instruction to execute ("eip" in x86-speak). */ static int initialize(struct file *file, const unsigned long __user *input) @@ -166,7 +163,7 @@ static int initialize(struct file *file, const unsigned long __user *input) * Guest. 
*/ struct lguest *lg; int err; - unsigned long args[4]; + unsigned long args[3]; /* We grab the Big Lguest lock, which protects against multiple * simultaneous initializations. */ @@ -192,14 +189,14 @@ static int initialize(struct file *file, const unsigned long __user *input) lg->mem_base = (void __user *)args[0]; lg->pfn_limit = args[1]; - /* This is the first cpu (cpu 0) and it will start booting at args[3] */ - err = lg_cpu_start(&lg->cpus[0], 0, args[3]); + /* This is the first cpu (cpu 0) and it will start booting at args[2] */ + err = lg_cpu_start(&lg->cpus[0], 0, args[2]); if (err) goto release_guest; /* Initialize the Guest's shadow page tables, using the toplevel * address the Launcher gave us. This allocates memory, so can fail. */ - err = init_guest_pagetable(lg, args[2]); + err = init_guest_pagetable(lg); if (err) goto free_regs; diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 81d0c605344..576a8318221 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "lg.h" /*M:008 We hold reference to pages, which prevents them from being swapped. @@ -581,15 +582,82 @@ void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx) release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx); } +/* Once we know how much memory we have we can construct simple identity + * (which set virtual == physical) and linear mappings + * which will get the Guest far enough into the boot to create its own. + * + * We lay them out of the way, just below the initrd (which is why we need to + * know its size here). 
*/ +static unsigned long setup_pagetables(struct lguest *lg, + unsigned long mem, + unsigned long initrd_size) +{ + pgd_t __user *pgdir; + pte_t __user *linear; + unsigned int mapped_pages, i, linear_pages, phys_linear; + unsigned long mem_base = (unsigned long)lg->mem_base; + + /* We have mapped_pages frames to map, so we need + * linear_pages page tables to map them. */ + mapped_pages = mem / PAGE_SIZE; + linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE; + + /* We put the toplevel page directory page at the top of memory. */ + pgdir = (pgd_t *)(mem + mem_base - initrd_size - PAGE_SIZE); + + /* Now we use the next linear_pages pages as pte pages */ + linear = (void *)pgdir - linear_pages * PAGE_SIZE; + + /* Linear mapping is easy: put every page's address into the + * mapping in order. */ + for (i = 0; i < mapped_pages; i++) { + pte_t pte; + pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER)); + if (copy_to_user(&linear[i], &pte, sizeof(pte)) != 0) + return -EFAULT; + } + + /* The top level points to the linear page table pages above. + * We setup the identity and linear mappings here. */ + phys_linear = (unsigned long)linear - mem_base; + for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) { + pgd_t pgd; + pgd = __pgd((phys_linear + i * sizeof(pte_t)) | + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); + + if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd)) + || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET) + + i / PTRS_PER_PTE], + &pgd, sizeof(pgd))) + return -EFAULT; + } + + /* We return the top level (guest-physical) address: remember where + * this is. */ + return (unsigned long)pgdir - mem_base; +} + /*H:500 (vii) Setting up the page tables initially. * * When a Guest is first created, the Launcher tells us where the toplevel of * its first page table is. 
We set some things up here: */ -int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) +int init_guest_pagetable(struct lguest *lg) { + u64 mem; + u32 initrd_size; + struct boot_params __user *boot = (struct boot_params *)lg->mem_base; + + /* Get the Guest memory size and the ramdisk size from the boot header + * located at lg->mem_base (Guest address 0). */ + if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem)) + || get_user(initrd_size, &boot->hdr.ramdisk_size)) + return -EFAULT; + /* We start on the first shadow page table, and give it a blank PGD * page. */ - lg->pgdirs[0].gpgdir = pgtable; + lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size); + if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir)) + return lg->pgdirs[0].gpgdir; lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); if (!lg->pgdirs[0].pgdir) return -ENOMEM; diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index bd0eba76052..a53407a4165 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -54,7 +54,7 @@ struct lguest_vqconfig { /* Write command first word is a request. */ enum lguest_req { - LHREQ_INITIALIZE, /* + base, pfnlimit, pgdir, start */ + LHREQ_INITIALIZE, /* + base, pfnlimit, start */ LHREQ_GETDMA, /* No longer used */ LHREQ_IRQ, /* + irq */ LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ -- cgit v1.2.3-70-g09d2 From 26d5f3a3fe917232cb77e2e3450f7d7f8698259c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Dec 2008 13:19:29 -0300 Subject: V4L/DVB (9772): saa7134: Add support for Kworld Plus TV Analog Lite PCI Thanks to Sistema Fenix (http://www.sistemafenix.com.br/) for sponsoring this development. 
Signed-off-by: Gilberto Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.saa7134 | 1 + drivers/media/common/ir-keymaps.c | 61 +++++++++++++++++++++++++++++ drivers/media/video/saa7134/saa7134-cards.c | 44 +++++++++++++++++++++ drivers/media/video/saa7134/saa7134-input.c | 14 +++++++ drivers/media/video/saa7134/saa7134.h | 1 + include/media/ir-common.h | 1 + 6 files changed, 122 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134 index dc67eef38ff..dd979dca8af 100644 --- a/Documentation/video4linux/CARDLIST.saa7134 +++ b/Documentation/video4linux/CARDLIST.saa7134 @@ -151,3 +151,4 @@ 150 -> Zogis Real Angel 220 151 -> ADS Tech Instant HDTV [1421:0380] 152 -> Asus Tiger Rev:1.00 [1043:4857] +153 -> Kworld Plus TV Analog Lite PCI [17de:7128] diff --git a/drivers/media/common/ir-keymaps.c b/drivers/media/common/ir-keymaps.c index 3534cdc1f95..d8229a0e9a9 100644 --- a/drivers/media/common/ir-keymaps.c +++ b/drivers/media/common/ir-keymaps.c @@ -2391,6 +2391,67 @@ IR_KEYTAB_TYPE ir_codes_powercolor_real_angel[IR_KEYTAB_SIZE] = { }; EXPORT_SYMBOL_GPL(ir_codes_powercolor_real_angel); +/* Kworld Plus TV Analog Lite PCI IR + Mauro Carvalho Chehab + */ +IR_KEYTAB_TYPE ir_codes_kworld_plus_tv_analog[IR_KEYTAB_SIZE] = { + [0x0c] = KEY_PROG1, /* Kworld key */ + [0x16] = KEY_CLOSECD, /* -> ) */ + [0x1d] = KEY_POWER2, + + [0x00] = KEY_1, + [0x01] = KEY_2, + [0x02] = KEY_3, /* Two keys have the same code: 3 and left */ + [0x03] = KEY_4, /* Two keys have the same code: 4 and right */ + [0x04] = KEY_5, + [0x05] = KEY_6, + [0x06] = KEY_7, + [0x07] = KEY_8, + [0x08] = KEY_9, + [0x0a] = KEY_0, + + [0x09] = KEY_AGAIN, + [0x14] = KEY_MUTE, + + [0x20] = KEY_UP, + [0x21] = KEY_DOWN, + [0x0b] = KEY_ENTER, + + [0x10] = KEY_CHANNELUP, + [0x11] = KEY_CHANNELDOWN, + + /* Couldn't map key left/key right since those + conflict with '3' and '4' scancodes + I dunno what the original 
driver does + */ + + [0x13] = KEY_VOLUMEUP, + [0x12] = KEY_VOLUMEDOWN, + + /* The lower part of the IR + There are several duplicated keycodes there. + Most of them conflict with digits. + Add mappings just to the unused scancodes. + Somehow, the original driver has a way to know, + but this doesn't seem to be on some GPIO. + Also, it is not related to the time between keyup + and keydown. + */ + [0x19] = KEY_PAUSE, /* Timeshift */ + [0x1a] = KEY_STOP, + [0x1b] = KEY_RECORD, + + [0x22] = KEY_TEXT, + + [0x15] = KEY_AUDIO, /* ((*)) */ + [0x0f] = KEY_ZOOM, + [0x1c] = KEY_SHUFFLE, /* snapshot */ + + [0x18] = KEY_RED, /* B */ + [0x23] = KEY_GREEN, /* C */ +}; +EXPORT_SYMBOL_GPL(ir_codes_kworld_plus_tv_analog); + IR_KEYTAB_TYPE ir_codes_avermedia_a16d[IR_KEYTAB_SIZE] = { [0x20] = KEY_LIST, [0x00] = KEY_POWER, diff --git a/drivers/media/video/saa7134/saa7134-cards.c b/drivers/media/video/saa7134/saa7134-cards.c index 863522899e8..f0b95804e41 100644 --- a/drivers/media/video/saa7134/saa7134-cards.c +++ b/drivers/media/video/saa7134/saa7134-cards.c @@ -4606,6 +4606,43 @@ struct saa7134_board saa7134_boards[] = { .gpio = 0x0200000, }, }, + [SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG] = { + .name = "Kworld Plus TV Analog Lite PCI", + .audio_clock = 0x00187de7, + .tuner_type = TUNER_YMEC_TVF_5533MF, + .radio_type = TUNER_TEA5767, + .tuner_addr = ADDR_UNSET, + .radio_addr = ADDR_UNSET, + .gpiomask = 0x80000700, + .inputs = { { + .name = name_tv, + .vmux = 1, + .amux = LINE2, + .tv = 1, + .gpio = 0x100, + }, { + .name = name_comp1, + .vmux = 3, + .amux = LINE1, + .gpio = 0x200, + }, { + .name = name_svideo, + .vmux = 8, + .amux = LINE1, + .gpio = 0x200, + } }, + .radio = { + .name = name_radio, + .vmux = 1, + .amux = LINE1, + .gpio = 0x100, + }, + .mute = { + .name = name_mute, + .vmux = 8, + .amux = 2, + }, + }, }; const unsigned int saa7134_bcount = ARRAY_SIZE(saa7134_boards); @@ -5652,6 +5689,12 @@ struct pci_device_id saa7134_pci_tbl[] = { .subvendor = 0x1043, .subdevice = 0x4878, 
/* REV:1.02G */ .driver_data = SAA7134_BOARD_ASUSTeK_TIGER_3IN1, + }, { + .vendor = PCI_VENDOR_ID_PHILIPS, + .device = PCI_DEVICE_ID_PHILIPS_SAA7134, + .subvendor = 0x17de, + .subdevice = 0x7128, + .driver_data = SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG, }, { /* --- boards without eeprom + subsystem ID --- */ .vendor = PCI_VENDOR_ID_PHILIPS, @@ -5880,6 +5923,7 @@ int saa7134_board_init1(struct saa7134_dev *dev) case SAA7134_BOARD_BEHOLD_507_9FM: case SAA7134_BOARD_GENIUS_TVGO_A11MCE: case SAA7134_BOARD_REAL_ANGEL_220: + case SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG: dev->has_remote = SAA7134_REMOTE_GPIO; break; case SAA7134_BOARD_FLYDVBS_LR300: diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c index c53fd5f9f6b..d2124f64e4e 100644 --- a/drivers/media/video/saa7134/saa7134-input.c +++ b/drivers/media/video/saa7134/saa7134-input.c @@ -97,6 +97,15 @@ static int build_key(struct saa7134_dev *dev) dprintk("build_key gpio=0x%x mask=0x%x data=%d\n", gpio, ir->mask_keycode, data); + switch (dev->board) { + case SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG: + if (data == ir->mask_keycode) + ir_input_nokey(ir->dev, &ir->ir); + else + ir_input_keydown(ir->dev, &ir->ir, data, data); + return 0; + } + if (ir->polling) { if ((ir->mask_keydown && (0 != (gpio & ir->mask_keydown))) || (ir->mask_keyup && (0 == (gpio & ir->mask_keyup)))) { @@ -586,6 +595,11 @@ int saa7134_input_init1(struct saa7134_dev *dev) mask_keyup = 0x4000; polling = 50; /* ms */ break; + case SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG: + ir_codes = ir_codes_kworld_plus_tv_analog; + mask_keycode = 0x7f; + polling = 40; /* ms */ + break; } if (NULL == ir_codes) { printk("%s: Oops: IR config error [card=%d]\n", diff --git a/drivers/media/video/saa7134/saa7134.h b/drivers/media/video/saa7134/saa7134.h index 9070efe4a4d..f6c1fcc7207 100644 --- a/drivers/media/video/saa7134/saa7134.h +++ b/drivers/media/video/saa7134/saa7134.h @@ -275,6 +275,7 @@ struct saa7134_format { #define 
SAA7134_BOARD_REAL_ANGEL_220 150 #define SAA7134_BOARD_ADS_INSTANT_HDTV_PCI 151 #define SAA7134_BOARD_ASUSTeK_TIGER 152 +#define SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG 153 #define SAA7134_MAXBOARDS 32 #define SAA7134_INPUT_MAX 8 diff --git a/include/media/ir-common.h b/include/media/ir-common.h index 3a88e13a20e..5bf2ea00678 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -158,6 +158,7 @@ extern IR_KEYTAB_TYPE ir_codes_encore_enltv_fm53[IR_KEYTAB_SIZE]; extern IR_KEYTAB_TYPE ir_codes_real_audio_220_32_keys[IR_KEYTAB_SIZE]; extern IR_KEYTAB_TYPE ir_codes_msi_tvanywhere_plus[IR_KEYTAB_SIZE]; extern IR_KEYTAB_TYPE ir_codes_ati_tv_wonder_hd_600[IR_KEYTAB_SIZE]; +extern IR_KEYTAB_TYPE ir_codes_kworld_plus_tv_analog[IR_KEYTAB_SIZE]; #endif /* -- cgit v1.2.3-70-g09d2 From f89bc32974a4376e8393001484af28d8c3350ab4 Mon Sep 17 00:00:00 2001 From: Douglas Schilling Landgraf Date: Mon, 1 Dec 2008 21:01:04 -0300 Subject: V4L/DVB (9793): em28xx: Add specific entry for WinTV-HVR 850 Added specific entry for WinTV-HVR 850 Signed-off-by: Douglas Schilling Landgraf Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.em28xx | 3 ++- drivers/media/video/em28xx/em28xx-cards.c | 32 +++++++++++++++++++++++++++++-- drivers/media/video/em28xx/em28xx-dvb.c | 1 + drivers/media/video/em28xx/em28xx.h | 1 + 4 files changed, 34 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index ea537f5ae40..a6734eb7bf7 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx @@ -14,7 +14,7 @@ 13 -> Terratec Prodigy XS (em2880) [0ccd:0047] 14 -> Pixelview Prolink PlayTV USB 2.0 (em2820/em2840) [eb1a:2821] 15 -> V-Gear PocketTV (em2800) - 16 -> Hauppauge WinTV HVR 950 (em2883) [2040:6513,2040:6517,2040:651b,2040:651f] + 16 -> Hauppauge WinTV HVR 950 (em2883) [2040:6513,2040:6517,2040:651b] 17 -> Pinnacle PCTV HD 
Pro Stick (em2880) [2304:0227] 18 -> Hauppauge WinTV HVR 900 (R2) (em2880) [2040:6502] 19 -> PointNix Intra-Oral Camera (em2860) @@ -58,3 +58,4 @@ 57 -> Kworld PlusTV HD Hybrid 330 (em2883) [eb1a:a316] 58 -> Compro VideoMate ForYou/Stereo (em2820/em2840) [185b:2041] 59 -> Pinnacle PCTV HD Mini (em2874) [2304:023f] + 60 -> Hauppauge WinTV HVR 850 (em2883) [2040:651f] diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index 01757c018b5..745268e1125 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -650,6 +650,32 @@ struct em28xx_board em28xx_boards[] = { .gpio = hauppauge_wintv_hvr_900_analog, } }, }, + [EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850] = { + .name = "Hauppauge WinTV HVR 850", + .tuner_type = TUNER_XC2028, + .tuner_gpio = default_tuner_gpio, + .mts_firmware = 1, + .has_dvb = 1, + .dvb_gpio = hauppauge_wintv_hvr_900_digital, + .ir_codes = ir_codes_hauppauge_new, + .decoder = EM28XX_TVP5150, + .input = { { + .type = EM28XX_VMUX_TELEVISION, + .vmux = TVP5150_COMPOSITE0, + .amux = EM28XX_AMUX_VIDEO, + .gpio = hauppauge_wintv_hvr_900_analog, + }, { + .type = EM28XX_VMUX_COMPOSITE1, + .vmux = TVP5150_COMPOSITE1, + .amux = EM28XX_AMUX_LINE_IN, + .gpio = hauppauge_wintv_hvr_900_analog, + }, { + .type = EM28XX_VMUX_SVIDEO, + .vmux = TVP5150_SVIDEO, + .amux = EM28XX_AMUX_LINE_IN, + .gpio = hauppauge_wintv_hvr_900_analog, + } }, + }, [EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950] = { .name = "Hauppauge WinTV HVR 950", .tuner_type = TUNER_XC2028, @@ -1281,8 +1307,8 @@ struct usb_device_id em28xx_id_table [] = { .driver_info = EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950 }, { USB_DEVICE(0x2040, 0x651b), /* RP HVR-950 */ .driver_info = EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950 }, - { USB_DEVICE(0x2040, 0x651f), /* HCW HVR-850 */ - .driver_info = EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950 }, + { USB_DEVICE(0x2040, 0x651f), + .driver_info = EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850 }, { USB_DEVICE(0x0438, 
0xb002), .driver_info = EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600 }, { USB_DEVICE(0x2001, 0xf112), @@ -1502,6 +1528,7 @@ static void em28xx_setup_xc3028(struct em28xx *dev, struct xc2028_ctrl *ctl) ctl->demod = XC3028_FE_DEFAULT; ctl->fname = XC3028L_DEFAULT_FIRMWARE; break; + case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850: case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950: case EM2880_BOARD_PINNACLE_PCTV_HD_PRO: /* FIXME: Better to specify the needed IF */ @@ -1686,6 +1713,7 @@ void em28xx_card_setup(struct em28xx *dev) case EM2820_BOARD_HAUPPAUGE_WINTV_USB_2: case EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900: case EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900_R2: + case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850: case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950: { struct tveeprom tv; diff --git a/drivers/media/video/em28xx/em28xx-dvb.c b/drivers/media/video/em28xx/em28xx-dvb.c index 211156d458f..967bf6859fc 100644 --- a/drivers/media/video/em28xx/em28xx-dvb.c +++ b/drivers/media/video/em28xx/em28xx-dvb.c @@ -409,6 +409,7 @@ static int dvb_init(struct em28xx *dev) em28xx_set_mode(dev, EM28XX_DIGITAL_MODE); /* init frontend */ switch (dev->model) { + case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850: case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950: case EM2880_BOARD_PINNACLE_PCTV_HD_PRO: case EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600: diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h index 64459a16076..54802d2d1bc 100644 --- a/drivers/media/video/em28xx/em28xx.h +++ b/drivers/media/video/em28xx/em28xx.h @@ -98,6 +98,7 @@ #define EM2883_BOARD_KWORLD_HYBRID_A316 57 #define EM2820_BOARD_COMPRO_VIDEOMATE_FORYOU 58 #define EM2874_BOARD_PINNACLE_PCTV_80E 59 +#define EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850 60 /* Limits minimum and default number of buffers */ #define EM28XX_MIN_BUF 4 -- cgit v1.2.3-70-g09d2 From 9bb1b7e879091f09fc677dca10c5e132b68a9da3 Mon Sep 17 00:00:00 2001 From: "Igor M. 
Liplianin" Date: Sun, 23 Nov 2008 14:11:16 -0300 Subject: V4L/DVB (9795): Add Compro VideoMate E650F (DVB-T part only). Add Compro VideoMate E650F (DVB-T part only). The card based on cx23885 PCI-Express chip, xc3028 tuner and ce6353 demodulator. Cc: Steven Toth Signed-off-by: Igor M. Liplianin Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.cx23885 | 1 + drivers/media/video/cx23885/cx23885-cards.c | 12 ++++++++++++ drivers/media/video/cx23885/cx23885-dvb.c | 1 + drivers/media/video/cx23885/cx23885.h | 1 + 4 files changed, 15 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.cx23885 b/Documentation/video4linux/CARDLIST.cx23885 index 64823ccacd6..35ea130e989 100644 --- a/Documentation/video4linux/CARDLIST.cx23885 +++ b/Documentation/video4linux/CARDLIST.cx23885 @@ -11,3 +11,4 @@ 10 -> DViCO FusionHDTV7 Dual Express [18ac:d618] 11 -> DViCO FusionHDTV DVB-T Dual Express [18ac:db78] 12 -> Leadtek Winfast PxDVR3200 H [107d:6681] + 13 -> Compro VideoMate E650F [185b:e800] diff --git a/drivers/media/video/cx23885/cx23885-cards.c b/drivers/media/video/cx23885/cx23885-cards.c index dac5ccc9ba7..caa098beeec 100644 --- a/drivers/media/video/cx23885/cx23885-cards.c +++ b/drivers/media/video/cx23885/cx23885-cards.c @@ -158,6 +158,10 @@ struct cx23885_board cx23885_boards[] = { .name = "Leadtek Winfast PxDVR3200 H", .portc = CX23885_MPEG_DVB, }, + [CX23885_BOARD_COMPRO_VIDEOMATE_E650F] = { + .name = "Compro VideoMate E650F", + .portc = CX23885_MPEG_DVB, + }, }; const unsigned int cx23885_bcount = ARRAY_SIZE(cx23885_boards); @@ -237,6 +241,10 @@ struct cx23885_subid cx23885_subids[] = { .subvendor = 0x107d, .subdevice = 0x6681, .card = CX23885_BOARD_LEADTEK_WINFAST_PXDVR3200_H, + }, { + .subvendor = 0x185b, + .subdevice = 0xe800, + .card = CX23885_BOARD_COMPRO_VIDEOMATE_E650F, }, }; const unsigned int cx23885_idcount = ARRAY_SIZE(cx23885_subids); @@ -390,6 +398,7 @@ int cx23885_tuner_callback(void *priv, int 
component, int command, int arg) case CX23885_BOARD_HAUPPAUGE_HVR1500: case CX23885_BOARD_HAUPPAUGE_HVR1500Q: case CX23885_BOARD_LEADTEK_WINFAST_PXDVR3200_H: + case CX23885_BOARD_COMPRO_VIDEOMATE_E650F: /* Tuner Reset Command */ bitmask = 0x04; break; @@ -530,6 +539,7 @@ void cx23885_gpio_setup(struct cx23885_dev *dev) cx_set(GP0_IO, 0x000f000f); break; case CX23885_BOARD_LEADTEK_WINFAST_PXDVR3200_H: + case CX23885_BOARD_COMPRO_VIDEOMATE_E650F: /* GPIO-2 xc3028 tuner reset */ /* The following GPIO's are on the internal AVCore (cx25840) */ @@ -630,6 +640,7 @@ void cx23885_card_setup(struct cx23885_dev *dev) case CX23885_BOARD_HAUPPAUGE_HVR1700: case CX23885_BOARD_HAUPPAUGE_HVR1400: case CX23885_BOARD_LEADTEK_WINFAST_PXDVR3200_H: + case CX23885_BOARD_COMPRO_VIDEOMATE_E650F: default: ts2->gen_ctrl_val = 0xc; /* Serial bus + punctured clock */ ts2->ts_clk_en_val = 0x1; /* Enable TS_CLK */ @@ -644,6 +655,7 @@ void cx23885_card_setup(struct cx23885_dev *dev) case CX23885_BOARD_HAUPPAUGE_HVR1800lp: case CX23885_BOARD_HAUPPAUGE_HVR1700: case CX23885_BOARD_LEADTEK_WINFAST_PXDVR3200_H: + case CX23885_BOARD_COMPRO_VIDEOMATE_E650F: request_module("cx25840"); break; } diff --git a/drivers/media/video/cx23885/cx23885-dvb.c b/drivers/media/video/cx23885/cx23885-dvb.c index e1aac07b315..1c454128a9d 100644 --- a/drivers/media/video/cx23885/cx23885-dvb.c +++ b/drivers/media/video/cx23885/cx23885-dvb.c @@ -502,6 +502,7 @@ static int dvb_register(struct cx23885_tsport *port) break; } case CX23885_BOARD_LEADTEK_WINFAST_PXDVR3200_H: + case CX23885_BOARD_COMPRO_VIDEOMATE_E650F: i2c_bus = &dev->i2c_bus[0]; fe0->dvb.frontend = dvb_attach(zl10353_attach, diff --git a/drivers/media/video/cx23885/cx23885.h b/drivers/media/video/cx23885/cx23885.h index 1d53f54cd94..67828029fc6 100644 --- a/drivers/media/video/cx23885/cx23885.h +++ b/drivers/media/video/cx23885/cx23885.h @@ -66,6 +66,7 @@ #define CX23885_BOARD_DVICO_FUSIONHDTV_7_DUAL_EXP 10 #define CX23885_BOARD_DVICO_FUSIONHDTV_DVB_T_DUAL_EXP 
11 #define CX23885_BOARD_LEADTEK_WINFAST_PXDVR3200_H 12 +#define CX23885_BOARD_COMPRO_VIDEOMATE_E650F 13 /* Currently unsupported by the driver: PAL/H, NTSC/Kr, SECAM B/G/H/LC */ #define CX23885_NORMS (\ -- cgit v1.2.3-70-g09d2 From a5525685eeaec8e720323180530181ffe69a24f5 Mon Sep 17 00:00:00 2001 From: Hermann Pitton Date: Fri, 5 Dec 2008 19:49:34 -0300 Subject: V4L/DVB (9798): saa7134: add analog and DVB-T support for Medion/Creatix CTX946 How to enable the mpeg encoder is not found yet. The card comes up with gpio 0x0820000 for DVB-T. Signed-off-by: Hermann Pitton Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.saa7134 | 2 +- drivers/media/video/saa7134/saa7134-cards.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134 index dd979dca8af..335aef4dcae 100644 --- a/Documentation/video4linux/CARDLIST.saa7134 +++ b/Documentation/video4linux/CARDLIST.saa7134 @@ -10,7 +10,7 @@ 9 -> Medion 5044 10 -> Kworld/KuroutoShikou SAA7130-TVPCI 11 -> Terratec Cinergy 600 TV [153b:1143] - 12 -> Medion 7134 [16be:0003] + 12 -> Medion 7134 [16be:0003,16be:5000] 13 -> Typhoon TV+Radio 90031 14 -> ELSA EX-VISION 300TV [1048:226b] 15 -> ELSA EX-VISION 500TV [1048:226a] diff --git a/drivers/media/video/saa7134/saa7134-cards.c b/drivers/media/video/saa7134/saa7134-cards.c index f0b95804e41..a2e3f6729c5 100644 --- a/drivers/media/video/saa7134/saa7134-cards.c +++ b/drivers/media/video/saa7134/saa7134-cards.c @@ -4772,6 +4772,12 @@ struct pci_device_id saa7134_pci_tbl[] = { .subdevice = 0x0003, .driver_data = SAA7134_BOARD_MD7134, },{ + .vendor = PCI_VENDOR_ID_PHILIPS, + .device = PCI_DEVICE_ID_PHILIPS_SAA7134, + .subvendor = 0x16be, /* CTX946 analog TV, HW mpeg, DVB-T */ + .subdevice = 0x5000, /* only analog TV and DVB-T for now */ + .driver_data = SAA7134_BOARD_MD7134, + }, { .vendor = PCI_VENDOR_ID_PHILIPS, .device = 
PCI_DEVICE_ID_PHILIPS_SAA7130, .subvendor = 0x1048, -- cgit v1.2.3-70-g09d2 From b1f1d76ef7cc96541b6a16bff7082e9033f0ba08 Mon Sep 17 00:00:00 2001 From: Patrick Boettcher Date: Wed, 10 Dec 2008 18:27:39 -0300 Subject: V4L/DVB (9812): [PATCH] short help for Technisat cards to select the right configuration This patch adds a short help for Technisat cards to help the user select the right configuration for his card(s). Signed-off-by: Uwe Bugla Signed-off-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab --- Documentation/dvb/technisat.txt | 69 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 Documentation/dvb/technisat.txt (limited to 'Documentation') diff --git a/Documentation/dvb/technisat.txt b/Documentation/dvb/technisat.txt new file mode 100644 index 00000000000..cdf6ee4b2da --- /dev/null +++ b/Documentation/dvb/technisat.txt @@ -0,0 +1,69 @@ +How to set up the Technisat devices +=================================== + +1) Find out what device you have +================================ + +First start your linux box with a shipped kernel: +lspci -vvv for a PCI device (lsusb -vvv for a USB device) will show you for example: +02:0b.0 Network controller: Techsan Electronics Co Ltd B2C2 FlexCopII DVB chip / Technisat SkyStar2 DVB card (rev 02) + +dmesg | grep frontend may show you for example: +DVB: registering frontend 0 (Conexant CX24123/CX24109)... + +2) Kernel compilation: +====================== + +If the Technisat is the only TV device in your box get rid of unnecessary modules and check this one: +"Multimedia devices" => "Customise analog and hybrid tuner modules to build" +In this directory uncheck every driver which is activated there. 
+ +Then please activate: +2a) Main module part: + +a.)"Multimedia devices" => "DVB/ATSC adapters" => "Technisat/B2C2 FlexcopII(b) and FlexCopIII adapters" +b.)"Multimedia devices" => "DVB/ATSC adapters" => "Technisat/B2C2 FlexcopII(b) and FlexCopIII adapters" => "Technisat/B2C2 Air/Sky/Cable2PC PCI" in case of a PCI card OR +c.)"Multimedia devices" => "DVB/ATSC adapters" => "Technisat/B2C2 FlexcopII(b) and FlexCopIII adapters" => "Technisat/B2C2 Air/Sky/Cable2PC USB" in case of an USB 1.1 adapter +d.)"Multimedia devices" => "DVB/ATSC adapters" => "Technisat/B2C2 FlexcopII(b) and FlexCopIII adapters" => "Enable debug for the B2C2 FlexCop drivers" +Notice: d.) is helpful for troubleshooting + +2b) Frontend module part: + +1.) Revision 2.3: +a.)"Multimedia devices" => "Customise DVB frontends" => "Customise the frontend modules to build" +b.)"Multimedia devices" => "Customise DVB frontends" => "Zarlink VP310/MT312/ZL10313 based" + +2.) Revision 2.6: +a.)"Multimedia devices" => "Customise DVB frontends" => "Customise the frontend modules to build" +b.)"Multimedia devices" => "Customise DVB frontends" => "ST STV0299 based" + +3.) Revision 2.7: +a.)"Multimedia devices" => "Customise DVB frontends" => "Customise the frontend modules to build" +b.)"Multimedia devices" => "Customise DVB frontends" => "Samsung S5H1420 based" +c.)"Multimedia devices" => "Customise DVB frontends" => "Integrant ITD1000 Zero IF tuner for DVB-S/DSS" +d.)"Multimedia devices" => "Customise DVB frontends" => "ISL6421 SEC controller" + +4.) Revision 2.8: +a.)"Multimedia devices" => "Customise DVB frontends" => "Customise the frontend modules to build" +b.)"Multimedia devices" => "Customise DVB frontends" => "Conexant CX24113/CX24128 tuner for DVB-S/DSS" +c.)"Multimedia devices" => "Customise DVB frontends" => "Conexant CX24123 based" +d.)"Multimedia devices" => "Customise DVB frontends" => "ISL6421 SEC controller" + +5.) 
DVB-T card: +a.)"Multimedia devices" => "Customise DVB frontends" => "Customise the frontend modules to build" +b.)"Multimedia devices" => "Customise DVB frontends" => "Zarlink MT352 based" + +6.) DVB-C card: +a.)"Multimedia devices" => "Customise DVB frontends" => "Customise the frontend modules to build" +b.)"Multimedia devices" => "Customise DVB frontends" => "ST STV0297 based" + +7.) ATSC card 1st generation: +a.)"Multimedia devices" => "Customise DVB frontends" => "Customise the frontend modules to build" +b.)"Multimedia devices" => "Customise DVB frontends" => "Broadcom BCM3510" + +8.) ATSC card 2nd generation: +a.)"Multimedia devices" => "Customise DVB frontends" => "Customise the frontend modules to build" +b.)"Multimedia devices" => "Customise DVB frontends" => "NxtWave Communications NXT2002/NXT2004 based" +c.)"Multimedia devices" => "Customise DVB frontends" => "LG Electronics LGDT3302/LGDT3303 based" + +Author: Uwe Bugla December 2008 -- cgit v1.2.3-70-g09d2 From 2a1fcdf08230522bd5024f91da24aaa6e8d81f59 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 29 Nov 2008 21:36:58 -0300 Subject: V4L/DVB (9820): v4l2: add v4l2_device and v4l2_subdev structs to the v4l2 framework. Start implementing a proper v4l2 framework as discussed during the Linux Plumbers Conference 2008. Introduces v4l2_device (for device instances) and v4l2_subdev (representing sub-device instances). 
Signed-off-by: Hans Verkuil Reviewed-by: Laurent Pinchart Reviewed-by: Guennadi Liakhovetski Reviewed-by: Andy Walls Reviewed-by: David Brownell Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/v4l2-framework.txt | 362 +++++++++++++++++++++++++++ drivers/media/video/Makefile | 2 +- drivers/media/video/v4l2-device.c | 86 +++++++ drivers/media/video/v4l2-subdev.c | 108 ++++++++ include/media/v4l2-device.h | 109 ++++++++ include/media/v4l2-subdev.h | 188 ++++++++++++++ 6 files changed, 854 insertions(+), 1 deletion(-) create mode 100644 Documentation/video4linux/v4l2-framework.txt create mode 100644 drivers/media/video/v4l2-device.c create mode 100644 drivers/media/video/v4l2-subdev.c create mode 100644 include/media/v4l2-device.h create mode 100644 include/media/v4l2-subdev.h (limited to 'Documentation') diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt new file mode 100644 index 00000000000..60eaf54e7ef --- /dev/null +++ b/Documentation/video4linux/v4l2-framework.txt @@ -0,0 +1,362 @@ +Overview of the V4L2 driver framework +===================================== + +This text documents the various structures provided by the V4L2 framework and +their relationships. + + +Introduction +------------ + +The V4L2 drivers tend to be very complex due to the complexity of the +hardware: most devices have multiple ICs, export multiple device nodes in +/dev, and create also non-V4L2 devices such as DVB, ALSA, FB, I2C and input +(IR) devices. + +Especially the fact that V4L2 drivers have to setup supporting ICs to +do audio/video muxing/encoding/decoding makes it more complex than most. +Usually these ICs are connected to the main bridge driver through one or +more I2C busses, but other busses can also be used. Such devices are +called 'sub-devices'. 
+ +For a long time the framework was limited to the video_device struct for +creating V4L device nodes and video_buf for handling the video buffers +(note that this document does not discuss the video_buf framework). + +This meant that all drivers had to do the setup of device instances and +connecting to sub-devices themselves. Some of this is quite complicated +to do right and many drivers never did do it correctly. + +There is also a lot of common code that could never be refactored due to +the lack of a framework. + +So this framework sets up the basic building blocks that all drivers +need and this same framework should make it much easier to refactor +common code into utility functions shared by all drivers. + + +Structure of a driver +--------------------- + +All drivers have the following structure: + +1) A struct for each device instance containing the device state. + +2) A way of initializing and commanding sub-devices (if any). + +3) Creating V4L2 device nodes (/dev/videoX, /dev/vbiX, /dev/radioX and + /dev/vtxX) and keeping track of device-node specific data. + +4) Filehandle-specific structs containing per-filehandle data. + +This is a rough schematic of how it all relates: + + device instances + | + +-sub-device instances + | + \-V4L2 device nodes + | + \-filehandle instances + + +Structure of the framework +-------------------------- + +The framework closely resembles the driver structure: it has a v4l2_device +struct for the device instance data, a v4l2_subdev struct to refer to +sub-device instances, the video_device struct stores V4L2 device node data +and in the future a v4l2_fh struct will keep track of filehandle instances +(this is not yet implemented). + + +struct v4l2_device +------------------ + +Each device instance is represented by a struct v4l2_device (v4l2-device.h). +Very simple devices can just allocate this struct, but most of the time you +would embed this struct inside a larger struct. 
+ +You must register the device instance: + + v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev); + +Registration will initialize the v4l2_device struct and link dev->driver_data +to v4l2_dev. Registration will also set v4l2_dev->name to a value derived from +dev (driver name followed by the bus_id, to be precise). You may change the +name after registration if you want. + +You unregister with: + + v4l2_device_unregister(struct v4l2_device *v4l2_dev); + +Unregistering will also automatically unregister all subdevs from the device. + +Sometimes you need to iterate over all devices registered by a specific +driver. This is usually the case if multiple device drivers use the same +hardware. E.g. the ivtvfb driver is a framebuffer driver that uses the ivtv +hardware. The same is true for alsa drivers for example. + +You can iterate over all registered devices as follows: + +static int callback(struct device *dev, void *p) +{ + struct v4l2_device *v4l2_dev = dev_get_drvdata(dev); + + /* test if this device was inited */ + if (v4l2_dev == NULL) + return 0; + ... + return 0; +} + +int iterate(void *p) +{ + struct device_driver *drv; + int err; + + /* Find driver 'ivtv' on the PCI bus. + pci_bus_type is a global. For USB busses use usb_bus_type. */ + drv = driver_find("ivtv", &pci_bus_type); + /* iterate over all ivtv device instances */ + err = driver_for_each_device(drv, NULL, p, callback); + put_driver(drv); + return err; +} + +Sometimes you need to keep a running counter of the device instance. This is +commonly used to map a device instance to an index of a module option array. + +The recommended approach is as follows: + +static atomic_t drv_instance = ATOMIC_INIT(0); + +static int __devinit drv_probe(struct pci_dev *dev, + const struct pci_device_id *pci_id) +{ + ... + state->instance = atomic_inc_return(&drv_instance) - 1; +} + + +struct v4l2_subdev +------------------ + +Many drivers need to communicate with sub-devices. 
These devices can do all +sorts of tasks, but most commonly they handle audio and/or video muxing, +encoding or decoding. For webcams common sub-devices are sensors and camera +controllers. + +Usually these are I2C devices, but not necessarily. In order to provide the +driver with a consistent interface to these sub-devices the v4l2_subdev struct +(v4l2-subdev.h) was created. + +Each sub-device driver must have a v4l2_subdev struct. This struct can be +stand-alone for simple sub-devices or it might be embedded in a larger struct +if more state information needs to be stored. Usually there is a low-level +device struct (e.g. i2c_client) that contains the device data as set up +by the kernel. It is recommended to store that pointer in the private +data of v4l2_subdev using v4l2_set_subdevdata(). That makes it easy to go +from a v4l2_subdev to the actual low-level bus-specific device data. + +You also need a way to go from the low-level struct to v4l2_subdev. For the +common i2c_client struct the i2c_set_clientdata() call is used to store a +v4l2_subdev pointer, for other busses you may have to use other methods. + +From the bridge driver perspective you load the sub-device module and somehow +obtain the v4l2_subdev pointer. For i2c devices this is easy: you call +i2c_get_clientdata(). For other busses something similar needs to be done. +Helper functions exist for sub-devices on an I2C bus that do most of this +tricky work for you. + +Each v4l2_subdev contains function pointers that sub-device drivers can +implement (or leave NULL if it is not applicable). Since sub-devices can do +so many different things and you do not want to end up with a huge ops struct +of which only a handful of ops are commonly implemented, the function pointers +are sorted according to category and each category has its own ops struct.
+ +The top-level ops struct contains pointers to the category ops structs, which +may be NULL if the subdev driver does not support anything from that category. + +It looks like this: + +struct v4l2_subdev_core_ops { + int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip); + int (*log_status)(struct v4l2_subdev *sd); + int (*init)(struct v4l2_subdev *sd, u32 val); + ... +}; + +struct v4l2_subdev_tuner_ops { + ... +}; + +struct v4l2_subdev_audio_ops { + ... +}; + +struct v4l2_subdev_video_ops { + ... +}; + +struct v4l2_subdev_ops { + const struct v4l2_subdev_core_ops *core; + const struct v4l2_subdev_tuner_ops *tuner; + const struct v4l2_subdev_audio_ops *audio; + const struct v4l2_subdev_video_ops *video; +}; + +The core ops are common to all subdevs, the other categories are implemented +depending on the sub-device. E.g. a video device is unlikely to support the +audio ops and vice versa. + +This setup limits the number of function pointers while still making it easy +to add new ops and categories. + +A sub-device driver initializes the v4l2_subdev struct using: + + v4l2_subdev_init(subdev, &ops); + +Afterwards you need to initialize subdev->name with a unique name and set the +module owner. This is done for you if you use the i2c helper functions. + +A device (bridge) driver needs to register the v4l2_subdev with the +v4l2_device: + + int err = v4l2_device_register_subdev(device, subdev); + +This can fail if the subdev module disappeared before it could be registered. +After this function was called successfully the subdev->dev field points to +the v4l2_device. + +You can unregister a sub-device using: + + v4l2_device_unregister_subdev(subdev); + +Afterwards the subdev module can be unloaded and subdev->dev == NULL. 
+ +You can call an ops function either directly: + + err = subdev->ops->core->g_chip_ident(subdev, &chip); + +but it is better and easier to use this macro: + + err = v4l2_subdev_call(subdev, core, g_chip_ident, &chip); + +The macro will do the right NULL pointer checks and returns -ENODEV if subdev +is NULL, -ENOIOCTLCMD if either subdev->ops->core or subdev->ops->core->g_chip_ident is +NULL, or the actual result of the subdev->ops->core->g_chip_ident ops. + +It is also possible to call all or a subset of the sub-devices: + + v4l2_device_call_all(dev, 0, core, g_chip_ident, &chip); + +Any subdev that does not support this ops is skipped and error results are +ignored. If you want to check for errors use this: + + err = v4l2_device_call_until_err(dev, 0, core, g_chip_ident, &chip); + +Any error except -ENOIOCTLCMD will exit the loop with that error. If no +errors (except -ENOIOCTLCMD) occurred, then 0 is returned. + +The second argument to both calls is a group ID. If 0, then all subdevs are +called. If non-zero, then only those whose group ID matches that value will +be called. Before a bridge driver registers a subdev it can set subdev->grp_id +to whatever value it wants (it's 0 by default). This value is owned by the +bridge driver and the sub-device driver will never modify or use it. + +The group ID gives the bridge driver more control how callbacks are called. +For example, there may be multiple audio chips on a board, each capable of +changing the volume. But usually only one will actually be used when the +user wants to change the volume. You can set the group ID for that subdev to +e.g. AUDIO_CONTROLLER and specify that as the group ID value when calling +v4l2_device_call_all(). That ensures that it will only go to the subdev +that needs it. + +The advantage of using v4l2_subdev is that it is a generic struct and does +not contain any knowledge about the underlying hardware.
So a driver might +contain several subdevs that use an I2C bus, but also a subdev that is +controlled through GPIO pins. This distinction is only relevant when setting +up the device, but once the subdev is registered it is completely transparent. + + +I2C sub-device drivers +---------------------- + +Since these drivers are so common, special helper functions are available to +ease the use of these drivers (v4l2-common.h). + +The recommended method of adding v4l2_subdev support to an I2C driver is to +embed the v4l2_subdev struct into the state struct that is created for each +I2C device instance. Very simple devices have no state struct and in that case +you can just create a v4l2_subdev directly. + +A typical state struct would look like this (where 'chipname' is replaced by +the name of the chip): + +struct chipname_state { + struct v4l2_subdev sd; + ... /* additional state fields */ +}; + +Initialize the v4l2_subdev struct as follows: + + v4l2_i2c_subdev_init(&state->sd, client, subdev_ops); + +This function will fill in all the fields of v4l2_subdev and ensure that the +v4l2_subdev and i2c_client both point to one another. + +You should also add a helper inline function to go from a v4l2_subdev pointer +to a chipname_state struct: + +static inline struct chipname_state *to_state(struct v4l2_subdev *sd) +{ + return container_of(sd, struct chipname_state, sd); +} + +Use this to go from the v4l2_subdev struct to the i2c_client struct: + + struct i2c_client *client = v4l2_get_subdevdata(sd); + +And this to go from an i2c_client to a v4l2_subdev struct: + + struct v4l2_subdev *sd = i2c_get_clientdata(client); + +Finally you need to make a command function to make driver->command() +call the right subdev_ops functions: + +static int subdev_command(struct i2c_client *client, unsigned cmd, void *arg) +{ + return v4l2_subdev_command(i2c_get_clientdata(client), cmd, arg); +} + +If driver->command is never used then you can leave this out. 
Eventually the +driver->command usage should be removed from v4l. + +Make sure to call v4l2_device_unregister_subdev(sd) when the remove() callback +is called. This will unregister the sub-device from the bridge driver. It is +safe to call this even if the sub-device was never registered. + + +The bridge driver also has some helper functions it can use: + +struct v4l2_subdev *sd = v4l2_i2c_new_subdev(adapter, "module_foo", "chipid", 0x36); + +This loads the given module (can be NULL if no module needs to be loaded) and +calls i2c_new_device() with the given i2c_adapter and chip/address arguments. +If all goes well, then it registers the subdev with the v4l2_device. It gets +the v4l2_device by calling i2c_get_adapdata(adapter), so you should make sure +that adapdata is set to v4l2_device when you setup the i2c_adapter in your +driver. + +You can also use v4l2_i2c_new_probed_subdev() which is very similar to +v4l2_i2c_new_subdev(), except that it has an array of possible I2C addresses +that it should probe. Internally it calls i2c_new_probed_device(). + +Both functions return NULL if something went wrong. + + +struct video_device +------------------- + +Not yet documented. diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile index 492ab3dce71..84a2be0cbbe 100644 --- a/drivers/media/video/Makefile +++ b/drivers/media/video/Makefile @@ -10,7 +10,7 @@ stkwebcam-objs := stk-webcam.o stk-sensor.o omap2cam-objs := omap24xxcam.o omap24xxcam-dma.o -videodev-objs := v4l2-dev.o v4l2-ioctl.o +videodev-objs := v4l2-dev.o v4l2-ioctl.o v4l2-device.o v4l2-subdev.o obj-$(CONFIG_VIDEO_DEV) += videodev.o v4l2-compat-ioctl32.o v4l2-int-device.o diff --git a/drivers/media/video/v4l2-device.c b/drivers/media/video/v4l2-device.c new file mode 100644 index 00000000000..9eefde03159 --- /dev/null +++ b/drivers/media/video/v4l2-device.c @@ -0,0 +1,86 @@ +/* + V4L2 device support. 
+ + Copyright (C) 2008 Hans Verkuil + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include + +int v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev) +{ + if (dev == NULL || v4l2_dev == NULL) + return -EINVAL; + /* Warn if we apparently re-register a device */ + WARN_ON(dev_get_drvdata(dev)); + INIT_LIST_HEAD(&v4l2_dev->subdevs); + spin_lock_init(&v4l2_dev->lock); + v4l2_dev->dev = dev; + snprintf(v4l2_dev->name, sizeof(v4l2_dev->name), "%s %s", + dev->driver->name, dev->bus_id); + dev_set_drvdata(dev, v4l2_dev); + return 0; +} +EXPORT_SYMBOL_GPL(v4l2_device_register); + +void v4l2_device_unregister(struct v4l2_device *v4l2_dev) +{ + struct v4l2_subdev *sd, *next; + + if (v4l2_dev == NULL || v4l2_dev->dev == NULL) + return; + dev_set_drvdata(v4l2_dev->dev, NULL); + /* unregister subdevs */ + list_for_each_entry_safe(sd, next, &v4l2_dev->subdevs, list) + v4l2_device_unregister_subdev(sd); + + v4l2_dev->dev = NULL; +} +EXPORT_SYMBOL_GPL(v4l2_device_unregister); + +int v4l2_device_register_subdev(struct v4l2_device *dev, struct v4l2_subdev *sd) +{ + /* Check for valid input */ + if (dev == NULL || sd == NULL || !sd->name[0]) + return -EINVAL; + /* Warn if we apparently re-register a subdev */ + WARN_ON(sd->dev); + if (!try_module_get(sd->owner)) + return 
-ENODEV; + sd->dev = dev; + spin_lock(&dev->lock); + list_add_tail(&sd->list, &dev->subdevs); + spin_unlock(&dev->lock); + return 0; +} +EXPORT_SYMBOL_GPL(v4l2_device_register_subdev); + +void v4l2_device_unregister_subdev(struct v4l2_subdev *sd) +{ + /* return if it isn't registered */ + if (sd == NULL || sd->dev == NULL) + return; + spin_lock(&sd->dev->lock); + list_del(&sd->list); + spin_unlock(&sd->dev->lock); + sd->dev = NULL; + module_put(sd->owner); +} +EXPORT_SYMBOL_GPL(v4l2_device_unregister_subdev); diff --git a/drivers/media/video/v4l2-subdev.c b/drivers/media/video/v4l2-subdev.c new file mode 100644 index 00000000000..fe1f01c970a --- /dev/null +++ b/drivers/media/video/v4l2-subdev.c @@ -0,0 +1,108 @@ +/* + V4L2 sub-device support. + + Copyright (C) 2008 Hans Verkuil + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include + +int v4l2_subdev_command(struct v4l2_subdev *sd, unsigned cmd, void *arg) +{ + switch (cmd) { + case VIDIOC_QUERYCTRL: /* arg points to a struct v4l2_queryctrl */ + return v4l2_subdev_call(sd, core, queryctrl, arg); + case VIDIOC_G_CTRL: + return v4l2_subdev_call(sd, core, g_ctrl, arg); + case VIDIOC_S_CTRL: + return v4l2_subdev_call(sd, core, s_ctrl, arg); + case VIDIOC_QUERYMENU: /* arg points to a struct v4l2_querymenu */ + return v4l2_subdev_call(sd, core, querymenu, arg); + case VIDIOC_LOG_STATUS: + return v4l2_subdev_call(sd, core, log_status); + case VIDIOC_G_CHIP_IDENT: + return v4l2_subdev_call(sd, core, g_chip_ident, arg); + case VIDIOC_INT_S_STANDBY: + return v4l2_subdev_call(sd, core, s_standby, *(u32 *)arg); + case VIDIOC_INT_RESET: + return v4l2_subdev_call(sd, core, reset, *(u32 *)arg); + case VIDIOC_INT_S_GPIO: + return v4l2_subdev_call(sd, core, s_gpio, *(u32 *)arg); + case VIDIOC_INT_INIT: + return v4l2_subdev_call(sd, core, init, *(u32 *)arg); +#ifdef CONFIG_VIDEO_ADV_DEBUG + case VIDIOC_DBG_G_REGISTER: + return v4l2_subdev_call(sd, core, g_register, arg); + case VIDIOC_DBG_S_REGISTER: + return v4l2_subdev_call(sd, core, s_register, arg); +#endif + + case VIDIOC_INT_S_TUNER_MODE: + return v4l2_subdev_call(sd, tuner, s_mode, *(enum v4l2_tuner_type *)arg); + case AUDC_SET_RADIO: + return v4l2_subdev_call(sd, tuner, s_radio); + case VIDIOC_S_TUNER: + return v4l2_subdev_call(sd, tuner, s_tuner, arg); + case VIDIOC_G_TUNER: + return v4l2_subdev_call(sd, tuner, g_tuner, arg); + case VIDIOC_S_STD: + return v4l2_subdev_call(sd, tuner, s_std, *(v4l2_std_id *)arg); + case VIDIOC_S_FREQUENCY: + return v4l2_subdev_call(sd, tuner, s_frequency, arg); + case VIDIOC_G_FREQUENCY: + return v4l2_subdev_call(sd, tuner, g_frequency, arg); + case TUNER_SET_TYPE_ADDR: + return
v4l2_subdev_call(sd, tuner, s_type_addr, arg); + case TUNER_SET_CONFIG: + return v4l2_subdev_call(sd, tuner, s_config, arg); + + case VIDIOC_INT_AUDIO_CLOCK_FREQ: + return v4l2_subdev_call(sd, audio, s_clock_freq, *(u32 *)arg); + case VIDIOC_INT_S_AUDIO_ROUTING: + return v4l2_subdev_call(sd, audio, s_routing, arg); + case VIDIOC_INT_I2S_CLOCK_FREQ: + return v4l2_subdev_call(sd, audio, s_i2s_clock_freq, *(u32 *)arg); + + case VIDIOC_INT_S_VIDEO_ROUTING: + return v4l2_subdev_call(sd, video, s_routing, arg); + case VIDIOC_INT_S_CRYSTAL_FREQ: + return v4l2_subdev_call(sd, video, s_crystal_freq, arg); + case VIDIOC_INT_DECODE_VBI_LINE: + return v4l2_subdev_call(sd, video, decode_vbi_line, arg); + case VIDIOC_INT_S_VBI_DATA: + return v4l2_subdev_call(sd, video, s_vbi_data, arg); + case VIDIOC_INT_G_VBI_DATA: + return v4l2_subdev_call(sd, video, g_vbi_data, arg); + case VIDIOC_S_FMT: + return v4l2_subdev_call(sd, video, s_fmt, arg); + case VIDIOC_G_FMT: + return v4l2_subdev_call(sd, video, g_fmt, arg); + case VIDIOC_INT_S_STD_OUTPUT: + return v4l2_subdev_call(sd, video, s_std_output, *(v4l2_std_id *)arg); + case VIDIOC_STREAMON: + return v4l2_subdev_call(sd, video, s_stream, 1); + case VIDIOC_STREAMOFF: + return v4l2_subdev_call(sd, video, s_stream, 0); + + default: + return v4l2_subdev_call(sd, core, ioctl, cmd, arg); + } +} +EXPORT_SYMBOL_GPL(v4l2_subdev_command); diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h new file mode 100644 index 00000000000..97b283a0428 --- /dev/null +++ b/include/media/v4l2-device.h @@ -0,0 +1,109 @@ +/* + V4L2 device support header. + + Copyright (C) 2008 Hans Verkuil + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _V4L2_DEVICE_H +#define _V4L2_DEVICE_H + +#include + +/* Each instance of a V4L2 device should create the v4l2_device struct, + either stand-alone or embedded in a larger struct. + + It allows easy access to sub-devices (see v4l2-subdev.h) and provides + basic V4L2 device-level support. + */ + +#define V4L2_DEVICE_NAME_SIZE (BUS_ID_SIZE + 16) + +struct v4l2_device { + /* dev->driver_data points to this struct */ + struct device *dev; + /* used to keep track of the registered subdevs */ + struct list_head subdevs; + /* lock this struct; can be used by the driver as well if this + struct is embedded into a larger struct. */ + spinlock_t lock; + /* unique device name, by default the driver name + bus ID */ + char name[V4L2_DEVICE_NAME_SIZE]; +}; + +/* Initialize v4l2_dev and make dev->driver_data point to v4l2_dev */ +int __must_check v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev); +/* Set v4l2_dev->dev->driver_data to NULL and unregister all sub-devices */ +void v4l2_device_unregister(struct v4l2_device *v4l2_dev); + +/* Register a subdev with a v4l2 device. While registered the subdev module + is marked as in-use. An error is returned if the module is no longer + loaded when you attempt to register it. */ +int __must_check v4l2_device_register_subdev(struct v4l2_device *dev, struct v4l2_subdev *sd); +/* Unregister a subdev with a v4l2 device. Can also be called if the subdev + wasn't registered. In that case it will do nothing. 
*/ +void v4l2_device_unregister_subdev(struct v4l2_subdev *sd); + +/* Iterate over all subdevs. */ +#define v4l2_device_for_each_subdev(sd, dev) \ + list_for_each_entry(sd, &(dev)->subdevs, list) + +/* Call the specified callback for all subdevs matching the condition. + Ignore any errors. Note that you cannot add or delete a subdev + while walking the subdevs list. */ +#define __v4l2_device_call_subdevs(dev, cond, o, f, args...) \ + do { \ + struct v4l2_subdev *sd; \ + \ + list_for_each_entry(sd, &(dev)->subdevs, list) \ + if ((cond) && sd->ops->o && sd->ops->o->f) \ + sd->ops->o->f(sd , ##args); \ + } while (0) + +/* Call the specified callback for all subdevs matching the condition. + If the callback returns an error other than 0 or -ENOIOCTLCMD, then + return with that error code. Note that you cannot add or delete a + subdev while walking the subdevs list. */ +#define __v4l2_device_call_subdevs_until_err(dev, cond, o, f, args...) \ +({ \ + struct v4l2_subdev *sd; \ + int err = 0; \ + \ + list_for_each_entry(sd, &(dev)->subdevs, list) { \ + if ((cond) && sd->ops->o && sd->ops->o->f) \ + err = sd->ops->o->f(sd , ##args); \ + if (err && err != -ENOIOCTLCMD) \ + break; \ + } \ + (err == -ENOIOCTLCMD) ? 0 : err; \ +}) + +/* Call the specified callback for all subdevs matching grp_id (if 0, then + match them all). Ignore any errors. Note that you cannot add or delete + a subdev while walking the subdevs list. */ +#define v4l2_device_call_all(dev, grp_id, o, f, args...) \ + __v4l2_device_call_subdevs(dev, \ + !(grp_id) || sd->grp_id == (grp_id), o, f , ##args) + +/* Call the specified callback for all subdevs matching grp_id (if 0, then + match them all). If the callback returns an error other than 0 or + -ENOIOCTLCMD, then return with that error code. Note that you cannot + add or delete a subdev while walking the subdevs list. */ +#define v4l2_device_call_until_err(dev, grp_id, o, f, args...) 
\ + __v4l2_device_call_subdevs_until_err(dev, \ + !(grp_id) || sd->grp_id == (grp_id), o, f , ##args) + +#endif diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h new file mode 100644 index 00000000000..bc9e0fbf282 --- /dev/null +++ b/include/media/v4l2-subdev.h @@ -0,0 +1,188 @@ +/* + V4L2 sub-device support header. + + Copyright (C) 2008 Hans Verkuil + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _V4L2_SUBDEV_H +#define _V4L2_SUBDEV_H + +#include + +struct v4l2_device; +struct v4l2_subdev; +struct tuner_setup; + +/* Sub-devices are devices that are connected somehow to the main bridge + device. These devices are usually audio/video muxers/encoders/decoders or + sensors and webcam controllers. + + Usually these devices are controlled through an i2c bus, but other busses + may also be used. + + The v4l2_subdev struct provides a way of accessing these devices in a + generic manner. Most operations that these sub-devices support fall in + a few categories: core ops, audio ops, video ops and tuner ops. + + More categories can be added if needed, although this should remain a + limited set (no more than approx. 8 categories). + + Each category has its own set of ops that subdev drivers can implement. 
+ + A subdev driver can leave the pointer to the category ops NULL if + it does not implement them (e.g. an audio subdev will generally not + implement the video category ops). The exception is the core category: + this must always be present. + + These ops are all used internally so it is no problem to change, remove + or add ops or move ops from one to another category. Currently these + ops are based on the original ioctls, but since ops are not limited to + one argument there is room for improvement here once all i2c subdev + drivers are converted to use these ops. + */ + +/* Core ops: it is highly recommended to implement at least these ops: + + g_chip_ident + log_status + g_register + s_register + + This provides basic debugging support. + + The ioctl ops is meant for generic ioctl-like commands. Depending on + the use-case it might be better to use subdev-specific ops (currently + not yet implemented) since ops provide proper type-checking. + */ +struct v4l2_subdev_core_ops { + int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip); + int (*log_status)(struct v4l2_subdev *sd); + int (*init)(struct v4l2_subdev *sd, u32 val); + int (*s_standby)(struct v4l2_subdev *sd, u32 standby); + int (*reset)(struct v4l2_subdev *sd, u32 val); + int (*s_gpio)(struct v4l2_subdev *sd, u32 val); + int (*queryctrl)(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc); + int (*g_ctrl)(struct v4l2_subdev *sd, struct v4l2_control *ctrl); + int (*s_ctrl)(struct v4l2_subdev *sd, struct v4l2_control *ctrl); + int (*querymenu)(struct v4l2_subdev *sd, struct v4l2_querymenu *qm); + int (*ioctl)(struct v4l2_subdev *sd, int cmd, void *arg); +#ifdef CONFIG_VIDEO_ADV_DEBUG + int (*g_register)(struct v4l2_subdev *sd, struct v4l2_register *reg); + int (*s_register)(struct v4l2_subdev *sd, struct v4l2_register *reg); +#endif +}; + +struct v4l2_subdev_tuner_ops { + int (*s_mode)(struct v4l2_subdev *sd, enum v4l2_tuner_type); + int (*s_radio)(struct v4l2_subdev *sd); + int 
(*s_frequency)(struct v4l2_subdev *sd, struct v4l2_frequency *freq); + int (*g_frequency)(struct v4l2_subdev *sd, struct v4l2_frequency *freq); + int (*g_tuner)(struct v4l2_subdev *sd, struct v4l2_tuner *vt); + int (*s_tuner)(struct v4l2_subdev *sd, struct v4l2_tuner *vt); + int (*s_std)(struct v4l2_subdev *sd, v4l2_std_id norm); + int (*s_type_addr)(struct v4l2_subdev *sd, struct tuner_setup *type); + int (*s_config)(struct v4l2_subdev *sd, const struct v4l2_priv_tun_config *config); +}; + +struct v4l2_subdev_audio_ops { + int (*s_clock_freq)(struct v4l2_subdev *sd, u32 freq); + int (*s_i2s_clock_freq)(struct v4l2_subdev *sd, u32 freq); + int (*s_routing)(struct v4l2_subdev *sd, const struct v4l2_routing *route); +}; + +struct v4l2_subdev_video_ops { + int (*s_routing)(struct v4l2_subdev *sd, const struct v4l2_routing *route); + int (*s_crystal_freq)(struct v4l2_subdev *sd, struct v4l2_crystal_freq *freq); + int (*decode_vbi_line)(struct v4l2_subdev *sd, struct v4l2_decode_vbi_line *vbi_line); + int (*s_vbi_data)(struct v4l2_subdev *sd, const struct v4l2_sliced_vbi_data *vbi_data); + int (*g_vbi_data)(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_data *vbi_data); + int (*s_std_output)(struct v4l2_subdev *sd, v4l2_std_id std); + int (*s_stream)(struct v4l2_subdev *sd, int enable); + int (*s_fmt)(struct v4l2_subdev *sd, struct v4l2_format *fmt); + int (*g_fmt)(struct v4l2_subdev *sd, struct v4l2_format *fmt); +}; + +struct v4l2_subdev_ops { + const struct v4l2_subdev_core_ops *core; + const struct v4l2_subdev_tuner_ops *tuner; + const struct v4l2_subdev_audio_ops *audio; + const struct v4l2_subdev_video_ops *video; +}; + +#define V4L2_SUBDEV_NAME_SIZE 32 + +/* Each instance of a subdev driver should create this struct, either + stand-alone or embedded in a larger struct. 
+ */ +struct v4l2_subdev { + struct list_head list; + struct module *owner; + struct v4l2_device *dev; + const struct v4l2_subdev_ops *ops; + /* name must be unique */ + char name[V4L2_SUBDEV_NAME_SIZE]; + /* can be used to group similar subdevs, value is driver-specific */ + u32 grp_id; + /* pointer to private data */ + void *priv; +}; + +static inline void v4l2_set_subdevdata(struct v4l2_subdev *sd, void *p) +{ + sd->priv = p; +} + +static inline void *v4l2_get_subdevdata(const struct v4l2_subdev *sd) +{ + return sd->priv; +} + +/* Convert an ioctl-type command to the proper v4l2_subdev_ops function call. + This is used by subdev modules that can be called by both old-style ioctl + commands and through the v4l2_subdev_ops. + + The ioctl API of the subdev driver can call this function to call the + right ops based on the ioctl cmd and arg. + + Once all subdev drivers have been converted and all drivers no longer + use the ioctl interface, then this function can be removed. + */ +int v4l2_subdev_command(struct v4l2_subdev *sd, unsigned cmd, void *arg); + +static inline void v4l2_subdev_init(struct v4l2_subdev *sd, + const struct v4l2_subdev_ops *ops) +{ + INIT_LIST_HEAD(&sd->list); + /* ops->core MUST be set */ + BUG_ON(!ops || !ops->core); + sd->ops = ops; + sd->dev = NULL; + sd->name[0] = '\0'; + sd->grp_id = 0; + sd->priv = NULL; +} + +/* Call an ops of a v4l2_subdev, doing the right checks against + NULL pointers. + + Example: err = v4l2_subdev_call(sd, core, g_chip_ident, &chip); + */ +#define v4l2_subdev_call(sd, o, f, args...) \ + (!(sd) ? -ENODEV : (((sd) && (sd)->ops->o && (sd)->ops->o->f) ? \ + (sd)->ops->o->f((sd) , ##args) : -ENOIOCTLCMD)) + +#endif -- cgit v1.2.3-70-g09d2 From 3319dc98a742d445a660268a6ce3426ad0922e2a Mon Sep 17 00:00:00 2001 From: Jean-Francois Moine Date: Mon, 1 Dec 2008 14:44:02 -0300 Subject: V4L/DVB (9848): gspca: Webcam 06f8:3004 added in sonixj. 
Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 1 + drivers/media/video/gspca/sonixj.c | 1 + 2 files changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 4c96adc0ba5..8d0210b2403 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -171,6 +171,7 @@ sunplus 06d6:0031 Trust 610 LCD PowerC@m Zoom spca506 06e1:a190 ADS Instant VCD ov534 06f8:3002 Hercules Blog Webcam ov534 06f8:3003 Hercules Dualpix HD Weblog +sonixj 06f8:3004 Hercules Classic Silver spca508 0733:0110 ViewQuest VQ110 spca508 0130:0130 Clone Digital Webcam 11043 spca501 0733:0401 Intel Create and Share diff --git a/drivers/media/video/gspca/sonixj.c b/drivers/media/video/gspca/sonixj.c index 504976960ce..698f200df23 100644 --- a/drivers/media/video/gspca/sonixj.c +++ b/drivers/media/video/gspca/sonixj.c @@ -1730,6 +1730,7 @@ static const __devinitdata struct usb_device_id device_table[] = { #endif {USB_DEVICE(0x0471, 0x0328), BSI(SN9C105, MI0360, 0x5d)}, {USB_DEVICE(0x0471, 0x0330), BSI(SN9C105, MI0360, 0x5d)}, + {USB_DEVICE(0x06f8, 0x3004), BSI(SN9C105, OV7660, 0x21)}, {USB_DEVICE(0x0c45, 0x6040), BSI(SN9C102P, HV7131R, 0x11)}, /* bw600.inf: {USB_DEVICE(0x0c45, 0x6040), BSI(SN9C102P, MI0360, 0x5d)}, */ -- cgit v1.2.3-70-g09d2 From d7bb7317d4caca55de72e5bd2229d68ed7cce7af Mon Sep 17 00:00:00 2001 From: Jean-Francois Moine Date: Tue, 2 Dec 2008 06:56:47 -0300 Subject: V4L/DVB (9849): gspca: Add the webcam 0c45:613a in the gspca documentation. 
Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 8d0210b2403..acf6c9f8e76 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -257,6 +257,7 @@ sonixj 0c45:612a Avant Camera sonixj 0c45:612c Typhoon Rasy Cam 1.3MPix sonixj 0c45:6130 Sonix Pccam sonixj 0c45:6138 Sn9c120 Mo4000 +sonixj 0c45:613a Microdia Sonix PC Camera sonixj 0c45:613b Surfer SN-206 sonixj 0c45:613c Sonix Pccam168 sonixj 0c45:6143 Sonix Pccam168 -- cgit v1.2.3-70-g09d2 From 121520a7084b48cb26437c6e89d4b491c3e4d4d5 Mon Sep 17 00:00:00 2001 From: Jean-Francois Moine Date: Wed, 3 Dec 2008 07:19:22 -0300 Subject: V4L/DVB (9853): gspca: Webcam 093a:2622 added in pac7311. Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 1 + drivers/media/video/gspca/pac7311.c | 1 + 2 files changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index acf6c9f8e76..72d31bd649c 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -217,6 +217,7 @@ pac7311 093a:2608 Trust WB-3300p pac7311 093a:260e Gigaware VGA PC Camera, Trust WB-3350p, SIGMA cam 2350 pac7311 093a:260f SnakeCam pac7311 093a:2621 PAC731x +pac7311 093a:2622 Genius Eye 312 pac7311 093a:2624 PAC7302 pac7311 093a:2626 Labtec 2200 pac7311 093a:262a Webcam 300k diff --git a/drivers/media/video/gspca/pac7311.c b/drivers/media/video/gspca/pac7311.c index fbd45e235d9..80c5975c8fe 100644 --- a/drivers/media/video/gspca/pac7311.c +++ b/drivers/media/video/gspca/pac7311.c @@ -1065,6 +1065,7 @@ static __devinitdata struct usb_device_id device_table[] = { {USB_DEVICE(0x093a, 0x260e), .driver_info = SENSOR_PAC7311}, 
{USB_DEVICE(0x093a, 0x260f), .driver_info = SENSOR_PAC7311}, {USB_DEVICE(0x093a, 0x2621), .driver_info = SENSOR_PAC7302}, + {USB_DEVICE(0x093a, 0x2622), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x2624), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x2626), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x262a), .driver_info = SENSOR_PAC7302}, -- cgit v1.2.3-70-g09d2 From a9da98a4336df020e5f089b963295373c6c3e6b9 Mon Sep 17 00:00:00 2001 From: Jean-Francois Moine Date: Wed, 3 Dec 2008 07:29:26 -0300 Subject: V4L/DVB (9854): gspca: Add the webcam 0c45:60fe in the gspca documentation. Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 72d31bd649c..a38c87246da 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -253,6 +253,7 @@ sonixj 0c45:60c0 Sangha Sn535 sonixj 0c45:60ec SN9C105+MO4000 sonixj 0c45:60fb Surfer NoName sonixj 0c45:60fc LG-LIC300 +sonixj 0c45:60fe Microdia Audio sonixj 0c45:6128 Microdia/Sonix SNP325 sonixj 0c45:612a Avant Camera sonixj 0c45:612c Typhoon Rasy Cam 1.3MPix -- cgit v1.2.3-70-g09d2 From 8852153548b31abb99c1c0772d03f92054f1f80d Mon Sep 17 00:00:00 2001 From: Jean-Francois Moine Date: Wed, 10 Dec 2008 05:06:13 -0300 Subject: V4L/DVB (9870): gspca - vc032x: Webcam 15b8:6002 and sensor po1200 added. 
Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 1 + drivers/media/video/gspca/vc032x.c | 319 +++++++++++++++++++++++++++++++++++- 2 files changed, 312 insertions(+), 8 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index a38c87246da..81d7d891c15 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -271,6 +271,7 @@ spca561 10fd:7e50 FlyCam Usb 100 zc3xx 10fd:8050 Typhoon Webshot II USB 300k ov534 1415:2000 Sony HD Eye for PS3 (SLEH 00201) pac207 145f:013a Trust WB-1300N +vc032x 15b8:6002 HP 2.0 Megapixel rz406aa spca501 1776:501c Arowana 300K CMOS Camera t613 17a1:0128 TASCORP JPEG Webcam, NGS Cyclops vc032x 17ef:4802 Lenovo Vc0323+MI1310_SOC diff --git a/drivers/media/video/gspca/vc032x.c b/drivers/media/video/gspca/vc032x.c index 9ac66c9b558..b1ab37e1222 100644 --- a/drivers/media/video/gspca/vc032x.c +++ b/drivers/media/video/gspca/vc032x.c @@ -47,7 +47,8 @@ struct sd { #define SENSOR_MI1310_SOC 3 #define SENSOR_OV7660 4 #define SENSOR_OV7670 5 -#define SENSOR_PO3130NC 6 +#define SENSOR_PO1200 6 +#define SENSOR_PO3130NC 7 }; /* V4L2 controls supported by the driver */ @@ -132,6 +133,14 @@ static struct v4l2_pix_format vc0323_mode[] = { .priv = 0}, }; +static struct v4l2_pix_format svga_mode[] = { + {800, 600, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, + .bytesperline = 800, + .sizeimage = 800 * 600 * 1 / 4 + 590, + .colorspace = V4L2_COLORSPACE_JPEG, + .priv = 0}, +}; + /* OV7660/7670 registers */ #define OV7660_REG_MVFP 0x1e #define OV7660_MVFP_MIRROR 0x20 @@ -1466,6 +1475,281 @@ static const __u8 ov7670_initQVGA_JPG[][4] = { {0x00, 0x77, 0x05, 0xaa }, {}, }; + +/* PO1200 - values from usbvm326.inf and ms-win trace */ +static const __u8 po1200_gamma[17] = { + 0x00, 0x13, 0x38, 0x59, 0x79, 0x92, 0xa7, 0xb9, 0xc8, + 0xd4, 0xdf, 0xe7, 0xee, 0xf4, 0xf9, 0xfc, 0xff +}; +static const __u8 
po1200_matrix[9] = { + 0x60, 0xf9, 0xe5, 0xe7, 0x50, 0x05, 0xf3, 0xe6, 0x5e +}; +static const __u8 po1200_initVGA_data[][4] = { + {0xb0, 0x03, 0x19, 0xcc}, /* reset? */ + {0xb0, 0x03, 0x19, 0xcc}, +/* {0x00, 0x00, 0x33, 0xdd}, */ + {0xb0, 0x04, 0x02, 0xcc}, + {0xb0, 0x02, 0x02, 0xcc}, + {0xb3, 0x5d, 0x00, 0xcc}, + {0xb3, 0x01, 0x01, 0xcc}, + {0xb3, 0x00, 0x64, 0xcc}, + {0xb3, 0x00, 0x65, 0xcc}, + {0xb3, 0x05, 0x01, 0xcc}, + {0xb3, 0x06, 0x01, 0xcc}, + {0xb3, 0x5c, 0x01, 0xcc}, + {0xb3, 0x08, 0x01, 0xcc}, + {0xb3, 0x09, 0x0c, 0xcc}, + {0xb3, 0x00, 0x67, 0xcc}, + {0xb3, 0x02, 0xb2, 0xcc}, + {0xb3, 0x03, 0x18, 0xcc}, + {0xb3, 0x04, 0x15, 0xcc}, + {0xb3, 0x20, 0x00, 0xcc}, + {0xb3, 0x21, 0x00, 0xcc}, + {0xb3, 0x22, 0x02, 0xcc}, + {0xb3, 0x23, 0x58, 0xcc}, + {0xb3, 0x14, 0x00, 0xcc}, + {0xb3, 0x15, 0x00, 0xcc}, + {0xb3, 0x16, 0x03, 0xcc}, + {0xb3, 0x17, 0x1f, 0xcc}, + {0xbc, 0x00, 0x71, 0xcc}, + {0xbc, 0x01, 0x01, 0xcc}, + {0xb0, 0x54, 0x13, 0xcc}, + {0xb3, 0x00, 0x67, 0xcc}, + {0xb3, 0x34, 0x01, 0xcc}, + {0xb3, 0x35, 0xdc, 0xcc}, + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0x12, 0x05, 0xaa}, + {0x00, 0x13, 0x02, 0xaa}, + {0x00, 0x1e, 0xc6, 0xaa}, + {0x00, 0x21, 0x00, 0xaa}, + {0x00, 0x25, 0x02, 0xaa}, + {0x00, 0x3c, 0x4f, 0xaa}, + {0x00, 0x3f, 0xe0, 0xaa}, + {0x00, 0x42, 0xff, 0xaa}, + {0x00, 0x45, 0x34, 0xaa}, + {0x00, 0x55, 0xfe, 0xaa}, + {0x00, 0x59, 0xd3, 0xaa}, + {0x00, 0x5e, 0x04, 0xaa}, + {0x00, 0x61, 0xb8, 0xaa}, + {0x00, 0x62, 0x02, 0xaa}, + {0x00, 0xa7, 0x31, 0xaa}, + {0x00, 0xa9, 0x66, 0xaa}, + {0x00, 0xb0, 0x00, 0xaa}, + {0x00, 0xb1, 0x00, 0xaa}, + {0x00, 0xb3, 0x11, 0xaa}, + {0x00, 0xb6, 0x26, 0xaa}, + {0x00, 0xb7, 0x20, 0xaa}, + {0x00, 0xba, 0x04, 0xaa}, + {0x00, 0x88, 0x42, 0xaa}, + {0x00, 0x89, 0x9a, 0xaa}, + {0x00, 0x8a, 0x88, 0xaa}, + {0x00, 0x8b, 0x8e, 0xaa}, + {0x00, 0x8c, 0x3e, 0xaa}, + {0x00, 0x8d, 0x90, 0xaa}, + {0x00, 0x8e, 0x87, 0xaa}, + {0x00, 0x8f, 0x96, 0xaa}, + {0x00, 0x90, 0x3d, 0xaa}, + {0x00, 0x64, 0x00, 0xaa}, + {0x00, 0x65, 0x10, 0xaa}, + 
{0x00, 0x66, 0x20, 0xaa}, + {0x00, 0x67, 0x2b, 0xaa}, + {0x00, 0x68, 0x36, 0xaa}, + {0x00, 0x69, 0x49, 0xaa}, + {0x00, 0x6a, 0x5a, 0xaa}, + {0x00, 0x6b, 0x7f, 0xaa}, + {0x00, 0x6c, 0x9b, 0xaa}, + {0x00, 0x6d, 0xba, 0xaa}, + {0x00, 0x6e, 0xd4, 0xaa}, + {0x00, 0x6f, 0xea, 0xaa}, + {0x00, 0x70, 0x00, 0xaa}, + {0x00, 0x71, 0x10, 0xaa}, + {0x00, 0x72, 0x20, 0xaa}, + {0x00, 0x73, 0x2b, 0xaa}, + {0x00, 0x74, 0x36, 0xaa}, + {0x00, 0x75, 0x49, 0xaa}, + {0x00, 0x76, 0x5a, 0xaa}, + {0x00, 0x77, 0x7f, 0xaa}, + {0x00, 0x78, 0x9b, 0xaa}, + {0x00, 0x79, 0xba, 0xaa}, + {0x00, 0x7a, 0xd4, 0xaa}, + {0x00, 0x7b, 0xea, 0xaa}, + {0x00, 0x7c, 0x00, 0xaa}, + {0x00, 0x7d, 0x10, 0xaa}, + {0x00, 0x7e, 0x20, 0xaa}, + {0x00, 0x7f, 0x2b, 0xaa}, + {0x00, 0x80, 0x36, 0xaa}, + {0x00, 0x81, 0x49, 0xaa}, + {0x00, 0x82, 0x5a, 0xaa}, + {0x00, 0x83, 0x7f, 0xaa}, + {0x00, 0x84, 0x9b, 0xaa}, + {0x00, 0x85, 0xba, 0xaa}, + {0x00, 0x86, 0xd4, 0xaa}, + {0x00, 0x87, 0xea, 0xaa}, + {0x00, 0x57, 0x2a, 0xaa}, + {0x00, 0x03, 0x01, 0xaa}, + {0x00, 0x04, 0x10, 0xaa}, + {0x00, 0x05, 0x10, 0xaa}, + {0x00, 0x06, 0x10, 0xaa}, + {0x00, 0x07, 0x10, 0xaa}, + {0x00, 0x08, 0x13, 0xaa}, + {0x00, 0x0a, 0x00, 0xaa}, + {0x00, 0x0b, 0x10, 0xaa}, + {0x00, 0x0c, 0x20, 0xaa}, + {0x00, 0x0d, 0x18, 0xaa}, + {0x00, 0x22, 0x01, 0xaa}, + {0x00, 0x23, 0x60, 0xaa}, + {0x00, 0x25, 0x08, 0xaa}, + {0x00, 0x26, 0x82, 0xaa}, + {0x00, 0x2e, 0x0f, 0xaa}, + {0x00, 0x2f, 0x1e, 0xaa}, + {0x00, 0x30, 0x2d, 0xaa}, + {0x00, 0x31, 0x3c, 0xaa}, + {0x00, 0x32, 0x4b, 0xaa}, + {0x00, 0x33, 0x5a, 0xaa}, + {0x00, 0x34, 0x69, 0xaa}, + {0x00, 0x35, 0x78, 0xaa}, + {0x00, 0x36, 0x87, 0xaa}, + {0x00, 0x37, 0x96, 0xaa}, + {0x00, 0x38, 0xa5, 0xaa}, + {0x00, 0x39, 0xb4, 0xaa}, + {0x00, 0x3a, 0xc3, 0xaa}, + {0x00, 0x3b, 0xd2, 0xaa}, + {0x00, 0x3c, 0xe1, 0xaa}, + {0x00, 0x3e, 0xff, 0xaa}, + {0x00, 0x3f, 0xff, 0xaa}, + {0x00, 0x40, 0xff, 0xaa}, + {0x00, 0x41, 0xff, 0xaa}, + {0x00, 0x42, 0xff, 0xaa}, + {0x00, 0x43, 0xff, 0xaa}, + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 
0x03, 0x00, 0xaa}, + {0x00, 0x20, 0xc4, 0xaa}, + {0x00, 0x13, 0x03, 0xaa}, + {0x00, 0x3c, 0x50, 0xaa}, + {0x00, 0x61, 0x6a, 0xaa}, + {0x00, 0x51, 0x5b, 0xaa}, + {0x00, 0x52, 0x91, 0xaa}, + {0x00, 0x53, 0x4c, 0xaa}, + {0x00, 0x54, 0x50, 0xaa}, + {0x00, 0x56, 0x02, 0xaa}, + {0xb6, 0x00, 0x00, 0xcc}, + {0xb6, 0x03, 0x03, 0xcc}, + {0xb6, 0x02, 0x20, 0xcc}, + {0xb6, 0x05, 0x02, 0xcc}, + {0xb6, 0x04, 0x58, 0xcc}, + {0xb6, 0x12, 0xf8, 0xcc}, + {0xb6, 0x13, 0x21, 0xcc}, + {0xb6, 0x18, 0x03, 0xcc}, + {0xb6, 0x17, 0xa9, 0xcc}, + {0xb6, 0x16, 0x80, 0xcc}, + {0xb6, 0x22, 0x12, 0xcc}, + {0xb6, 0x23, 0x0b, 0xcc}, + {0xbf, 0xc0, 0x39, 0xcc}, + {0xbf, 0xc1, 0x04, 0xcc}, + {0xbf, 0xcc, 0x00, 0xcc}, + {0xb8, 0x06, 0x20, 0xcc}, + {0xb8, 0x07, 0x03, 0xcc}, + {0xb8, 0x08, 0x58, 0xcc}, + {0xb8, 0x09, 0x02, 0xcc}, + {0xb3, 0x01, 0x41, 0xcc}, + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0xd9, 0x0f, 0xaa}, + {0x00, 0xda, 0xaa, 0xaa}, + {0x00, 0xd9, 0x10, 0xaa}, + {0x00, 0xda, 0xaa, 0xaa}, + {0x00, 0xd9, 0x11, 0xaa}, + {0x00, 0xda, 0x00, 0xaa}, + {0x00, 0xd9, 0x12, 0xaa}, + {0x00, 0xda, 0xff, 0xaa}, + {0x00, 0xd9, 0x13, 0xaa}, + {0x00, 0xda, 0xff, 0xaa}, + {0x00, 0xe8, 0x11, 0xaa}, + {0x00, 0xe9, 0x12, 0xaa}, + {0x00, 0xea, 0x5c, 0xaa}, + {0x00, 0xeb, 0xff, 0xaa}, + {0x00, 0xd8, 0x80, 0xaa}, + {0x00, 0xe6, 0x02, 0xaa}, + {0x00, 0xd6, 0x40, 0xaa}, + {0x00, 0xe3, 0x05, 0xaa}, + {0x00, 0xe0, 0x40, 0xaa}, + {0x00, 0xde, 0x03, 0xaa}, + {0x00, 0xdf, 0x03, 0xaa}, + {0x00, 0xdb, 0x02, 0xaa}, + {0x00, 0xdc, 0x00, 0xaa}, + {0x00, 0xdd, 0x03, 0xaa}, + {0x00, 0xe1, 0x08, 0xaa}, + {0x00, 0xe2, 0x01, 0xaa}, + {0x00, 0xd6, 0x40, 0xaa}, + {0x00, 0xe4, 0x40, 0xaa}, + {0x00, 0xa8, 0x9f, 0xaa}, + {0x00, 0xb4, 0x16, 0xaa}, + {0xb0, 0x02, 0x06, 0xcc}, + {0xb0, 0x18, 0x06, 0xcc}, + {0xb0, 0x19, 0x06, 0xcc}, + {0xb3, 0x5d, 0x18, 0xcc}, + {0xb3, 0x05, 0x00, 0xcc}, + {0xb3, 0x06, 0x00, 0xcc}, + {0x00, 0xb4, 0x0e, 0xaa}, + {0x00, 0xb5, 0x49, 0xaa}, + {0x00, 0xb6, 0x1c, 0xaa}, + {0x00, 0xb7, 0x96, 0xaa}, +/* end of 
usbvm326.inf - start of ms-win trace */ + {0xb6, 0x12, 0xf8, 0xcc}, + {0xb6, 0x13, 0x3d, 0xcc}, +/*read b306*/ + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0x1a, 0x09, 0xaa}, + {0x00, 0x1b, 0x8a, 0xaa}, +/*read b827*/ + {0xb8, 0x27, 0x00, 0xcc}, + {0xb8, 0x26, 0x60, 0xcc}, + {0xb8, 0x26, 0x60, 0xcc}, +/*gamma - to do?*/ + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0xae, 0x84, 0xaa}, +/*gamma again*/ + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0x96, 0xa0, 0xaa}, +/*matrix*/ + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0x91, 0x35, 0xaa}, + {0x00, 0x92, 0x22, 0xaa}, +/*gamma*/ + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0x95, 0x85, 0xaa}, +/*matrix*/ + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0x61, 0xb8, 0xaa}, + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0x4d, 0x20, 0xaa}, + {0xb8, 0x22, 0x40, 0xcc}, + {0xb8, 0x23, 0x40, 0xcc}, + {0xb8, 0x24, 0x40, 0xcc}, + {0xb8, 0x81, 0x09, 0xcc}, + {0x00, 0x00, 0x64, 0xdd}, + {0x00, 0x03, 0x01, 0xaa}, +/*read 46*/ + {0x00, 0x46, 0x3c, 0xaa}, + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0x16, 0x40, 0xaa}, + {0x00, 0x17, 0x40, 0xaa}, + {0x00, 0x18, 0x40, 0xaa}, + {0x00, 0x19, 0x41, 0xaa}, + {0x00, 0x03, 0x01, 0xaa}, + {0x00, 0x46, 0x3c, 0xaa}, + {0x00, 0x00, 0x18, 0xdd}, +/*read bfff*/ + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0x1e, 0x46, 0xaa}, + {0x00, 0xa8, 0x8f, 0xaa}, + {0x00, 0x03, 0x00, 0xaa}, + {0x00, 0xb4, 0x1c, 0xaa}, + {0x00, 0xb5, 0x92, 0xaa}, + {0x00, 0xb6, 0x39, 0xaa}, + {0x00, 0xb7, 0x24, 0xaa}, +/*write 89 0400 1415*/ +}; + struct sensor_info { int sensorId; __u8 I2cAdd; @@ -1486,6 +1770,7 @@ static const struct sensor_info sensor_info_data[] = { {SENSOR_MI1310_SOC, 0x80 | 0x5d, 0x00, 0x143a, 0x24, 0x25, 0x01}, /* (tested in vc032x_probe_sensor) */ /* {SENSOR_MI0360, 0x80 | 0x5d, 0x00, 0x8243, 0x24, 0x25, 0x01}, */ + {SENSOR_PO1200, 0x80 | 0x5c, 0x00, 0x1200, 0x67, 0x67, 0x01}, }; /* read 'len' bytes in gspca_dev->usb_buf */ @@ -1709,6 +1994,9 @@ static int sd_config(struct gspca_dev *gspca_dev, case SENSOR_OV7670: PDEBUG(D_PROBE, "Find Sensor OV7670"); break; + case 
SENSOR_PO1200: + PDEBUG(D_PROBE, "Find Sensor PO1200"); + break; case SENSOR_PO3130NC: PDEBUG(D_PROBE, "Find Sensor PO3130NC"); break; @@ -1719,8 +2007,13 @@ static int sd_config(struct gspca_dev *gspca_dev, cam->cam_mode = vc0321_mode; cam->nmodes = ARRAY_SIZE(vc0321_mode); } else { - cam->cam_mode = vc0323_mode; - cam->nmodes = ARRAY_SIZE(vc0323_mode); + if (sensor != SENSOR_PO1200) { + cam->cam_mode = vc0323_mode; + cam->nmodes = ARRAY_SIZE(vc0323_mode); + } else { + cam->cam_mode = svga_mode; + cam->nmodes = ARRAY_SIZE(svga_mode); + } } sd->qindex = 7; @@ -1888,6 +2181,11 @@ static int sd_start(struct gspca_dev *gspca_dev) } usb_exchange(gspca_dev, po3130_rundata); break; + case SENSOR_PO1200: + GammaT = po1200_gamma; + MatrixT = po1200_matrix; + usb_exchange(gspca_dev, po1200_initVGA_data); + break; default: PDEBUG(D_PROBE, "Damned !! no sensor found Bye"); return -EMEDIUMTYPE; @@ -1920,11 +2218,15 @@ static int sd_start(struct gspca_dev *gspca_dev) reg_w(gspca_dev->dev, 0xa0, 0x23, 0xb800); * ISP CTRL_BAS */ /* set the led on 0x0892 0x0896 */ - reg_w(gspca_dev->dev, 0x89, 0xffff, 0xfdff); - msleep(100); - setquality(gspca_dev); - sethvflip(gspca_dev); - setlightfreq(gspca_dev); + if (sd->sensor != SENSOR_PO1200) { + reg_w(gspca_dev->dev, 0x89, 0xffff, 0xfdff); + msleep(100); + setquality(gspca_dev); + sethvflip(gspca_dev); + setlightfreq(gspca_dev); + } else { + reg_w(gspca_dev->dev, 0x89, 0x0400, 0x1415); + } } return 0; } @@ -2071,6 +2373,7 @@ static const __devinitdata struct usb_device_id device_table[] = { {USB_DEVICE(0x0ac8, 0x0328), .driver_info = BRIDGE_VC0321}, {USB_DEVICE(0x0ac8, 0xc001), .driver_info = BRIDGE_VC0321}, {USB_DEVICE(0x0ac8, 0xc002), .driver_info = BRIDGE_VC0321}, + {USB_DEVICE(0x15b8, 0x6002), .driver_info = BRIDGE_VC0323}, {USB_DEVICE(0x17ef, 0x4802), .driver_info = BRIDGE_VC0323}, {} }; -- cgit v1.2.3-70-g09d2 From c665f4dd99a584036c2bd79a6baa25b06cae42f8 Mon Sep 17 00:00:00 2001 From: Devin Heitmueller Date: Tue, 16 Dec 2008 
23:35:23 -0300 Subject: V4L/DVB (9922): em28xx: don't assume every eb1a:2820 reference design is a Prolink PlayTV USB2 Don't operate under the assumption that every device that uses the em2820 default USB ID is a Prolink PlayTV USB. We have an eeprom hash, so use that, since otherwise we cannot support other devices with the 2820 default USB ID (such as the ADS Tech Instant TV USB USBAV-704) Signed-off-by: Devin Heitmueller Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.em28xx | 4 ++-- drivers/media/video/em28xx/em28xx-cards.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index a6734eb7bf7..0c4c721daba 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx @@ -1,5 +1,5 @@ 0 -> Unknown EM2800 video grabber (em2800) [eb1a:2800] - 1 -> Unknown EM2750/28xx video grabber (em2820/em2840) [eb1a:2820,eb1a:2860,eb1a:2861,eb1a:2870,eb1a:2881,eb1a:2883] + 1 -> Unknown EM2750/28xx video grabber (em2820/em2840) [eb1a:2820,eb1a:2821,eb1a:2860,eb1a:2861,eb1a:2870,eb1a:2881,eb1a:2883] 2 -> Terratec Cinergy 250 USB (em2820/em2840) [0ccd:0036] 3 -> Pinnacle PCTV USB 2 (em2820/em2840) [2304:0208] 4 -> Hauppauge WinTV USB 2 (em2820/em2840) [2040:4200,2040:4201] @@ -12,7 +12,7 @@ 11 -> Terratec Hybrid XS (em2880) [0ccd:0042] 12 -> Kworld PVR TV 2800 RF (em2820/em2840) 13 -> Terratec Prodigy XS (em2880) [0ccd:0047] - 14 -> Pixelview Prolink PlayTV USB 2.0 (em2820/em2840) [eb1a:2821] + 14 -> Pixelview Prolink PlayTV USB 2.0 (em2820/em2840) 15 -> V-Gear PocketTV (em2800) 16 -> Hauppauge WinTV HVR 950 (em2883) [2040:6513,2040:6517,2040:651b] 17 -> Pinnacle PCTV HD Pro Stick (em2880) [2304:0227] diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index 4b78a03528f..e5d9424d03f 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c 
+++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -1246,7 +1246,7 @@ struct usb_device_id em28xx_id_table [] = { { USB_DEVICE(0xeb1a, 0x2820), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2821), - .driver_info = EM2820_BOARD_PROLINK_PLAYTV_USB2 }, + .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2860), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2861), -- cgit v1.2.3-70-g09d2 From a47ddf1425554ca0b1e9b16b20a9d631e5daaaa8 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 19 Dec 2008 10:20:22 -0300 Subject: V4L/DVB (9943): v4l2: document video_device. Add the missing video_device documentation to v4l2-framework.txt. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/v4l2-framework.txt | 160 ++++++++++++++++++++++++++- 1 file changed, 159 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt index 60eaf54e7ef..eeae76c22a9 100644 --- a/Documentation/video4linux/v4l2-framework.txt +++ b/Documentation/video4linux/v4l2-framework.txt @@ -86,6 +86,9 @@ to v4l2_dev. Registration will also set v4l2_dev->name to a value derived from dev (driver name followed by the bus_id, to be precise). You may change the name after registration if you want. +The first 'dev' argument is normally the struct device pointer of a pci_dev, +usb_device or platform_device. + You unregister with: v4l2_device_unregister(struct v4l2_device *v4l2_dev); @@ -359,4 +362,159 @@ Both functions return NULL if something went wrong. struct video_device ------------------- -Not yet documented. +The actual device nodes in the /dev directory are created using the +video_device struct (v4l2-dev.h). This struct can either be allocated +dynamically or embedded in a larger struct. 
+ +To allocate it dynamically use: + + struct video_device *vdev = video_device_alloc(); + + if (vdev == NULL) + return -ENOMEM; + + vdev->release = video_device_release; + +If you embed it in a larger struct, then you must set the release() +callback to your own function: + + struct video_device *vdev = &my_vdev->vdev; + + vdev->release = my_vdev_release; + +The release callback must be set and it is called when the last user +of the video device exits. + +The default video_device_release() callback just calls kfree to free the +allocated memory. + +You should also set these fields: + +- parent: set to the parent device (same device as was used to register + v4l2_device). +- name: set to something descriptive and unique. +- fops: set to the file_operations struct. +- ioctl_ops: if you use the v4l2_ioctl_ops to simplify ioctl maintenance + (highly recommended to use this and it might become compulsory in the + future!), then set this to your v4l2_ioctl_ops struct. + +If you use v4l2_ioctl_ops, then you should set .unlocked_ioctl to +__video_ioctl2 or .ioctl to video_ioctl2 in your file_operations struct. + + +video_device registration +------------------------- + +Next you register the video device: this will create the character device +for you. + + err = video_register_device(vdev, VFL_TYPE_GRABBER, -1); + if (err) { + video_device_release(vdev); // or kfree(my_vdev); + return err; + } + +Which device is registered depends on the type argument. The following +types exist: + +VFL_TYPE_GRABBER: videoX for video input/output devices +VFL_TYPE_VBI: vbiX for vertical blank data (i.e. closed captions, teletext) +VFL_TYPE_RADIO: radioX for radio tuners +VFL_TYPE_VTX: vtxX for teletext devices (deprecated, don't use) + +The last argument gives you a certain amount of control over the device +kernel number used (i.e. the X in videoX). Normally you will pass -1 to +let the v4l2 framework pick the first free number. 
But if a driver creates +many devices, then it can be useful to have different video devices in +separate ranges. For example, video capture devices start at 0, video +output devices start at 16. + +So you can use the last argument to specify a minimum kernel number and +the v4l2 framework will try to pick the first free number that is equal to +or higher than what you passed. If that fails, then it will just pick the +first free number. + +Whenever a device node is created some attributes are also created for you. +If you look in /sys/class/video4linux you see the devices. Go into e.g. +video0 and you will see 'name' and 'index' attributes. The 'name' attribute +is the 'name' field of the video_device struct. The 'index' attribute is +a device node index that can be assigned by the driver, or that is calculated +for you. + +If you call video_register_device(), then the index is just increased by +1 for each device node you register. The first video device node you register +always starts off with 0. + +Alternatively you can call video_register_device_index() which is identical +to video_register_device(), but with an extra index argument. Here you can +pass a specific index value (between 0 and 31) that should be used. + +Users can set up udev rules that utilize the index attribute to make fancy +device names (e.g. 'mpegX' for MPEG video capture device nodes). + +After the device has been successfully registered, you can use these fields: + +- vfl_type: the device type passed to video_register_device. +- minor: the assigned device minor number. +- num: the device kernel number (i.e. the X in videoX). +- index: the device index number (calculated or set explicitly using + video_register_device_index). + +If the registration failed, then you need to call video_device_release() +to free the allocated video_device struct, or free your own struct if the +video_device was embedded in it.
The vdev->release() callback will never +be called if the registration failed, nor should you ever attempt to +unregister the device if the registration failed. + + +video_device cleanup +-------------------- + +When the video device nodes have to be removed, either during the unload +of the driver or because the USB device was disconnected, then you should +unregister them: + + video_unregister_device(vdev); + +This will remove the device nodes from sysfs (causing udev to remove them +from /dev). + +After video_unregister_device() returns no new opens can be done. + +However, in the case of USB devices some application might still have one +of these device nodes open. You should block all new accesses to read, +write, poll, etc. except possibly for certain ioctl operations like +queueing buffers. + +When the last user of the video device node exits, then the vdev->release() +callback is called and you can do the final cleanup there. + + +video_device helper functions +----------------------------- + +There are a few useful helper functions: + +You can set/get driver private data in the video_device struct using: + +void *video_get_drvdata(struct video_device *dev); +void video_set_drvdata(struct video_device *dev, void *data); + +Note that you can safely call video_set_drvdata() before calling +video_register_device(). + +And this function: + +struct video_device *video_devdata(struct file *file); + +returns the video_device belonging to the file struct. + +The final helper function combines video_get_drvdata with +video_devdata: + +void *video_drvdata(struct file *file); + +You can go from a video_device struct to the v4l2_device struct using: + +struct v4l2_device *v4l2_dev = dev_get_drvdata(vdev->parent); + -- cgit v1.2.3-70-g09d2 From ca8959bb07f27514f811200b4f71669b1783dc54 Mon Sep 17 00:00:00 2001 From: Jean-Francois Moine Date: Mon, 15 Dec 2008 04:12:57 -0300 Subject: V4L/DVB (9984): gspca - pac7311: Webcam 093a:262c added. 
Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 1 + drivers/media/video/gspca/pac7311.c | 1 + 2 files changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 81d7d891c15..a5545945648 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -221,6 +221,7 @@ pac7311 093a:2622 Genius Eye 312 pac7311 093a:2624 PAC7302 pac7311 093a:2626 Labtec 2200 pac7311 093a:262a Webcam 300k +pac7311 093a:262c Philips SPC 230 NC zc3xx 0ac8:0302 Z-star Vimicro zc0302 vc032x 0ac8:0321 Vimicro generic vc0321 vc032x 0ac8:0323 Vimicro Vc0323 diff --git a/drivers/media/video/gspca/pac7311.c b/drivers/media/video/gspca/pac7311.c index 80c5975c8fe..80af367bd35 100644 --- a/drivers/media/video/gspca/pac7311.c +++ b/drivers/media/video/gspca/pac7311.c @@ -1069,6 +1069,7 @@ static __devinitdata struct usb_device_id device_table[] = { {USB_DEVICE(0x093a, 0x2624), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x2626), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x262a), .driver_info = SENSOR_PAC7302}, + {USB_DEVICE(0x093a, 0x262c), .driver_info = SENSOR_PAC7302}, {} }; MODULE_DEVICE_TABLE(usb, device_table); -- cgit v1.2.3-70-g09d2 From 87945895bf14b0b4dacbcef6dc08f284177affc8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 20 Dec 2008 14:30:58 -0300 Subject: V4L/DVB (9992): gspca - pac207: Webcam 093a:2461 added. 
Signed-off-by: Hans de Goede Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 3 ++- drivers/media/video/gspca/pac207.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index a5545945648..ec00740e6c4 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -202,7 +202,8 @@ sunplus 08ca:2050 Medion MD 41437 sunplus 08ca:2060 Aiptek PocketDV5300 tv8532 0923:010f ICM532 cams mars 093a:050f Mars-Semi Pc-Camera -pac207 093a:2460 PAC207 Qtec Webcam 100 +pac207 093a:2460 Qtec Webcam 100 +pac207 093a:2461 HP Webcam pac207 093a:2463 Philips SPC 220 NC pac207 093a:2464 Labtec Webcam 1200 pac207 093a:2468 PAC207 diff --git a/drivers/media/video/gspca/pac207.c b/drivers/media/video/gspca/pac207.c index 84bcaaf4d32..e46b8e8f0a7 100644 --- a/drivers/media/video/gspca/pac207.c +++ b/drivers/media/video/gspca/pac207.c @@ -529,6 +529,7 @@ static const struct sd_desc sd_desc = { static const __devinitdata struct usb_device_id device_table[] = { {USB_DEVICE(0x041e, 0x4028)}, {USB_DEVICE(0x093a, 0x2460)}, + {USB_DEVICE(0x093a, 0x2461)}, {USB_DEVICE(0x093a, 0x2463)}, {USB_DEVICE(0x093a, 0x2464)}, {USB_DEVICE(0x093a, 0x2468)}, -- cgit v1.2.3-70-g09d2 From 88a40cfbf25d82758237250a04d9bed51266215c Mon Sep 17 00:00:00 2001 From: Fabio Rossi Date: Fri, 26 Dec 2008 14:41:48 -0300 Subject: V4L/DVB (9999): gspca - zc3xx: Webcam 046d:089d added. 
Signed-off-by: Fabio Rossi Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 1 + drivers/media/video/gspca/zc3xx.c | 1 + 2 files changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index ec00740e6c4..f34155f33a2 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -53,6 +53,7 @@ zc3xx 0461:0a00 MicroInnovation WebCam320 spca500 046d:0890 Logitech QuickCam traveler vc032x 046d:0892 Logitech Orbicam vc032x 046d:0896 Logitech Orbicam +zc3xx 046d:089d Logitech QuickCam E2500 zc3xx 046d:08a0 Logitech QC IM zc3xx 046d:08a1 Logitech QC IM 0x08A1 +sound zc3xx 046d:08a2 Labtec Webcam Pro diff --git a/drivers/media/video/gspca/zc3xx.c b/drivers/media/video/gspca/zc3xx.c index a62c8ac7b4a..8ded9e6e8a7 100644 --- a/drivers/media/video/gspca/zc3xx.c +++ b/drivers/media/video/gspca/zc3xx.c @@ -7533,6 +7533,7 @@ static const __devinitdata struct usb_device_id device_table[] = { {USB_DEVICE(0x0458, 0x700c)}, {USB_DEVICE(0x0458, 0x700f)}, {USB_DEVICE(0x0461, 0x0a00)}, + {USB_DEVICE(0x046d, 0x089d), .driver_info = SENSOR_MC501CB}, {USB_DEVICE(0x046d, 0x08a0)}, {USB_DEVICE(0x046d, 0x08a1)}, {USB_DEVICE(0x046d, 0x08a2)}, -- cgit v1.2.3-70-g09d2 From 71d50f30724c901c3d8cc7a19bdb3c33e1ee5463 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 27 Dec 2008 03:43:53 -0300 Subject: V4L/DVB (10044): gspca - pac7311: Webcam 093a:2620 added. 
Signed-off-by: Hans de Goede Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 1 + drivers/media/video/gspca/pac7311.c | 1 + 2 files changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index f34155f33a2..5daf2c80167 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -218,6 +218,7 @@ pac7311 093a:2603 PAC7312 pac7311 093a:2608 Trust WB-3300p pac7311 093a:260e Gigaware VGA PC Camera, Trust WB-3350p, SIGMA cam 2350 pac7311 093a:260f SnakeCam +pac7311 093a:2620 Apollo AC-905 pac7311 093a:2621 PAC731x pac7311 093a:2622 Genius Eye 312 pac7311 093a:2624 PAC7302 diff --git a/drivers/media/video/gspca/pac7311.c b/drivers/media/video/gspca/pac7311.c index 80af367bd35..1b72e15cb3a 100644 --- a/drivers/media/video/gspca/pac7311.c +++ b/drivers/media/video/gspca/pac7311.c @@ -1064,6 +1064,7 @@ static __devinitdata struct usb_device_id device_table[] = { {USB_DEVICE(0x093a, 0x2608), .driver_info = SENSOR_PAC7311}, {USB_DEVICE(0x093a, 0x260e), .driver_info = SENSOR_PAC7311}, {USB_DEVICE(0x093a, 0x260f), .driver_info = SENSOR_PAC7311}, + {USB_DEVICE(0x093a, 0x2620), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x2621), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x2622), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x2624), .driver_info = SENSOR_PAC7302}, -- cgit v1.2.3-70-g09d2 From 4c98834addfee3fdd42c505c37569261bf669d94 Mon Sep 17 00:00:00 2001 From: Erik Andren Date: Mon, 29 Dec 2008 07:35:23 -0300 Subject: V4L/DVB (10048): gspca - stv06xx: New subdriver. 
Signed-off-by: Erik Andren Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 3 + drivers/media/video/gspca/Kconfig | 1 + drivers/media/video/gspca/Makefile | 2 +- drivers/media/video/gspca/stv06xx/Kconfig | 9 + drivers/media/video/gspca/stv06xx/Makefile | 6 + drivers/media/video/gspca/stv06xx/stv06xx.c | 522 ++++++++++++++++++++ drivers/media/video/gspca/stv06xx/stv06xx.h | 107 +++++ drivers/media/video/gspca/stv06xx/stv06xx_hdcs.c | 533 +++++++++++++++++++++ drivers/media/video/gspca/stv06xx/stv06xx_hdcs.h | 263 ++++++++++ drivers/media/video/gspca/stv06xx/stv06xx_pb0100.c | 430 +++++++++++++++++ drivers/media/video/gspca/stv06xx/stv06xx_pb0100.h | 275 +++++++++++ drivers/media/video/gspca/stv06xx/stv06xx_sensor.h | 92 ++++ drivers/media/video/gspca/stv06xx/stv06xx_vv6410.c | 251 ++++++++++ drivers/media/video/gspca/stv06xx/stv06xx_vv6410.h | 315 ++++++++++++ 14 files changed, 2808 insertions(+), 1 deletion(-) create mode 100644 drivers/media/video/gspca/stv06xx/Kconfig create mode 100644 drivers/media/video/gspca/stv06xx/Makefile create mode 100644 drivers/media/video/gspca/stv06xx/stv06xx.c create mode 100644 drivers/media/video/gspca/stv06xx/stv06xx.h create mode 100644 drivers/media/video/gspca/stv06xx/stv06xx_hdcs.c create mode 100644 drivers/media/video/gspca/stv06xx/stv06xx_hdcs.h create mode 100644 drivers/media/video/gspca/stv06xx/stv06xx_pb0100.c create mode 100644 drivers/media/video/gspca/stv06xx/stv06xx_pb0100.h create mode 100644 drivers/media/video/gspca/stv06xx/stv06xx_sensor.h create mode 100644 drivers/media/video/gspca/stv06xx/stv06xx_vv6410.c create mode 100644 drivers/media/video/gspca/stv06xx/stv06xx_vv6410.h (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 5daf2c80167..f54281d78c1 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -50,6 +50,9 @@ ov519 045e:028c 
Micro$oft xbox cam spca508 0461:0815 Micro Innovation IC200 sunplus 0461:0821 Fujifilm MV-1 zc3xx 0461:0a00 MicroInnovation WebCam320 +stv06xx 046d:0840 QuickCam Express +stv06xx 046d:0850 LEGO cam / QuickCam Web +stv06xx 046d:0870 Dexxa WebCam USB spca500 046d:0890 Logitech QuickCam traveler vc032x 046d:0892 Logitech Orbicam vc032x 046d:0896 Logitech Orbicam diff --git a/drivers/media/video/gspca/Kconfig b/drivers/media/video/gspca/Kconfig index 770fb699d04..ee6a691dff2 100644 --- a/drivers/media/video/gspca/Kconfig +++ b/drivers/media/video/gspca/Kconfig @@ -18,6 +18,7 @@ menuconfig USB_GSPCA if USB_GSPCA && VIDEO_V4L2 source "drivers/media/video/gspca/m5602/Kconfig" +source "drivers/media/video/gspca/stv06xx/Kconfig" config USB_GSPCA_CONEX tristate "Conexant Camera Driver" diff --git a/drivers/media/video/gspca/Makefile b/drivers/media/video/gspca/Makefile index 6c8046e232c..bd8d9ee4050 100644 --- a/drivers/media/video/gspca/Makefile +++ b/drivers/media/video/gspca/Makefile @@ -47,4 +47,4 @@ gspca_vc032x-objs := vc032x.o gspca_zc3xx-objs := zc3xx.o obj-$(CONFIG_USB_M5602) += m5602/ - +obj-$(CONFIG_USB_STV06XX) += stv06xx/ diff --git a/drivers/media/video/gspca/stv06xx/Kconfig b/drivers/media/video/gspca/stv06xx/Kconfig new file mode 100644 index 00000000000..634ad38d9fb --- /dev/null +++ b/drivers/media/video/gspca/stv06xx/Kconfig @@ -0,0 +1,9 @@ +config USB_STV06XX + tristate "STV06XX USB Camera Driver" + depends on USB_GSPCA + help + Say Y here if you want support for cameras based on + the ST STV06XX chip. + + To compile this driver as a module, choose M here: the + module will be called gspca_stv06xx. 
diff --git a/drivers/media/video/gspca/stv06xx/Makefile b/drivers/media/video/gspca/stv06xx/Makefile new file mode 100644 index 00000000000..8f002b6233b --- /dev/null +++ b/drivers/media/video/gspca/stv06xx/Makefile @@ -0,0 +1,6 @@ +obj-$(CONFIG_USB_STV06XX) += gspca_stv06xx.o + +gspca_stv06xx-objs := stv06xx.o \ + stv06xx_vv6410.o \ + stv06xx_hdcs.o \ + stv06xx_pb0100.o diff --git a/drivers/media/video/gspca/stv06xx/stv06xx.c b/drivers/media/video/gspca/stv06xx/stv06xx.c new file mode 100644 index 00000000000..29e43718bfd --- /dev/null +++ b/drivers/media/video/gspca/stv06xx/stv06xx.c @@ -0,0 +1,522 @@ +/* + * Copyright (c) 2001 Jean-Fredric Clere, Nikolas Zimmermann, Georg Acher + * Mark Cave-Ayland, Carlo E Prelz, Dick Streefland + * Copyright (c) 2002, 2003 Tuukka Toivonen + * Copyright (c) 2008 Erik Andrén + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * P/N 861037: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0010: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0020: Sensor Photobit PB100 ASIC STV0600-1 - QuickCam Express + * P/N 861055: Sensor ST VV6410 ASIC STV0610 - LEGO cam + * P/N 861075-0040: Sensor HDCS1000 ASIC + * P/N 961179-0700: Sensor ST VV6410 ASIC STV0602 - Dexxa WebCam USB + * P/N 861040-0000: Sensor ST VV6410 ASIC STV0610 - QuickCam Web + */ + +#include "stv06xx_sensor.h" + +MODULE_AUTHOR("Erik Andrén"); +MODULE_DESCRIPTION("STV06XX USB Camera Driver"); +MODULE_LICENSE("GPL"); + +int dump_bridge; +int dump_sensor; + +int stv06xx_write_bridge(struct sd *sd, u16 address, u16 i2c_data) +{ + int err; + struct usb_device *udev = sd->gspca_dev.dev; + __u8 *buf = sd->gspca_dev.usb_buf; + u8 len = (i2c_data > 0xff) ? 2 : 1; + + buf[0] = i2c_data & 0xff; + buf[1] = (i2c_data >> 8) & 0xff; + + err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), + 0x04, 0x40, address, 0, buf, len, + STV06XX_URB_MSG_TIMEOUT); + + + PDEBUG(D_CONF, "Written 0x%x to address 0x%x, status: %d", + i2c_data, address, err); + + return (err < 0) ? err : 0; +} + +int stv06xx_read_bridge(struct sd *sd, u16 address, u8 *i2c_data) +{ + int err; + struct usb_device *udev = sd->gspca_dev.dev; + __u8 *buf = sd->gspca_dev.usb_buf; + + err = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), + 0x04, 0xc0, address, 0, buf, 1, + STV06XX_URB_MSG_TIMEOUT); + + *i2c_data = buf[0]; + + PDEBUG(D_CONF, "Read 0x%x from address 0x%x, status %d", + *i2c_data, address, err); + + return (err < 0) ? 
err : 0; +} + +/* Wraps the normal write sensor bytes / words functions for writing a + single value */ +int stv06xx_write_sensor(struct sd *sd, u8 address, u16 value) +{ + if (sd->sensor->i2c_len == 2) { + u16 data[2] = { address, value }; + return stv06xx_write_sensor_words(sd, data, 1); + } else { + u8 data[2] = { address, value }; + return stv06xx_write_sensor_bytes(sd, data, 1); + } +} + +static int stv06xx_write_sensor_finish(struct sd *sd) +{ + int err = 0; + + if (IS_850(sd)) { + struct usb_device *udev = sd->gspca_dev.dev; + __u8 *buf = sd->gspca_dev.usb_buf; + + /* Quickam Web needs an extra packet */ + buf[0] = 0; + err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), + 0x04, 0x40, 0x1704, 0, buf, 1, + STV06XX_URB_MSG_TIMEOUT); + } + + return (err < 0) ? err : 0; +} + +int stv06xx_write_sensor_bytes(struct sd *sd, const u8 *data, u8 len) +{ + int err, i, j; + struct usb_device *udev = sd->gspca_dev.dev; + __u8 *buf = sd->gspca_dev.usb_buf; + + PDEBUG(D_USBO, "I2C: Command buffer contains %d entries", len); + for (i = 0; i < len;) { + /* Build the command buffer */ + memset(buf, 0, I2C_BUFFER_LENGTH); + for (j = 0; j < I2C_MAX_BYTES && i < len; j++, i++) { + buf[j] = data[2*i]; + buf[0x10 + j] = data[2*i+1]; + PDEBUG(D_USBO, "I2C: Writing 0x%02x to reg 0x%02x", + data[2*i+1], data[2*i]); + } + buf[0x20] = sd->sensor->i2c_addr; + buf[0x21] = j - 1; /* Number of commands to send - 1 */ + buf[0x22] = I2C_WRITE_CMD; + err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), + 0x04, 0x40, 0x0400, 0, buf, + I2C_BUFFER_LENGTH, + STV06XX_URB_MSG_TIMEOUT); + if (err < 0) + return err; + } + return stv06xx_write_sensor_finish(sd); +} + +int stv06xx_write_sensor_words(struct sd *sd, const u16 *data, u8 len) +{ + int err, i, j; + struct usb_device *udev = sd->gspca_dev.dev; + __u8 *buf = sd->gspca_dev.usb_buf; + + PDEBUG(D_USBO, "I2C: Command buffer contains %d entries", len); + + for (i = 0; i < len;) { + /* Build the command buffer */ + memset(buf, 0, 
I2C_BUFFER_LENGTH); + for (j = 0; j < I2C_MAX_WORDS && i < len; j++, i++) { + buf[j] = data[2*i]; + buf[0x10 + j * 2] = data[2*i+1]; + buf[0x10 + j * 2 + 1] = data[2*i+1] >> 8; + PDEBUG(D_USBO, "I2C: Writing 0x%04x to reg 0x%02x", + data[2*i+1], data[2*i]); + } + buf[0x20] = sd->sensor->i2c_addr; + buf[0x21] = j - 1; /* Number of commands to send - 1 */ + buf[0x22] = I2C_WRITE_CMD; + err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), + 0x04, 0x40, 0x0400, 0, buf, + I2C_BUFFER_LENGTH, + STV06XX_URB_MSG_TIMEOUT); + if (err < 0) + return err; + } + return stv06xx_write_sensor_finish(sd); +} + +int stv06xx_read_sensor(struct sd *sd, const u8 address, u16 *value) +{ + int err; + struct usb_device *udev = sd->gspca_dev.dev; + __u8 *buf = sd->gspca_dev.usb_buf; + + err = stv06xx_write_bridge(sd, STV_I2C_FLUSH, sd->sensor->i2c_flush); + if (err < 0) + return err; + + /* Clear mem */ + memset(buf, 0, I2C_BUFFER_LENGTH); + + buf[0] = address; + buf[0x20] = sd->sensor->i2c_addr; + buf[0x21] = 0; + + /* Read I2C register */ + buf[0x22] = I2C_READ_CMD; + + err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), + 0x04, 0x40, 0x1400, 0, buf, I2C_BUFFER_LENGTH, + STV06XX_URB_MSG_TIMEOUT); + if (err < 0) { + PDEBUG(D_ERR, "I2C Read: error writing address: %d", err); + return err; + } + + err = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), + 0x04, 0xc0, 0x1410, 0, buf, sd->sensor->i2c_len, + STV06XX_URB_MSG_TIMEOUT); + if (sd->sensor->i2c_len == 2) + *value = buf[0] | (buf[1] << 8); + else + *value = buf[0]; + + PDEBUG(D_USBO, "I2C: Read 0x%x from address 0x%x, status: %d", + *value, address, err); + + return (err < 0) ? 
err : 0; +} + +/* Dumps all bridge registers */ +static void stv06xx_dump_bridge(struct sd *sd) +{ + int i; + u8 data, buf; + + info("Dumping all stv06xx bridge registers"); + for (i = 0x1400; i < 0x160f; i++) { + stv06xx_read_bridge(sd, i, &data); + + info("Read 0x%x from address 0x%x", data, i); + } + + for (i = 0x1400; i < 0x160f; i++) { + stv06xx_read_bridge(sd, i, &data); + buf = data; + + stv06xx_write_bridge(sd, i, 0xff); + stv06xx_read_bridge(sd, i, &data); + if (data == 0xff) + info("Register 0x%x is read/write", i); + else if (data != buf) + info("Register 0x%x is read/write," + "but only partially", i); + else + info("Register 0x%x is read-only", i); + + stv06xx_write_bridge(sd, i, buf); + } +} + +/* this function is called at probe and resume time */ +static int stv06xx_init(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + int err; + + PDEBUG(D_PROBE, "Initializing camera"); + + /* Let the usb init settle for a bit + before performing the initialization */ + msleep(250); + + err = sd->sensor->init(sd); + + if (dump_sensor) + sd->sensor->dump(sd); + + return (err < 0) ? err : 0; +} + +/* Start the camera */ +static int stv06xx_start(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + int err; + + /* Prepare the sensor for start */ + err = sd->sensor->start(sd); + if (err < 0) + goto out; + + /* Start isochronous streaming */ + err = stv06xx_write_bridge(sd, STV_ISO_ENABLE, 1); + +out: + if (err < 0) + PDEBUG(D_STREAM, "Starting stream failed"); + else + PDEBUG(D_STREAM, "Started streaming"); + + return (err < 0) ? 
err : 0; +} + +static void stv06xx_stopN(struct gspca_dev *gspca_dev) +{ + int err; + struct sd *sd = (struct sd *) gspca_dev; + + /* stop ISO-streaming */ + err = stv06xx_write_bridge(sd, STV_ISO_ENABLE, 0); + if (err < 0) + goto out; + + err = sd->sensor->stop(sd); + if (err < 0) + goto out; + +out: + if (err < 0) + PDEBUG(D_STREAM, "Failed to stop stream"); + else + PDEBUG(D_STREAM, "Stopped streaming"); +} + +/* + * Analyse an USB packet of the data stream and store it appropriately. + * Each packet contains an integral number of chunks. Each chunk has + * 2-bytes identification, followed by 2-bytes that describe the chunk + * length. Known/guessed chunk identifications are: + * 8001/8005/C001/C005 - Begin new frame + * 8002/8006/C002/C006 - End frame + * 0200/4200 - Contains actual image data, bayer or compressed + * 0005 - 11 bytes of unknown data + * 0100 - 2 bytes of unknown data + * The 0005 and 0100 chunks seem to appear only in compressed stream. + */ +static void stv06xx_pkt_scan(struct gspca_dev *gspca_dev, + struct gspca_frame *frame, /* target */ + __u8 *data, /* isoc packet */ + int len) /* iso packet length */ +{ + PDEBUG(D_PACK, "Packet of length %d arrived", len); + + /* A packet may contain several frames + loop until the whole packet is reached */ + while (len) { + int id, chunk_len; + + if (len < 4) { + PDEBUG(D_PACK, "Packet is smaller than 4 bytes"); + return; + } + + /* Capture the id */ + id = (data[0] << 8) | data[1]; + + /* Capture the chunk length */ + chunk_len = (data[2] << 8) | data[3]; + PDEBUG(D_PACK, "Chunk id: %x, length: %d", id, chunk_len); + + data += 4; + len -= 4; + + if (len < chunk_len) { + PDEBUG(D_ERR, "URB packet length is smaller" + " than the specified chunk length"); + return; + } + + switch (id) { + case 0x0200: + case 0x4200: + PDEBUG(D_PACK, "Frame data packet detected"); + + gspca_frame_add(gspca_dev, INTER_PACKET, frame, + data, chunk_len); + break; + + case 0x8001: + case 0x8005: + case 0xc001: + case 0xc005: + 
PDEBUG(D_PACK, "Starting new frame"); + + /* Create a new frame, chunk length should be zero */ + gspca_frame_add(gspca_dev, FIRST_PACKET, + frame, data, 0); + + if (chunk_len) + PDEBUG(D_ERR, "Chunk length is " + "non-zero on a SOF"); + break; + + case 0x8002: + case 0x8006: + case 0xc002: + PDEBUG(D_PACK, "End of frame detected"); + + /* Complete the last frame (if any) */ + gspca_frame_add(gspca_dev, LAST_PACKET, frame, data, 0); + + if (chunk_len) + PDEBUG(D_ERR, "Chunk length is " + "non-zero on a EOF"); + break; + + case 0x0005: + PDEBUG(D_PACK, "Chunk 0x005 detected"); + /* Unknown chunk with 11 bytes of data, + occurs just before end of each frame + in compressed mode */ + break; + + case 0x0100: + PDEBUG(D_PACK, "Chunk 0x0100 detected"); + /* Unknown chunk with 2 bytes of data, + occurs 2-3 times per USB interrupt */ + break; + default: + PDEBUG(D_PACK, "Unknown chunk %d detected", id); + /* Unknown chunk */ + } + data += chunk_len; + len -= chunk_len; + } +} + +static int stv06xx_config(struct gspca_dev *gspca_dev, + const struct usb_device_id *id); + +/* sub-driver description */ +static const struct sd_desc sd_desc = { + .name = MODULE_NAME, + .config = stv06xx_config, + .init = stv06xx_init, + .start = stv06xx_start, + .stopN = stv06xx_stopN, + .pkt_scan = stv06xx_pkt_scan +}; + +/* This function is called at probe time */ +static int stv06xx_config(struct gspca_dev *gspca_dev, + const struct usb_device_id *id) +{ + struct sd *sd = (struct sd *) gspca_dev; + struct cam *cam; + + PDEBUG(D_PROBE, "Configuring camera"); + + cam = &gspca_dev->cam; + cam->epaddr = STV_ISOC_ENDPOINT_ADDR; + sd->desc = sd_desc; + gspca_dev->sd_desc = &sd->desc; + + if (dump_bridge) + stv06xx_dump_bridge(sd); + + sd->sensor = &stv06xx_sensor_vv6410; + if (!sd->sensor->probe(sd)) + return 0; + + sd->sensor = &stv06xx_sensor_hdcs1x00; + if (!sd->sensor->probe(sd)) + return 0; + + sd->sensor = &stv06xx_sensor_hdcs1020; + if (!sd->sensor->probe(sd)) + return 0; + + sd->sensor = 
&stv06xx_sensor_pb0100; + if (!sd->sensor->probe(sd)) + return 0; + + sd->sensor = NULL; + return -ENODEV; +} + + + +/* -- module initialisation -- */ +static const __devinitdata struct usb_device_id device_table[] = { + {USB_DEVICE(0x046d, 0x0840)}, /* QuickCam Express */ + {USB_DEVICE(0x046d, 0x0850)}, /* LEGO cam / QuickCam Web */ + {USB_DEVICE(0x046d, 0x0870)}, /* Dexxa WebCam USB */ + {} +}; +MODULE_DEVICE_TABLE(usb, device_table); + +/* -- device connect -- */ +static int sd_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + PDEBUG(D_PROBE, "Probing for a stv06xx device"); + return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), + THIS_MODULE); +} + +void sd_disconnect(struct usb_interface *intf) +{ + struct gspca_dev *gspca_dev = usb_get_intfdata(intf); + struct sd *sd = (struct sd *) gspca_dev; + PDEBUG(D_PROBE, "Disconnecting the stv06xx device"); + + if (sd->sensor->disconnect) + sd->sensor->disconnect(sd); + gspca_disconnect(intf); +} + +static struct usb_driver sd_driver = { + .name = MODULE_NAME, + .id_table = device_table, + .probe = sd_probe, + .disconnect = sd_disconnect, +#ifdef CONFIG_PM + .suspend = gspca_suspend, + .resume = gspca_resume, +#endif +}; + +/* -- module insert / remove -- */ +static int __init sd_mod_init(void) +{ + if (usb_register(&sd_driver) < 0) + return -1; + PDEBUG(D_PROBE, "registered"); + return 0; +} +static void __exit sd_mod_exit(void) +{ + usb_deregister(&sd_driver); + PDEBUG(D_PROBE, "deregistered"); +} + +module_init(sd_mod_init); +module_exit(sd_mod_exit); + +module_param(dump_bridge, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dump_bridge, "Dumps all usb bridge registers at startup"); + +module_param(dump_sensor, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dump_sensor, "Dumps all sensor registers at startup"); diff --git a/drivers/media/video/gspca/stv06xx/stv06xx.h b/drivers/media/video/gspca/stv06xx/stv06xx.h new file mode 100644 index 00000000000..1207e7d17f1 --- /dev/null +++ 
b/drivers/media/video/gspca/stv06xx/stv06xx.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2001 Jean-Fredric Clere, Nikolas Zimmermann, Georg Acher + * Mark Cave-Ayland, Carlo E Prelz, Dick Streefland + * Copyright (c) 2002, 2003 Tuukka Toivonen + * Copyright (c) 2008 Erik Andrén + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * P/N 861037: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0010: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0020: Sensor Photobit PB100 ASIC STV0600-1 - QuickCam Express + * P/N 861055: Sensor ST VV6410 ASIC STV0610 - LEGO cam + * P/N 861075-0040: Sensor HDCS1000 ASIC + * P/N 961179-0700: Sensor ST VV6410 ASIC STV0602 - Dexxa WebCam USB + * P/N 861040-0000: Sensor ST VV6410 ASIC STV0610 - QuickCam Web + */ + +#ifndef STV06XX_H_ +#define STV06XX_H_ + +#include "gspca.h" + +#define MODULE_NAME "STV06xx" + +#define STV_ISOC_ENDPOINT_ADDR 0x81 + +#ifndef V4L2_PIX_FMT_SGRBG8 +#define V4L2_PIX_FMT_SGRBG8 v4l2_fourcc('G', 'R', 'B', 'G') +#endif + +#define STV_REG23 0x0423 + +/* Control registers of the STV0600 ASIC */ +#define STV_I2C_PARTNER 0x1420 +#define STV_I2C_VAL_REG_VAL_PAIRS_MIN1 0x1421 +#define STV_I2C_READ_WRITE_TOGGLE 0x1422 +#define STV_I2C_FLUSH 0x1423 +#define STV_I2C_SUCC_READ_REG_VALS 0x1424 + +#define STV_ISO_ENABLE 0x1440 +#define STV_SCAN_RATE 0x1443 +#define 
STV_LED_CTRL 0x1445 +#define STV_STV0600_EMULATION 0x1446 +#define STV_REG00 0x1500 +#define STV_REG01 0x1501 +#define STV_REG02 0x1502 +#define STV_REG03 0x1503 +#define STV_REG04 0x1504 + +#define STV_ISO_SIZE_L 0x15c1 +#define STV_ISO_SIZE_H 0x15c2 + +/* Refers to the CIF 352x288 and QCIF 176x144 */ +/* 1: 288 lines, 2: 144 lines */ +#define STV_Y_CTRL 0x15c3 + +/* 0xa: 352 columns, 0x6: 176 columns */ +#define STV_X_CTRL 0x1680 + +#define STV06XX_URB_MSG_TIMEOUT 5000 + +#define I2C_MAX_BYTES 16 +#define I2C_MAX_WORDS 8 + +#define I2C_BUFFER_LENGTH 0x23 +#define I2C_READ_CMD 3 +#define I2C_WRITE_CMD 1 + +#define LED_ON 1 +#define LED_OFF 0 + +/* STV06xx device descriptor */ +struct sd { + struct gspca_dev gspca_dev; + + /* A pointer to the currently connected sensor */ + const struct stv06xx_sensor *sensor; + + /* A pointer to the sd_desc struct */ + struct sd_desc desc; + + /* Sensor private data */ + void *sensor_priv; +}; + +int stv06xx_write_bridge(struct sd *sd, u16 address, u16 i2c_data); +int stv06xx_read_bridge(struct sd *sd, u16 address, u8 *i2c_data); + +int stv06xx_write_sensor_bytes(struct sd *sd, const u8 *data, u8 len); +int stv06xx_write_sensor_words(struct sd *sd, const u16 *data, u8 len); + +int stv06xx_read_sensor(struct sd *sd, const u8 address, u16 *value); +int stv06xx_write_sensor(struct sd *sd, u8 address, u16 value); + +#endif diff --git a/drivers/media/video/gspca/stv06xx/stv06xx_hdcs.c b/drivers/media/video/gspca/stv06xx/stv06xx_hdcs.c new file mode 100644 index 00000000000..1cfe5850455 --- /dev/null +++ b/drivers/media/video/gspca/stv06xx/stv06xx_hdcs.c @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2001 Jean-Fredric Clere, Nikolas Zimmermann, Georg Acher + * Mark Cave-Ayland, Carlo E Prelz, Dick Streefland + * Copyright (c) 2002, 2003 Tuukka Toivonen + * Copyright (c) 2008 Erik Andrén + * Copyright (c) 2008 Chia-I Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public 
License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * P/N 861037: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0010: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0020: Sensor Photobit PB100 ASIC STV0600-1 - QuickCam Express + * P/N 861055: Sensor ST VV6410 ASIC STV0610 - LEGO cam + * P/N 861075-0040: Sensor HDCS1000 ASIC + * P/N 961179-0700: Sensor ST VV6410 ASIC STV0602 - Dexxa WebCam USB + * P/N 861040-0000: Sensor ST VV6410 ASIC STV0610 - QuickCam Web + */ + +#include "stv06xx_hdcs.h" + +enum hdcs_power_state { + HDCS_STATE_SLEEP, + HDCS_STATE_IDLE, + HDCS_STATE_RUN +}; + +/* no lock? 
*/ +struct hdcs { + enum hdcs_power_state state; + int w, h; + + /* visible area of the sensor array */ + struct { + int left, top; + int width, height; + int border; + } array; + + struct { + /* Column timing overhead */ + u8 cto; + /* Column processing overhead */ + u8 cpo; + /* Row sample period constant */ + u16 rs; + /* Exposure reset duration */ + u16 er; + } exp; + + int psmp; +}; + +static int hdcs_reg_write_seq(struct sd *sd, u8 reg, u8 *vals, u8 len) +{ + u8 regs[I2C_MAX_BYTES * 2]; + int i; + + if (unlikely((len <= 0) || (len >= I2C_MAX_BYTES) || + (reg + len > 0xff))) + return -EINVAL; + + for (i = 0; i < len; i++, reg++) { + regs[2*i] = reg; + regs[2*i+1] = vals[i]; + } + + return stv06xx_write_sensor_bytes(sd, regs, len); +} + +static int hdcs_set_state(struct sd *sd, enum hdcs_power_state state) +{ + struct hdcs *hdcs = sd->sensor_priv; + u8 val; + int ret; + + if (hdcs->state == state) + return 0; + + /* we need to go idle before running or sleeping */ + if (hdcs->state != HDCS_STATE_IDLE) { + ret = stv06xx_write_sensor(sd, HDCS_REG_CONTROL(sd), 0); + if (ret) + return ret; + } + + hdcs->state = HDCS_STATE_IDLE; + + if (state == HDCS_STATE_IDLE) + return 0; + + switch (state) { + case HDCS_STATE_SLEEP: + val = HDCS_SLEEP_MODE; + break; + + case HDCS_STATE_RUN: + val = HDCS_RUN_ENABLE; + break; + + default: + return -EINVAL; + } + + ret = stv06xx_write_sensor(sd, HDCS_REG_CONTROL(sd), val); + if (ret < 0) + hdcs->state = state; + + return ret; +} + +static int hdcs_reset(struct sd *sd) +{ + struct hdcs *hdcs = sd->sensor_priv; + int err; + + err = stv06xx_write_sensor(sd, HDCS_REG_CONTROL(sd), 1); + if (err < 0) + return err; + + err = stv06xx_write_sensor(sd, HDCS_REG_CONTROL(sd), 0); + if (err < 0) + hdcs->state = HDCS_STATE_IDLE; + + return err; +} + +static int hdcs_get_exposure(struct gspca_dev *gspca_dev, __s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + struct hdcs *hdcs = sd->sensor_priv; + + /* Column time period */ + int ct; + /* 
Column processing period */ + int cp; + /* Row processing period */ + int rp; + int cycles; + int err; + int rowexp; + u16 data[2]; + + err = stv06xx_read_sensor(sd, HDCS_ROWEXPL, &data[0]); + if (err < 0) + return err; + + err = stv06xx_read_sensor(sd, HDCS_ROWEXPH, &data[1]); + if (err < 0) + return err; + + rowexp = (data[1] << 8) | data[0]; + + ct = hdcs->exp.cto + hdcs->psmp + (HDCS_ADC_START_SIG_DUR + 2); + cp = hdcs->exp.cto + (hdcs->w * ct / 2); + rp = hdcs->exp.rs + cp; + + cycles = rp * rowexp; + *val = cycles / HDCS_CLK_FREQ_MHZ; + PDEBUG(D_V4L2, "Read exposure %d", *val); + return 0; +} + +static int hdcs_set_exposure(struct gspca_dev *gspca_dev, __s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + struct hdcs *hdcs = sd->sensor_priv; + int rowexp, srowexp; + int max_srowexp; + /* Column time period */ + int ct; + /* Column processing period */ + int cp; + /* Row processing period */ + int rp; + /* Minimum number of column timing periods + within the column processing period */ + int mnct; + int cycles, err; + u8 exp[4]; + + cycles = val * HDCS_CLK_FREQ_MHZ; + + ct = hdcs->exp.cto + hdcs->psmp + (HDCS_ADC_START_SIG_DUR + 2); + cp = hdcs->exp.cto + (hdcs->w * ct / 2); + + /* the cycles one row takes */ + rp = hdcs->exp.rs + cp; + + rowexp = cycles / rp; + + /* the remaining cycles */ + cycles -= rowexp * rp; + + /* calculate sub-row exposure */ + if (IS_1020(sd)) { + /* see HDCS-1020 datasheet 3.5.6.4, p. 63 */ + srowexp = hdcs->w - (cycles + hdcs->exp.er + 13) / ct; + + mnct = (hdcs->exp.er + 12 + ct - 1) / ct; + max_srowexp = hdcs->w - mnct; + } else { + /* see HDCS-1000 datasheet 3.4.5.5, p. 
61 */ + srowexp = cp - hdcs->exp.er - 6 - cycles; + + mnct = (hdcs->exp.er + 5 + ct - 1) / ct; + max_srowexp = cp - mnct * ct - 1; + } + + if (srowexp < 0) + srowexp = 0; + else if (srowexp > max_srowexp) + srowexp = max_srowexp; + + if (IS_1020(sd)) { + exp[0] = rowexp & 0xff; + exp[1] = rowexp >> 8; + exp[2] = (srowexp >> 2) & 0xff; + /* this clears exposure error flag */ + exp[3] = 0x1; + err = hdcs_reg_write_seq(sd, HDCS_ROWEXPL, exp, 4); + } else { + exp[0] = rowexp & 0xff; + exp[1] = rowexp >> 8; + exp[2] = srowexp & 0xff; + exp[3] = srowexp >> 8; + err = hdcs_reg_write_seq(sd, HDCS_ROWEXPL, exp, 4); + if (err < 0) + return err; + + /* clear exposure error flag */ + err = stv06xx_write_sensor(sd, + HDCS_STATUS, BIT(4)); + } + PDEBUG(D_V4L2, "Writing exposure %d, rowexp %d, srowexp %d", + val, rowexp, srowexp); + return err; +} + +static int hdcs_set_gains(struct sd *sd, u8 r, u8 g, u8 b) +{ + u8 gains[4]; + + /* the voltage gain Av = (1 + 19 * val / 127) * (1 + bit7) */ + if (r > 127) + r = 0x80 | (r / 2); + if (g > 127) + g = 0x80 | (g / 2); + if (b > 127) + b = 0x80 | (b / 2); + + gains[0] = g; + gains[1] = r; + gains[2] = b; + gains[3] = g; + + return hdcs_reg_write_seq(sd, HDCS_ERECPGA, gains, 4); +} + +static int hdcs_get_gain(struct gspca_dev *gspca_dev, __s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + int err; + u16 data; + + err = stv06xx_read_sensor(sd, HDCS_ERECPGA, &data); + + /* Bit 7 doubles the gain */ + if (data & 0x80) + *val = (data & 0x7f) * 2; + else + *val = data; + + PDEBUG(D_V4L2, "Read gain %d", *val); + return err; +} + +static int hdcs_set_gain(struct gspca_dev *gspca_dev, __s32 val) +{ + PDEBUG(D_V4L2, "Writing gain %d", val); + return hdcs_set_gains((struct sd *) gspca_dev, + val & 0xff, val & 0xff, val & 0xff); +} + +static int hdcs_set_size(struct sd *sd, + unsigned int width, unsigned int height) +{ + struct hdcs *hdcs = sd->sensor_priv; + u8 win[4]; + unsigned int x, y; + int err; + + /* must be multiple of 4 */ + 
width = (width + 3) & ~0x3; + height = (height + 3) & ~0x3; + + if (width > hdcs->array.width) + width = hdcs->array.width; + + if (IS_1020(sd)) { + /* the borders are also invalid */ + if (height + 2 * hdcs->array.border + HDCS_1020_BOTTOM_Y_SKIP + > hdcs->array.height) + height = hdcs->array.height - 2 * hdcs->array.border - + HDCS_1020_BOTTOM_Y_SKIP; + + y = (hdcs->array.height - HDCS_1020_BOTTOM_Y_SKIP - height) / 2 + + hdcs->array.top; + } else if (height > hdcs->array.height) { + height = hdcs->array.height; + y = hdcs->array.top + (hdcs->array.height - height) / 2; + } + + x = hdcs->array.left + (hdcs->array.width - width) / 2; + + win[0] = y / 4; + win[1] = x / 4; + win[2] = (y + height) / 4 - 1; + win[3] = (x + width) / 4 - 1; + + err = hdcs_reg_write_seq(sd, HDCS_FWROW, win, 4); + if (err < 0) + return err; + + /* Update the current width and height */ + hdcs->w = width; + hdcs->h = height; + return err; +} + +static int hdcs_probe_1x00(struct sd *sd) +{ + struct hdcs *hdcs; + u16 sensor; + int ret; + + ret = stv06xx_read_sensor(sd, HDCS_IDENT, &sensor); + if (ret < 0 || sensor != 0x08) + return -ENODEV; + + info("HDCS-1000/1100 sensor detected"); + + sd->gspca_dev.cam.cam_mode = stv06xx_sensor_hdcs1x00.modes; + sd->gspca_dev.cam.nmodes = stv06xx_sensor_hdcs1x00.nmodes; + sd->desc.ctrls = stv06xx_sensor_hdcs1x00.ctrls; + sd->desc.nctrls = stv06xx_sensor_hdcs1x00.nctrls; + + hdcs = kmalloc(sizeof(struct hdcs), GFP_KERNEL); + if (!hdcs) + return -ENOMEM; + + hdcs->array.left = 8; + hdcs->array.top = 8; + hdcs->array.width = HDCS_1X00_DEF_WIDTH; + hdcs->array.height = HDCS_1X00_DEF_HEIGHT; + hdcs->array.border = 4; + + hdcs->exp.cto = 4; + hdcs->exp.cpo = 2; + hdcs->exp.rs = 186; + hdcs->exp.er = 100; + + /* + * Frame rate on HDCS-1000 0x46D:0x840 depends on PSMP: + * 4 = doesn't work at all + * 5 = 7.8 fps, + * 6 = 6.9 fps, + * 8 = 6.3 fps, + * 10 = 5.5 fps, + * 15 = 4.4 fps, + * 31 = 2.8 fps + * + * Frame rate on HDCS-1000 0x46D:0x870 depends on PSMP: + * 
15 = doesn't work at all + * 18 = doesn't work at all + * 19 = 7.3 fps + * 20 = 7.4 fps + * 21 = 7.4 fps + * 22 = 7.4 fps + * 24 = 6.3 fps + * 30 = 5.4 fps + */ + hdcs->psmp = IS_870(sd) ? 20 : 5; + + sd->sensor_priv = hdcs; + + return 0; +} + +static int hdcs_probe_1020(struct sd *sd) +{ + struct hdcs *hdcs; + u16 sensor; + int ret; + + ret = stv06xx_read_sensor(sd, HDCS_IDENT, &sensor); + if (ret < 0 || sensor != 0x10) + return -ENODEV; + + info("HDCS-1020 sensor detected"); + + sd->gspca_dev.cam.cam_mode = stv06xx_sensor_hdcs1020.modes; + sd->gspca_dev.cam.nmodes = stv06xx_sensor_hdcs1020.nmodes; + sd->desc.ctrls = stv06xx_sensor_hdcs1020.ctrls; + sd->desc.nctrls = stv06xx_sensor_hdcs1020.nctrls; + + hdcs = kmalloc(sizeof(struct hdcs), GFP_KERNEL); + if (!hdcs) + return -ENOMEM; + + /* + * From Andrey's test image: looks like HDCS-1020 upper-left + * visible pixel is at 24,8 (y maybe even smaller?) and lower-right + * visible pixel at 375,299 (x maybe even larger?) + */ + hdcs->array.left = 24; + hdcs->array.top = 4; + hdcs->array.width = HDCS_1020_DEF_WIDTH; + hdcs->array.height = 304; + hdcs->array.border = 4; + + hdcs->psmp = 6; + + hdcs->exp.cto = 3; + hdcs->exp.cpo = 3; + hdcs->exp.rs = 155; + hdcs->exp.er = 96; + + sd->sensor_priv = hdcs; + + return 0; +} + +static int hdcs_start(struct sd *sd) +{ + PDEBUG(D_STREAM, "Starting stream"); + + return hdcs_set_state(sd, HDCS_STATE_RUN); +} + +static int hdcs_stop(struct sd *sd) +{ + PDEBUG(D_STREAM, "Halting stream"); + + return hdcs_set_state(sd, HDCS_STATE_SLEEP); +} + +static void hdcs_disconnect(struct sd *sd) +{ + PDEBUG(D_PROBE, "Disconnecting the sensor"); + kfree(sd->sensor_priv); +} + +static int hdcs_init(struct sd *sd) +{ + struct hdcs *hdcs = sd->sensor_priv; + int i, err = 0; + + /* Set the STV0602AA in STV0600 emulation mode */ + if (IS_870(sd)) + stv06xx_write_bridge(sd, STV_STV0600_EMULATION, 1); + + /* Execute the bridge init */ + for (i = 0; i < ARRAY_SIZE(stv_bridge_init) && !err; i++) { + 
err = stv06xx_write_bridge(sd, stv_bridge_init[i][0], + stv_bridge_init[i][1]); + } + if (err < 0) + return err; + + /* sensor soft reset */ + hdcs_reset(sd); + + /* Execute the sensor init */ + for (i = 0; i < ARRAY_SIZE(stv_sensor_init) && !err; i++) { + err = stv06xx_write_sensor(sd, stv_sensor_init[i][0], + stv_sensor_init[i][1]); + } + if (err < 0) + return err; + + /* Enable continous frame capture, bit 2: stop when frame complete */ + err = stv06xx_write_sensor(sd, HDCS_REG_CONFIG(sd), BIT(3)); + if (err < 0) + return err; + + /* Set PGA sample duration + (was 0x7E for IS_870, but caused slow framerate with HDCS-1020) */ + if (IS_1020(sd)) + err = stv06xx_write_sensor(sd, HDCS_TCTRL, + (HDCS_ADC_START_SIG_DUR << 6) | hdcs->psmp); + else + err = stv06xx_write_sensor(sd, HDCS_TCTRL, + (HDCS_ADC_START_SIG_DUR << 5) | hdcs->psmp); + if (err < 0) + return err; + + err = hdcs_set_gains(sd, HDCS_DEFAULT_GAIN, HDCS_DEFAULT_GAIN, + HDCS_DEFAULT_GAIN); + if (err < 0) + return err; + + err = hdcs_set_exposure(&sd->gspca_dev, HDCS_DEFAULT_EXPOSURE); + if (err < 0) + return err; + + err = hdcs_set_size(sd, hdcs->array.width, hdcs->array.height); + return err; +} + +static int hdcs_dump(struct sd *sd) +{ + u16 reg, val; + + info("Dumping sensor registers:"); + + for (reg = HDCS_IDENT; reg <= HDCS_ROWEXPH; reg++) { + stv06xx_read_sensor(sd, reg, &val); + info("reg 0x%02x = 0x%02x", reg, val); + } + return 0; +} diff --git a/drivers/media/video/gspca/stv06xx/stv06xx_hdcs.h b/drivers/media/video/gspca/stv06xx/stv06xx_hdcs.h new file mode 100644 index 00000000000..9c7279a4cd8 --- /dev/null +++ b/drivers/media/video/gspca/stv06xx/stv06xx_hdcs.h @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2001 Jean-Fredric Clere, Nikolas Zimmermann, Georg Acher + * Mark Cave-Ayland, Carlo E Prelz, Dick Streefland + * Copyright (c) 2002, 2003 Tuukka Toivonen + * Copyright (c) 2008 Erik AndrĂ©n + * Copyright (c) 2008 Chia-I Wu + * + * This program is free software; you can redistribute it and/or 
modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * P/N 861037: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0010: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0020: Sensor Photobit PB100 ASIC STV0600-1 - QuickCam Express + * P/N 861055: Sensor ST VV6410 ASIC STV0610 - LEGO cam + * P/N 861075-0040: Sensor HDCS1000 ASIC + * P/N 961179-0700: Sensor ST VV6410 ASIC STV0602 - Dexxa WebCam USB + * P/N 861040-0000: Sensor ST VV6410 ASIC STV0610 - QuickCam Web + */ + +#ifndef STV06XX_HDCS_H_ +#define STV06XX_HDCS_H_ + +#include "stv06xx_sensor.h" + +#define HDCS_REG_CONFIG(sd) (IS_1020(sd) ? HDCS20_CONFIG : HDCS00_CONFIG) +#define HDCS_REG_CONTROL(sd) (IS_1020(sd) ? 
HDCS20_CONTROL : HDCS00_CONTROL) + +#define HDCS_1X00_DEF_WIDTH 360 +#define HDCS_1X00_DEF_HEIGHT 296 + +#define HDCS_1020_DEF_WIDTH 352 +#define HDCS_1020_DEF_HEIGHT 292 + +#define HDCS_1020_BOTTOM_Y_SKIP 4 + +#define HDCS_CLK_FREQ_MHZ 25 + +#define HDCS_ADC_START_SIG_DUR 3 + +/* LSB bit of I2C or register address signifies write (0) or read (1) */ +/* I2C Registers common for both HDCS-1000/1100 and HDCS-1020 */ +/* Identifications Register */ +#define HDCS_IDENT (0x00 << 1) +/* Status Register */ +#define HDCS_STATUS (0x01 << 1) +/* Interrupt Mask Register */ +#define HDCS_IMASK (0x02 << 1) +/* Pad Control Register */ +#define HDCS_PCTRL (0x03 << 1) +/* Pad Drive Control Register */ +#define HDCS_PDRV (0x04 << 1) +/* Interface Control Register */ +#define HDCS_ICTRL (0x05 << 1) +/* Interface Timing Register */ +#define HDCS_ITMG (0x06 << 1) +/* Baud Fraction Register */ +#define HDCS_BFRAC (0x07 << 1) +/* Baud Rate Register */ +#define HDCS_BRATE (0x08 << 1) +/* ADC Control Register */ +#define HDCS_ADCCTRL (0x09 << 1) +/* First Window Row Register */ +#define HDCS_FWROW (0x0a << 1) +/* First Window Column Register */ +#define HDCS_FWCOL (0x0b << 1) +/* Last Window Row Register */ +#define HDCS_LWROW (0x0c << 1) +/* Last Window Column Register */ +#define HDCS_LWCOL (0x0d << 1) +/* Timing Control Register */ +#define HDCS_TCTRL (0x0e << 1) +/* PGA Gain Register: Even Row, Even Column */ +#define HDCS_ERECPGA (0x0f << 1) +/* PGA Gain Register: Even Row, Odd Column */ +#define HDCS_EROCPGA (0x10 << 1) +/* PGA Gain Register: Odd Row, Even Column */ +#define HDCS_ORECPGA (0x11 << 1) +/* PGA Gain Register: Odd Row, Odd Column */ +#define HDCS_OROCPGA (0x12 << 1) +/* Row Exposure Low Register */ +#define HDCS_ROWEXPL (0x13 << 1) +/* Row Exposure High Register */ +#define HDCS_ROWEXPH (0x14 << 1) + +/* I2C Registers only for HDCS-1000/1100 */ +/* Sub-Row Exposure Low Register */ +#define HDCS00_SROWEXPL (0x15 << 1) +/* Sub-Row Exposure High Register */ +#define 
HDCS00_SROWEXPH (0x16 << 1) +/* Configuration Register */ +#define HDCS00_CONFIG (0x17 << 1) +/* Control Register */ +#define HDCS00_CONTROL (0x18 << 1) + +/* I2C Registers only for HDCS-1020 */ +/* Sub-Row Exposure Register */ +#define HDCS20_SROWEXP (0x15 << 1) +/* Error Control Register */ +#define HDCS20_ERROR (0x16 << 1) +/* Interface Timing 2 Register */ +#define HDCS20_ITMG2 (0x17 << 1) +/* Interface Control 2 Register */ +#define HDCS20_ICTRL2 (0x18 << 1) +/* Horizontal Blank Register */ +#define HDCS20_HBLANK (0x19 << 1) +/* Vertical Blank Register */ +#define HDCS20_VBLANK (0x1a << 1) +/* Configuration Register */ +#define HDCS20_CONFIG (0x1b << 1) +/* Control Register */ +#define HDCS20_CONTROL (0x1c << 1) + +#define HDCS_RUN_ENABLE (1 << 2) +#define HDCS_SLEEP_MODE (1 << 1) + +#define HDCS_DEFAULT_EXPOSURE 5000 +#define HDCS_DEFAULT_GAIN 128 + +static int hdcs_probe_1x00(struct sd *sd); +static int hdcs_probe_1020(struct sd *sd); +static int hdcs_start(struct sd *sd); +static int hdcs_init(struct sd *sd); +static int hdcs_stop(struct sd *sd); +static int hdcs_dump(struct sd *sd); +static void hdcs_disconnect(struct sd *sd); + +static int hdcs_get_exposure(struct gspca_dev *gspca_dev, __s32 *val); +static int hdcs_set_exposure(struct gspca_dev *gspca_dev, __s32 val); +static int hdcs_set_gain(struct gspca_dev *gspca_dev, __s32 val); +static int hdcs_get_gain(struct gspca_dev *gspca_dev, __s32 *val); + +const struct stv06xx_sensor stv06xx_sensor_hdcs1x00 = { + .name = "HP HDCS-1000/1100", + .i2c_flush = 0, + .i2c_addr = (0x55 << 1), + .i2c_len = 1, + + .init = hdcs_init, + .probe = hdcs_probe_1x00, + .start = hdcs_start, + .stop = hdcs_stop, + .disconnect = hdcs_disconnect, + .dump = hdcs_dump, + + .nctrls = 2, + .ctrls = { + { + { + .id = V4L2_CID_EXPOSURE, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "exposure", + .minimum = 0x00, + .maximum = 0xffff, + .step = 0x1, + .default_value = HDCS_DEFAULT_EXPOSURE, + .flags = V4L2_CTRL_FLAG_SLIDER + }, + .set = 
hdcs_set_exposure, + .get = hdcs_get_exposure + }, + { + { + .id = V4L2_CID_GAIN, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "gain", + .minimum = 0x00, + .maximum = 0xff, + .step = 0x1, + .default_value = HDCS_DEFAULT_GAIN, + .flags = V4L2_CTRL_FLAG_SLIDER + }, + .set = hdcs_set_gain, + .get = hdcs_get_gain + } + }, + + .nmodes = 1, + .modes = { + { + HDCS_1X00_DEF_WIDTH, + HDCS_1X00_DEF_HEIGHT, + V4L2_PIX_FMT_SBGGR8, + V4L2_FIELD_NONE, + .sizeimage = + HDCS_1X00_DEF_WIDTH * HDCS_1X00_DEF_HEIGHT, + .bytesperline = HDCS_1X00_DEF_WIDTH, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 1 + } + } +}; + +/* + * Sensor description for the HDCS-1020: no V4L2 controls, one raw Bayer + * mode. hdcs_probe_1020() allocates sd->sensor_priv, so .disconnect must + * point at hdcs_disconnect() for that memory to be freed. + */ +const struct stv06xx_sensor stv06xx_sensor_hdcs1020 = { + .name = "HDCS-1020", + .i2c_flush = 0, + .i2c_addr = (0x55 << 1), + .i2c_len = 1, + + .nctrls = 0, + .ctrls = {}, + + .init = hdcs_init, + .probe = hdcs_probe_1020, + .start = hdcs_start, + .stop = hdcs_stop, + .disconnect = hdcs_disconnect, + .dump = hdcs_dump, + + .nmodes = 1, + .modes = { + { + HDCS_1020_DEF_WIDTH, + HDCS_1020_DEF_HEIGHT, + V4L2_PIX_FMT_SBGGR8, + V4L2_FIELD_NONE, + .sizeimage = + HDCS_1020_DEF_WIDTH * HDCS_1020_DEF_HEIGHT, + .bytesperline = HDCS_1020_DEF_WIDTH, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 1 + } + } +}; + +static const u16 stv_bridge_init[][2] = { + {STV_ISO_ENABLE, 0}, + {STV_REG23, 0}, + {STV_REG00, 0x1d}, + {STV_REG01, 0xb5}, + {STV_REG02, 0xa8}, + {STV_REG03, 0x95}, + {STV_REG04, 0x07}, + + {STV_SCAN_RATE, 0x20}, + {STV_ISO_SIZE_L, 847}, + {STV_Y_CTRL, 0x01}, + {STV_X_CTRL, 0x0a} +}; + +static const u8 stv_sensor_init[][2] = { + /* Clear status (writing 1 will clear the corresponding status bit) */ + {HDCS_STATUS, BIT(6) | BIT(5) | BIT(4) | BIT(3) | BIT(2) | BIT(1)}, + /* Disable all interrupts */ + {HDCS_IMASK, 0x00}, + {HDCS_PCTRL, BIT(6) | BIT(5) | BIT(1) | BIT(0)}, + {HDCS_PDRV, 0x00}, + {HDCS_ICTRL, BIT(5)}, + {HDCS_ITMG, BIT(4) | BIT(1)}, + /* ADC output resolution to 10 bits */ + {HDCS_ADCCTRL, 10} +}; + +#endif diff --git a/drivers/media/video/gspca/stv06xx/stv06xx_pb0100.c 
b/drivers/media/video/gspca/stv06xx/stv06xx_pb0100.c new file mode 100644 index 00000000000..d0a0f859645 --- /dev/null +++ b/drivers/media/video/gspca/stv06xx/stv06xx_pb0100.c @@ -0,0 +1,430 @@ +/* + * Copyright (c) 2001 Jean-Fredric Clere, Nikolas Zimmermann, Georg Acher + * Mark Cave-Ayland, Carlo E Prelz, Dick Streefland + * Copyright (c) 2002, 2003 Tuukka Toivonen + * Copyright (c) 2008 Erik AndrĂ©n + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * P/N 861037: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0010: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0020: Sensor Photobit PB100 ASIC STV0600-1 - QuickCam Express + * P/N 861055: Sensor ST VV6410 ASIC STV0610 - LEGO cam + * P/N 861075-0040: Sensor HDCS1000 ASIC + * P/N 961179-0700: Sensor ST VV6410 ASIC STV0602 - Dexxa WebCam USB + * P/N 861040-0000: Sensor ST VV6410 ASIC STV0610 - QuickCam Web + */ + +/* + * The spec file for the PB-0100 suggests the following for best quality + * images after the sensor has been reset : + * + * PB_ADCGAINL = R60 = 0x03 (3 dec) : sets low reference of ADC + to produce good black level + * PB_PREADCTRL = R32 = 0x1400 (5120 dec) : Enables global gain changes + through R53 + * PB_ADCMINGAIN = R52 = 0x10 (16 dec) : Sets the minimum gain for + auto-exposure + * PB_ADCGLOBALGAIN = R53 = 0x10 (16 dec) : Sets the 
global gain + * PB_EXPGAIN = R14 = 0x11 (17 dec) : Sets the auto-exposure value + * PB_UPDATEINT = R23 = 0x02 (2 dec) : Sets the speed on + auto-exposure routine + * PB_CFILLIN = R5 = 0x0E (14 dec) : Sets the frame rate + */ + +#include "stv06xx_pb0100.h" + +static int pb0100_probe(struct sd *sd) +{ + u16 sensor; + int i, err; + s32 *sensor_settings; + + err = stv06xx_read_sensor(sd, PB_IDENT, &sensor); + + if (err < 0) + return -ENODEV; + + if ((sensor >> 8) == 0x64) { + sensor_settings = kmalloc( + stv06xx_sensor_pb0100.nctrls * sizeof(s32), + GFP_KERNEL); + if (!sensor_settings) + return -ENOMEM; + + info("Photobit pb0100 sensor detected"); + + sd->gspca_dev.cam.cam_mode = stv06xx_sensor_pb0100.modes; + sd->gspca_dev.cam.nmodes = stv06xx_sensor_pb0100.nmodes; + sd->desc.ctrls = stv06xx_sensor_pb0100.ctrls; + sd->desc.nctrls = stv06xx_sensor_pb0100.nctrls; + for (i = 0; i < stv06xx_sensor_pb0100.nctrls; i++) + sensor_settings[i] = stv06xx_sensor_pb0100. + ctrls[i].qctrl.default_value; + sd->sensor_priv = sensor_settings; + + return 0; + } + + return -ENODEV; +} + +static int pb0100_start(struct sd *sd) +{ + int err; + struct cam *cam = &sd->gspca_dev.cam; + s32 *sensor_settings = sd->sensor_priv; + u32 mode = cam->cam_mode[sd->gspca_dev.curr_mode].priv; + + /* Setup sensor window */ + if (mode & PB0100_CROP_TO_VGA) { + stv06xx_write_sensor(sd, PB_RSTART, 30); + stv06xx_write_sensor(sd, PB_CSTART, 20); + stv06xx_write_sensor(sd, PB_RWSIZE, 240 - 1); + stv06xx_write_sensor(sd, PB_CWSIZE, 320 - 1); + } else { + stv06xx_write_sensor(sd, PB_RSTART, 8); + stv06xx_write_sensor(sd, PB_CSTART, 4); + stv06xx_write_sensor(sd, PB_RWSIZE, 288 - 1); + stv06xx_write_sensor(sd, PB_CWSIZE, 352 - 1); + } + + if (mode & PB0100_SUBSAMPLE) { + stv06xx_write_bridge(sd, STV_Y_CTRL, 0x02); /* Wrong, FIXME */ + stv06xx_write_bridge(sd, STV_X_CTRL, 0x06); + + stv06xx_write_bridge(sd, STV_SCAN_RATE, 0x10); + } else { + stv06xx_write_bridge(sd, STV_Y_CTRL, 0x01); + stv06xx_write_bridge(sd, 
STV_X_CTRL, 0x0a); + /* larger -> slower */ + stv06xx_write_bridge(sd, STV_SCAN_RATE, 0x20); + } + + /* set_gain also sets red and blue balance */ + pb0100_set_gain(&sd->gspca_dev, sensor_settings[GAIN_IDX]); + pb0100_set_exposure(&sd->gspca_dev, sensor_settings[EXPOSURE_IDX]); + pb0100_set_autogain_target(&sd->gspca_dev, + sensor_settings[AUTOGAIN_TARGET_IDX]); + pb0100_set_autogain(&sd->gspca_dev, sensor_settings[AUTOGAIN_IDX]); + + err = stv06xx_write_sensor(sd, PB_CONTROL, BIT(5)|BIT(3)|BIT(1)); + PDEBUG(D_STREAM, "Started stream, status: %d", err); + + return (err < 0) ? err : 0; +} + +static int pb0100_stop(struct sd *sd) +{ + int err; + + err = stv06xx_write_sensor(sd, PB_ABORTFRAME, 1); + + if (err < 0) + goto out; + + /* Set bit 1 to zero */ + err = stv06xx_write_sensor(sd, PB_CONTROL, BIT(5)|BIT(3)); + + PDEBUG(D_STREAM, "Halting stream"); +out: + return (err < 0) ? err : 0; +} + +/* FIXME: Sort the init commands out and put them into tables, + this is only for getting the camera to work */ +/* FIXME: No error handling for now, + add this once the init has been converted to proper tables */ +static int pb0100_init(struct sd *sd) +{ + stv06xx_write_bridge(sd, STV_REG00, 1); + stv06xx_write_bridge(sd, STV_SCAN_RATE, 0); + + /* Reset sensor */ + stv06xx_write_sensor(sd, PB_RESET, 1); + stv06xx_write_sensor(sd, PB_RESET, 0); + + /* Disable chip */ + stv06xx_write_sensor(sd, PB_CONTROL, BIT(5)|BIT(3)); + + /* Gain stuff...*/ + stv06xx_write_sensor(sd, PB_PREADCTRL, BIT(12)|BIT(10)|BIT(6)); + stv06xx_write_sensor(sd, PB_ADCGLOBALGAIN, 12); + + /* Set up auto-exposure */ + /* ADC VREF_HI new setting for a transition + from the Expose1 to the Expose2 setting */ + stv06xx_write_sensor(sd, PB_R28, 12); + /* gain max for autoexposure */ + stv06xx_write_sensor(sd, PB_ADCMAXGAIN, 180); + /* gain min for autoexposure */ + stv06xx_write_sensor(sd, PB_ADCMINGAIN, 12); + /* Maximum frame integration time (programmed into R8) + allowed for auto-exposure routine */ + 
stv06xx_write_sensor(sd, PB_R54, 3); + /* Minimum frame integration time (programmed into R8) + allowed for auto-exposure routine */ + stv06xx_write_sensor(sd, PB_R55, 0); + stv06xx_write_sensor(sd, PB_UPDATEINT, 1); + /* R15 Expose0 (maximum that auto-exposure may use) */ + stv06xx_write_sensor(sd, PB_R15, 800); + /* R17 Expose2 (minimum that auto-exposure may use) */ + stv06xx_write_sensor(sd, PB_R17, 10); + + stv06xx_write_sensor(sd, PB_EXPGAIN, 0); + + /* 0x14 */ + stv06xx_write_sensor(sd, PB_VOFFSET, 0); + /* 0x0D */ + stv06xx_write_sensor(sd, PB_ADCGAINH, 11); + /* Set black level (important!) */ + stv06xx_write_sensor(sd, PB_ADCGAINL, 0); + + /* ??? */ + stv06xx_write_bridge(sd, STV_REG00, 0x11); + stv06xx_write_bridge(sd, STV_REG03, 0x45); + stv06xx_write_bridge(sd, STV_REG04, 0x07); + + /* ISO-Size (0x27b: 635... why? - HDCS uses 847) */ + stv06xx_write_bridge(sd, STV_ISO_SIZE_L, 847); + + /* Scan/timing for the sensor */ + stv06xx_write_sensor(sd, PB_ROWSPEED, BIT(4)|BIT(3)|BIT(1)); + stv06xx_write_sensor(sd, PB_CFILLIN, 14); + stv06xx_write_sensor(sd, PB_VBL, 0); + stv06xx_write_sensor(sd, PB_FINTTIME, 0); + stv06xx_write_sensor(sd, PB_RINTTIME, 123); + + stv06xx_write_bridge(sd, STV_REG01, 0xc2); + stv06xx_write_bridge(sd, STV_REG02, 0xb0); + return 0; +} + +static int pb0100_dump(struct sd *sd) +{ + return 0; +} + +static int pb0100_get_gain(struct gspca_dev *gspca_dev, __s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + *val = sensor_settings[GAIN_IDX]; + + return 0; +} + +static int pb0100_set_gain(struct gspca_dev *gspca_dev, __s32 val) +{ + int err; + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + if (sensor_settings[AUTOGAIN_IDX]) + return -EBUSY; + + sensor_settings[GAIN_IDX] = val; + err = stv06xx_write_sensor(sd, PB_G1GAIN, val); + if (!err) + err = stv06xx_write_sensor(sd, PB_G2GAIN, val); + PDEBUG(D_V4L2, "Set green gain to %d, status: %d", val, err); 
+ + if (!err) + err = pb0100_set_red_balance(gspca_dev, + sensor_settings[RED_BALANCE_IDX]); + if (!err) + err = pb0100_set_blue_balance(gspca_dev, + sensor_settings[BLUE_BALANCE_IDX]); + + return err; +} + +static int pb0100_get_red_balance(struct gspca_dev *gspca_dev, __s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + *val = sensor_settings[RED_BALANCE_IDX]; + + return 0; +} + +static int pb0100_set_red_balance(struct gspca_dev *gspca_dev, __s32 val) +{ + int err; + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + if (sensor_settings[AUTOGAIN_IDX]) + return -EBUSY; + + sensor_settings[RED_BALANCE_IDX] = val; + val += sensor_settings[GAIN_IDX]; + if (val < 0) + val = 0; + else if (val > 255) + val = 255; + + err = stv06xx_write_sensor(sd, PB_RGAIN, val); + PDEBUG(D_V4L2, "Set red gain to %d, status: %d", val, err); + + return err; +} + +static int pb0100_get_blue_balance(struct gspca_dev *gspca_dev, __s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + *val = sensor_settings[BLUE_BALANCE_IDX]; + + return 0; +} + +static int pb0100_set_blue_balance(struct gspca_dev *gspca_dev, __s32 val) +{ + int err; + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + if (sensor_settings[AUTOGAIN_IDX]) + return -EBUSY; + + sensor_settings[BLUE_BALANCE_IDX] = val; + val += sensor_settings[GAIN_IDX]; + if (val < 0) + val = 0; + else if (val > 255) + val = 255; + + err = stv06xx_write_sensor(sd, PB_BGAIN, val); + PDEBUG(D_V4L2, "Set blue gain to %d, status: %d", val, err); + + return err; +} + +static int pb0100_get_exposure(struct gspca_dev *gspca_dev, __s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + *val = sensor_settings[EXPOSURE_IDX]; + + return 0; +} + +static int pb0100_set_exposure(struct gspca_dev *gspca_dev, __s32 val) +{ + int err; + struct sd 
*sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + if (sensor_settings[AUTOGAIN_IDX]) + return -EBUSY; + + sensor_settings[EXPOSURE_IDX] = val; + err = stv06xx_write_sensor(sd, PB_RINTTIME, val); + PDEBUG(D_V4L2, "Set exposure to %d, status: %d", val, err); + + return err; +} + +static int pb0100_get_autogain(struct gspca_dev *gspca_dev, __s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + *val = sensor_settings[AUTOGAIN_IDX]; + + return 0; +} + +static int pb0100_set_autogain(struct gspca_dev *gspca_dev, __s32 val) +{ + int err; + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + sensor_settings[AUTOGAIN_IDX] = val; + if (sensor_settings[AUTOGAIN_IDX]) { + if (sensor_settings[NATURAL_IDX]) + val = BIT(6)|BIT(4)|BIT(0); + else + val = BIT(4)|BIT(0); + } else + val = 0; + + err = stv06xx_write_sensor(sd, PB_EXPGAIN, val); + PDEBUG(D_V4L2, "Set autogain to %d (natural: %d), status: %d", + sensor_settings[AUTOGAIN_IDX], sensor_settings[NATURAL_IDX], + err); + + return err; +} + +static int pb0100_get_autogain_target(struct gspca_dev *gspca_dev, __s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + *val = sensor_settings[AUTOGAIN_TARGET_IDX]; + + return 0; +} + +static int pb0100_set_autogain_target(struct gspca_dev *gspca_dev, __s32 val) +{ + int err, totalpixels, brightpixels, darkpixels; + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + sensor_settings[AUTOGAIN_TARGET_IDX] = val; + + /* Number of pixels counted by the sensor when subsampling the pixels. 
+ * Slightly larger than the real value to avoid oscillation */ + totalpixels = gspca_dev->width * gspca_dev->height; + totalpixels = totalpixels/(8*8) + totalpixels/(64*64); + + brightpixels = (totalpixels * val) >> 8; + darkpixels = totalpixels - brightpixels; + err = stv06xx_write_sensor(sd, PB_R21, brightpixels); + if (!err) + err = stv06xx_write_sensor(sd, PB_R22, darkpixels); + + PDEBUG(D_V4L2, "Set autogain target to %d, status: %d", val, err); + + return err; +} + +static int pb0100_get_natural(struct gspca_dev *gspca_dev, __s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + *val = sensor_settings[NATURAL_IDX]; + + return 0; +} + +static int pb0100_set_natural(struct gspca_dev *gspca_dev, __s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + s32 *sensor_settings = sd->sensor_priv; + + sensor_settings[NATURAL_IDX] = val; + + return pb0100_set_autogain(gspca_dev, sensor_settings[AUTOGAIN_IDX]); +} diff --git a/drivers/media/video/gspca/stv06xx/stv06xx_pb0100.h b/drivers/media/video/gspca/stv06xx/stv06xx_pb0100.h new file mode 100644 index 00000000000..5ea21a1154c --- /dev/null +++ b/drivers/media/video/gspca/stv06xx/stv06xx_pb0100.h @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2001 Jean-Fredric Clere, Nikolas Zimmermann, Georg Acher + * Mark Cave-Ayland, Carlo E Prelz, Dick Streefland + * Copyright (c) 2002, 2003 Tuukka Toivonen + * Copyright (c) 2008 Erik AndrĂ©n + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * P/N 861037: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0010: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0020: Sensor Photobit PB100 ASIC STV0600-1 - QuickCam Express + * P/N 861055: Sensor ST VV6410 ASIC STV0610 - LEGO cam + * P/N 861075-0040: Sensor HDCS1000 ASIC + * P/N 961179-0700: Sensor ST VV6410 ASIC STV0602 - Dexxa WebCam USB + * P/N 861040-0000: Sensor ST VV6410 ASIC STV0610 - QuickCam Web + */ + +#ifndef STV06XX_PB0100_H_ +#define STV06XX_PB0100_H_ + +#include "stv06xx_sensor.h" + +/* mode priv field flags */ +#define PB0100_CROP_TO_VGA 0x01 +#define PB0100_SUBSAMPLE 0x02 + +/* I2C Registers */ +#define PB_IDENT 0x00 /* Chip Version */ +#define PB_RSTART 0x01 /* Row Window Start */ +#define PB_CSTART 0x02 /* Column Window Start */ +#define PB_RWSIZE 0x03 /* Row Window Size */ +#define PB_CWSIZE 0x04 /* Column Window Size */ +#define PB_CFILLIN 0x05 /* Column Fill-In */ +#define PB_VBL 0x06 /* Vertical Blank Count */ +#define PB_CONTROL 0x07 /* Control Mode */ +#define PB_FINTTIME 0x08 /* Integration Time/Frame Unit Count */ +#define PB_RINTTIME 0x09 /* Integration Time/Row Unit Count */ +#define PB_ROWSPEED 0x0a /* Row Speed Control */ +#define PB_ABORTFRAME 0x0b /* Abort Frame */ +#define PB_R12 0x0c /* Reserved */ +#define PB_RESET 0x0d /* Reset */ +#define PB_EXPGAIN 0x0e /* Exposure Gain Command */ +#define PB_R15 0x0f /* Expose0 */ +#define PB_R16 0x10 /* Expose1 */ +#define PB_R17 0x11 /* Expose2 */ +#define PB_R18 0x12 /* Low0_DAC */ +#define PB_R19 0x13 /* Low1_DAC */ +#define PB_R20 0x14 /* Low2_DAC */ +#define PB_R21 0x15 /* Threshold11 */ +#define PB_R22 0x16 /* Threshold0x */ +#define PB_UPDATEINT 0x17 /* Update Interval */ +#define PB_R24 0x18 /* High_DAC */ +#define PB_R25 0x19 /* Trans0H */ +#define PB_R26 0x1a /* Trans1L 
*/ +#define PB_R27 0x1b /* Trans1H */ +#define PB_R28 0x1c /* Trans2L */ +#define PB_R29 0x1d /* Reserved */ +#define PB_R30 0x1e /* Reserved */ +#define PB_R31 0x1f /* Wait to Read */ +#define PB_PREADCTRL 0x20 /* Pixel Read Control Mode */ +#define PB_R33 0x21 /* IREF_VLN */ +#define PB_R34 0x22 /* IREF_VLP */ +#define PB_R35 0x23 /* IREF_VLN_INTEG */ +#define PB_R36 0x24 /* IREF_MASTER */ +#define PB_R37 0x25 /* IDACP */ +#define PB_R38 0x26 /* IDACN */ +#define PB_R39 0x27 /* DAC_Control_Reg */ +#define PB_R40 0x28 /* VCL */ +#define PB_R41 0x29 /* IREF_VLN_ADCIN */ +#define PB_R42 0x2a /* Reserved */ +#define PB_G1GAIN 0x2b /* Green 1 Gain */ +#define PB_BGAIN 0x2c /* Blue Gain */ +#define PB_RGAIN 0x2d /* Red Gain */ +#define PB_G2GAIN 0x2e /* Green 2 Gain */ +#define PB_R47 0x2f /* Dark Row Address */ +#define PB_R48 0x30 /* Dark Row Options */ +#define PB_R49 0x31 /* Reserved */ +#define PB_R50 0x32 /* Image Test Data */ +#define PB_ADCMAXGAIN 0x33 /* Maximum Gain */ +#define PB_ADCMINGAIN 0x34 /* Minimum Gain */ +#define PB_ADCGLOBALGAIN 0x35 /* Global Gain */ +#define PB_R54 0x36 /* Maximum Frame */ +#define PB_R55 0x37 /* Minimum Frame */ +#define PB_R56 0x38 /* Reserved */ +#define PB_VOFFSET 0x39 /* VOFFSET */ +#define PB_R58 0x3a /* Snap-Shot Sequence Trigger */ +#define PB_ADCGAINH 0x3b /* VREF_HI */ +#define PB_ADCGAINL 0x3c /* VREF_LO */ +#define PB_R61 0x3d /* Reserved */ +#define PB_R62 0x3e /* Reserved */ +#define PB_R63 0x3f /* Reserved */ +#define PB_R64 0x40 /* Red/Blue Gain */ +#define PB_R65 0x41 /* Green 2/Green 1 Gain */ +#define PB_R66 0x42 /* VREF_HI/LO */ +#define PB_R67 0x43 /* Integration Time/Row Unit Count */ +#define PB_R240 0xf0 /* ADC Test */ +#define PB_R241 0xf1 /* Chip Enable */ +#define PB_R242 0xf2 /* Reserved */ + +static int pb0100_probe(struct sd *sd); +static int pb0100_start(struct sd *sd); +static int pb0100_init(struct sd *sd); +static int pb0100_stop(struct sd *sd); +static int pb0100_dump(struct sd *sd); + +/* V4L2 
controls supported by the driver */ +static int pb0100_get_gain(struct gspca_dev *gspca_dev, __s32 *val); +static int pb0100_set_gain(struct gspca_dev *gspca_dev, __s32 val); +static int pb0100_get_red_balance(struct gspca_dev *gspca_dev, __s32 *val); +static int pb0100_set_red_balance(struct gspca_dev *gspca_dev, __s32 val); +static int pb0100_get_blue_balance(struct gspca_dev *gspca_dev, __s32 *val); +static int pb0100_set_blue_balance(struct gspca_dev *gspca_dev, __s32 val); +static int pb0100_get_exposure(struct gspca_dev *gspca_dev, __s32 *val); +static int pb0100_set_exposure(struct gspca_dev *gspca_dev, __s32 val); +static int pb0100_get_autogain(struct gspca_dev *gspca_dev, __s32 *val); +static int pb0100_set_autogain(struct gspca_dev *gspca_dev, __s32 val); +static int pb0100_get_autogain_target(struct gspca_dev *gspca_dev, __s32 *val); +static int pb0100_set_autogain_target(struct gspca_dev *gspca_dev, __s32 val); +static int pb0100_get_natural(struct gspca_dev *gspca_dev, __s32 *val); +static int pb0100_set_natural(struct gspca_dev *gspca_dev, __s32 val); + +const struct stv06xx_sensor stv06xx_sensor_pb0100 = { + .name = "PB-0100", + .i2c_flush = 1, + .i2c_addr = 0xba, + .i2c_len = 2, + + .nctrls = 7, + .ctrls = { +#define GAIN_IDX 0 + { + { + .id = V4L2_CID_GAIN, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Gain", + .minimum = 0, + .maximum = 255, + .step = 1, + .default_value = 128 + }, + .set = pb0100_set_gain, + .get = pb0100_get_gain + }, +#define RED_BALANCE_IDX 1 + { + { + .id = V4L2_CID_RED_BALANCE, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Red Balance", + .minimum = -255, + .maximum = 255, + .step = 1, + .default_value = 0 + }, + .set = pb0100_set_red_balance, + .get = pb0100_get_red_balance + }, +#define BLUE_BALANCE_IDX 2 + { + { + .id = V4L2_CID_BLUE_BALANCE, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Blue Balance", + .minimum = -255, + .maximum = 255, + .step = 1, + .default_value = 0 + }, + .set = pb0100_set_blue_balance, + .get = 
pb0100_get_blue_balance + }, +#define EXPOSURE_IDX 3 + { + { + .id = V4L2_CID_EXPOSURE, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Exposure", + .minimum = 0, + .maximum = 511, + .step = 1, + .default_value = 12 + }, + .set = pb0100_set_exposure, + .get = pb0100_get_exposure + }, +#define AUTOGAIN_IDX 4 + { + { + .id = V4L2_CID_AUTOGAIN, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "Automatic Gain and Exposure", + .minimum = 0, + .maximum = 1, + .step = 1, + .default_value = 1 + }, + .set = pb0100_set_autogain, + .get = pb0100_get_autogain + }, +#define AUTOGAIN_TARGET_IDX 5 + { + { + .id = V4L2_CTRL_CLASS_USER + 0x1000, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Automatic Gain Target", + .minimum = 0, + .maximum = 255, + .step = 1, + .default_value = 128 + }, + .set = pb0100_set_autogain_target, + .get = pb0100_get_autogain_target + }, +#define NATURAL_IDX 6 + { + { + .id = V4L2_CTRL_CLASS_USER + 0x1001, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "Natural Light Source", + .minimum = 0, + .maximum = 1, + .step = 1, + .default_value = 1 + }, + .set = pb0100_set_natural, + .get = pb0100_get_natural + }, + }, + + .init = pb0100_init, + .probe = pb0100_probe, + .start = pb0100_start, + .stop = pb0100_stop, + .dump = pb0100_dump, + + .nmodes = 2, + .modes = { +/* low res / subsample modes disabled as they are only half res horizontal, + halving the vertical resolution does not seem to work */ + { + 320, + 240, + V4L2_PIX_FMT_SGRBG8, + V4L2_FIELD_NONE, + .sizeimage = 320 * 240, + .bytesperline = 320, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = PB0100_CROP_TO_VGA + }, + { + 352, + 288, + V4L2_PIX_FMT_SGRBG8, + V4L2_FIELD_NONE, + .sizeimage = 352 * 288, + .bytesperline = 352, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 0 + }, + } +}; + +#endif diff --git a/drivers/media/video/gspca/stv06xx/stv06xx_sensor.h b/drivers/media/video/gspca/stv06xx/stv06xx_sensor.h new file mode 100644 index 00000000000..c726dacefa1 --- /dev/null +++ 
b/drivers/media/video/gspca/stv06xx/stv06xx_sensor.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2001 Jean-Fredric Clere, Nikolas Zimmermann, Georg Acher + * Mark Cave-Ayland, Carlo E Prelz, Dick Streefland + * Copyright (c) 2002, 2003 Tuukka Toivonen + * Copyright (c) 2008 Erik Andrén + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * P/N 861037: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0010: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0020: Sensor Photobit PB100 ASIC STV0600-1 - QuickCam Express + * P/N 861055: Sensor ST VV6410 ASIC STV0610 - LEGO cam + * P/N 861075-0040: Sensor HDCS1000 ASIC + * P/N 961179-0700: Sensor ST VV6410 ASIC STV0602 - Dexxa WebCam USB + * P/N 861040-0000: Sensor ST VV6410 ASIC STV0610 - QuickCam Web + */ + +#ifndef STV06XX_SENSOR_H_ +#define STV06XX_SENSOR_H_ + +#include "stv06xx.h" + +#define IS_850(sd) ((sd)->gspca_dev.dev->descriptor.idProduct == 0x850) +#define IS_870(sd) ((sd)->gspca_dev.dev->descriptor.idProduct == 0x870) +#define IS_1020(sd) ((sd)->sensor == &stv06xx_sensor_hdcs1020) + +extern const struct stv06xx_sensor stv06xx_sensor_vv6410; +extern const struct stv06xx_sensor stv06xx_sensor_hdcs1x00; +extern const struct stv06xx_sensor stv06xx_sensor_hdcs1020; +extern const struct stv06xx_sensor stv06xx_sensor_pb0100; + +#define STV06XX_MAX_CTRLS
(V4L2_CID_LASTP1 - V4L2_CID_BASE + 10) + +struct stv06xx_sensor { + /* Defines the name of a sensor */ + char name[32]; + + /* Sensor i2c address */ + u8 i2c_addr; + + /* Flush value*/ + u8 i2c_flush; + + /* length of an i2c word */ + u8 i2c_len; + + /* Probes if the sensor is connected */ + int (*probe)(struct sd *sd); + + /* Performs a initialization sequence */ + int (*init)(struct sd *sd); + + /* Executed at device disconnect */ + void (*disconnect)(struct sd *sd); + + /* Reads a sensor register */ + int (*read_sensor)(struct sd *sd, const u8 address, + u8 *i2c_data, const u8 len); + + /* Writes to a sensor register */ + int (*write_sensor)(struct sd *sd, const u8 address, + u8 *i2c_data, const u8 len); + + /* Instructs the sensor to start streaming */ + int (*start)(struct sd *sd); + + /* Instructs the sensor to stop streaming */ + int (*stop)(struct sd *sd); + + /* Instructs the sensor to dump all its contents */ + int (*dump)(struct sd *sd); + + int nctrls; + struct ctrl ctrls[STV06XX_MAX_CTRLS]; + + char nmodes; + struct v4l2_pix_format modes[]; +}; + +#endif diff --git a/drivers/media/video/gspca/stv06xx/stv06xx_vv6410.c b/drivers/media/video/gspca/stv06xx/stv06xx_vv6410.c new file mode 100644 index 00000000000..1ca91f2a6de --- /dev/null +++ b/drivers/media/video/gspca/stv06xx/stv06xx_vv6410.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2001 Jean-Fredric Clere, Nikolas Zimmermann, Georg Acher + * Mark Cave-Ayland, Carlo E Prelz, Dick Streefland + * Copyright (c) 2002, 2003 Tuukka Toivonen + * Copyright (c) 2008 Erik Andrén + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * P/N 861037: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0010: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0020: Sensor Photobit PB100 ASIC STV0600-1 - QuickCam Express + * P/N 861055: Sensor ST VV6410 ASIC STV0610 - LEGO cam + * P/N 861075-0040: Sensor HDCS1000 ASIC + * P/N 961179-0700: Sensor ST VV6410 ASIC STV0602 - Dexxa WebCam USB + * P/N 861040-0000: Sensor ST VV6410 ASIC STV0610 - QuickCam Web + */ + +#include "stv06xx_vv6410.h" + +static int vv6410_probe(struct sd *sd) +{ + u16 data; + int err; + + err = stv06xx_read_sensor(sd, VV6410_DEVICEH, &data); + + if (err < 0) + return -ENODEV; + + if (data == 0x19) { + info("vv6410 sensor detected"); + + sd->gspca_dev.cam.cam_mode = stv06xx_sensor_vv6410.modes; + sd->gspca_dev.cam.nmodes = stv06xx_sensor_vv6410.nmodes; + sd->desc.ctrls = stv06xx_sensor_vv6410.ctrls; + sd->desc.nctrls = stv06xx_sensor_vv6410.nctrls; + return 0; + } + + return -ENODEV; +} + +static int vv6410_init(struct sd *sd) +{ + int err = 0, i; + + for (i = 0; i < ARRAY_SIZE(stv_bridge_init); i++) { + /* if NULL then len contains single value */ + if (stv_bridge_init[i].data == NULL) { + err = stv06xx_write_bridge(sd, + stv_bridge_init[i].start, + stv_bridge_init[i].len); + } else { + int j; + for (j = 0; j < stv_bridge_init[i].len; j++) + err = stv06xx_write_bridge(sd, + stv_bridge_init[i].start + j, + stv_bridge_init[i].data[j]); + } + } + + if (err < 0) + return err; + + err = stv06xx_write_sensor_bytes(sd, (u8 *) vv6410_sensor_init, + ARRAY_SIZE(vv6410_sensor_init)); + + 
return (err < 0) ? err : 0; +} + +static int vv6410_start(struct sd *sd) +{ + int err; + struct cam *cam = &sd->gspca_dev.cam; + u32 priv = cam->cam_mode[sd->gspca_dev.curr_mode].priv; + + if (priv & VV6410_CROP_TO_QVGA) { + PDEBUG(D_CONF, "Cropping to QVGA"); + stv06xx_write_sensor(sd, VV6410_XENDH, 320 - 1); + stv06xx_write_sensor(sd, VV6410_YENDH, 240 - 1); + } else { + stv06xx_write_sensor(sd, VV6410_XENDH, 360 - 1); + stv06xx_write_sensor(sd, VV6410_YENDH, 294 - 1); + } + + if (priv & VV6410_SUBSAMPLE) { + PDEBUG(D_CONF, "Enabling subsampling"); + stv06xx_write_bridge(sd, STV_Y_CTRL, 0x02); + stv06xx_write_bridge(sd, STV_X_CTRL, 0x06); + + stv06xx_write_bridge(sd, STV_SCAN_RATE, 0x10); + } else { + stv06xx_write_bridge(sd, STV_Y_CTRL, 0x01); + stv06xx_write_bridge(sd, STV_X_CTRL, 0x0a); + + stv06xx_write_bridge(sd, STV_SCAN_RATE, 0x20); + } + + /* Turn on LED */ + err = stv06xx_write_bridge(sd, STV_LED_CTRL, LED_ON); + if (err < 0) + return err; + + err = stv06xx_write_sensor(sd, VV6410_SETUP0, 0); + if (err < 0) + return err; + + PDEBUG(D_STREAM, "Starting stream"); + + return 0; +} + +static int vv6410_stop(struct sd *sd) +{ + int err; + + /* Turn off LED */ + err = stv06xx_write_bridge(sd, STV_LED_CTRL, LED_OFF); + if (err < 0) + return err; + + err = stv06xx_write_sensor(sd, VV6410_SETUP0, VV6410_LOW_POWER_MODE); + if (err < 0) + return err; + + PDEBUG(D_STREAM, "Halting stream"); + + return (err < 0) ? err : 0; +} + +static int vv6410_dump(struct sd *sd) +{ + u8 i; + int err = 0; + + info("Dumping all vv6410 sensor registers"); + for (i = 0; i < 0xff && !err; i++) { + u16 data; + err = stv06xx_read_sensor(sd, i, &data); + info("Register 0x%x contained 0x%x", i, data); + } + return (err < 0) ? err : 0; +} + +static int vv6410_get_hflip(struct gspca_dev *gspca_dev, __s32 *val) +{ + int err; + u16 i2c_data; + struct sd *sd = (struct sd *) gspca_dev; + + err = stv06xx_read_sensor(sd, VV6410_DATAFORMAT, &i2c_data); + + *val = (i2c_data & VV6410_HFLIP) ? 
1 : 0; + + PDEBUG(D_V4L2, "Read horizontal flip %d", *val); + + return (err < 0) ? err : 0; +} + +static int vv6410_set_hflip(struct gspca_dev *gspca_dev, __s32 val) +{ + int err; + u16 i2c_data; + struct sd *sd = (struct sd *) gspca_dev; + err = stv06xx_read_sensor(sd, VV6410_DATAFORMAT, &i2c_data); + if (err < 0) + return err; + + if (val) + i2c_data |= VV6410_HFLIP; + else + i2c_data &= ~VV6410_HFLIP; + + PDEBUG(D_V4L2, "Set horizontal flip to %d", val); + err = stv06xx_write_sensor(sd, VV6410_DATAFORMAT, i2c_data); + + return (err < 0) ? err : 0; +} + +static int vv6410_get_vflip(struct gspca_dev *gspca_dev, __s32 *val) +{ + int err; + u16 i2c_data; + struct sd *sd = (struct sd *) gspca_dev; + + err = stv06xx_read_sensor(sd, VV6410_DATAFORMAT, &i2c_data); + + *val = (i2c_data & VV6410_VFLIP) ? 1 : 0; + + PDEBUG(D_V4L2, "Read vertical flip %d", *val); + + return (err < 0) ? err : 0; +} + +static int vv6410_set_vflip(struct gspca_dev *gspca_dev, __s32 val) +{ + int err; + u16 i2c_data; + struct sd *sd = (struct sd *) gspca_dev; + err = stv06xx_read_sensor(sd, VV6410_DATAFORMAT, &i2c_data); + if (err < 0) + return err; + + if (val) + i2c_data |= VV6410_VFLIP; + else + i2c_data &= ~VV6410_VFLIP; + + PDEBUG(D_V4L2, "Set vertical flip to %d", val); + err = stv06xx_write_sensor(sd, VV6410_DATAFORMAT, i2c_data); + + return (err < 0) ? err : 0; +} + +static int vv6410_get_analog_gain(struct gspca_dev *gspca_dev, __s32 *val) +{ + int err; + u16 i2c_data; + struct sd *sd = (struct sd *) gspca_dev; + + err = stv06xx_read_sensor(sd, VV6410_ANALOGGAIN, &i2c_data); + + *val = i2c_data & 0xf; + + PDEBUG(D_V4L2, "Read analog gain %d", *val); + + return (err < 0) ? err : 0; +} + +static int vv6410_set_analog_gain(struct gspca_dev *gspca_dev, __s32 val) +{ + int err; + struct sd *sd = (struct sd *) gspca_dev; + + PDEBUG(D_V4L2, "Set analog gain to %d", val); + err = stv06xx_write_sensor(sd, VV6410_ANALOGGAIN, 0xf0 | (val & 0xf)); + + return (err < 0) ? 
err : 0; +} diff --git a/drivers/media/video/gspca/stv06xx/stv06xx_vv6410.h b/drivers/media/video/gspca/stv06xx/stv06xx_vv6410.h new file mode 100644 index 00000000000..3ff8c4ea336 --- /dev/null +++ b/drivers/media/video/gspca/stv06xx/stv06xx_vv6410.h @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2001 Jean-Fredric Clere, Nikolas Zimmermann, Georg Acher + * Mark Cave-Ayland, Carlo E Prelz, Dick Streefland + * Copyright (c) 2002, 2003 Tuukka Toivonen + * Copyright (c) 2008 Erik Andrén + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * P/N 861037: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0010: Sensor HDCS1000 ASIC STV0600 + * P/N 861050-0020: Sensor Photobit PB100 ASIC STV0600-1 - QuickCam Express + * P/N 861055: Sensor ST VV6410 ASIC STV0610 - LEGO cam + * P/N 861075-0040: Sensor HDCS1000 ASIC + * P/N 961179-0700: Sensor ST VV6410 ASIC STV0602 - Dexxa WebCam USB + * P/N 861040-0000: Sensor ST VV6410 ASIC STV0610 - QuickCam Web + */ + +#ifndef STV06XX_VV6410_H_ +#define STV06XX_VV6410_H_ + +#include "stv06xx_sensor.h" + +#define VV6410_COLS 416 +#define VV6410_ROWS 320 + +/* Status registers */ +/* Chip identification number including revision indicator */ +#define VV6410_DEVICEH 0x00 +#define VV6410_DEVICEL 0x01 + +/* User can determine whether timed I2C data + has been consumed by interrogating
flag states */ +#define VV6410_STATUS0 0x02 + +/* Current line counter value */ +#define VV6410_LINECOUNTH 0x03 +#define VV6410_LINECOUNTL 0x04 + +/* End x coordinate of image size */ +#define VV6410_XENDH 0x05 +#define VV6410_XENDL 0x06 + +/* End y coordinate of image size */ +#define VV6410_YENDH 0x07 +#define VV6410_YENDL 0x08 + +/* This is the average pixel value returned from the + dark line offset cancellation algorithm */ +#define VV6410_DARKAVGH 0x09 +#define VV6410_DARKAVGL 0x0a + +/* This is the average pixel value returned from the + black line offset cancellation algorithm */ +#define VV6410_BLACKAVGH 0x0b +#define VV6410_BLACKAVGL 0x0c + +/* Flags to indicate whether the x or y image coordinates have been clipped */ +#define VV6410_STATUS1 0x0d + +/* Setup registers */ + +/* Low-power/sleep modes & video timing */ +#define VV6410_SETUP0 0x10 + +/* Various parameters */ +#define VV6410_SETUP1 0x11 + +/* Contains pixel counter reset value used by external sync */ +#define VV6410_SYNCVALUE 0x12 + +/* Frame grabbing modes (FST, LST and QCK) */ +#define VV6410_FGMODES 0x14 + +/* FST and QCK mapping modes. */ +#define VV6410_PINMAPPING 0x15 + +/* Data resolution */ +#define VV6410_DATAFORMAT 0x16 + +/* Output coding formats */ +#define VV6410_OPFORMAT 0x17 + +/* Various mode select bits */ +#define VV6410_MODESELECT 0x18 + +/* Exposure registers */ +/* Fine exposure. 
*/ +#define VV6410_FINEH 0x20 +#define VV6410_FINEL 0x21 + +/* Coarse exposure */ +#define VV6410_COARSEH 0x22 +#define VV6410_COARSEL 0x23 + +/* Analog gain setting */ +#define VV6410_ANALOGGAIN 0x24 + +/* Clock division */ +#define VV6410_CLKDIV 0x25 + +/* Dark line offset cancellation value */ +#define VV6410_DARKOFFSETH 0x2c +#define VV6410_DARKOFFSETL 0x2d + +/* Dark line offset cancellation enable */ +#define VV6410_DARKOFFSETSETUP 0x2e + +/* Video timing registers */ +/* Line Length (Pixel Clocks) */ +#define VV6410_LINELENGTHH 0x52 +#define VV6410_LINELENGTHL 0x53 + +/* X-co-ordinate of top left corner of region of interest (x-offset) */ +#define VV6410_XOFFSETH 0x57 +#define VV6410_XOFFSETL 0x58 + +/* Y-coordinate of top left corner of region of interest (y-offset) */ +#define VV6410_YOFFSETH 0x59 +#define VV6410_YOFFSETL 0x5a + +/* Field length (Lines) */ +#define VV6410_FIELDLENGTHH 0x61 +#define VV6410_FIELDLENGTHL 0x62 + +/* System registers */ +/* Black offset cancellation default value */ +#define VV6410_BLACKOFFSETH 0x70 +#define VV6410_BLACKOFFSETL 0x71 + +/* Black offset cancellation setup */ +#define VV6410_BLACKOFFSETSETUP 0x72 + +/* Analog Control Register 0 */ +#define VV6410_CR0 0x75 + +/* Analog Control Register 1 */ +#define VV6410_CR1 0x76 + +/* ADC Setup Register */ +#define VV6410_AS0 0x77 + +/* Analog Test Register */ +#define VV6410_AT0 0x78 + +/* Audio Amplifier Setup Register */ +#define VV6410_AT1 0x79 + +#define VV6410_HFLIP (1 << 3) +#define VV6410_VFLIP (1 << 4) + +#define VV6410_LOW_POWER_MODE (1 << 0) +#define VV6410_SOFT_RESET (1 << 2) +#define VV6410_PAL_25_FPS (0 << 3) + +#define VV6410_CLK_DIV_2 (1 << 1) + +#define VV6410_FINE_EXPOSURE 320 +#define VV6410_COARSE_EXPOSURE 192 +#define VV6410_DEFAULT_GAIN 5 + +#define VV6410_SUBSAMPLE 0x01 +#define VV6410_CROP_TO_QVGA 0x02 + +static int vv6410_probe(struct sd *sd); +static int vv6410_start(struct sd *sd); +static int vv6410_init(struct sd *sd); +static int vv6410_stop(struct 
sd *sd); +static int vv6410_dump(struct sd *sd); + +/* V4L2 controls supported by the driver */ +static int vv6410_get_hflip(struct gspca_dev *gspca_dev, __s32 *val); +static int vv6410_set_hflip(struct gspca_dev *gspca_dev, __s32 val); +static int vv6410_get_vflip(struct gspca_dev *gspca_dev, __s32 *val); +static int vv6410_set_vflip(struct gspca_dev *gspca_dev, __s32 val); +static int vv6410_get_analog_gain(struct gspca_dev *gspca_dev, __s32 *val); +static int vv6410_set_analog_gain(struct gspca_dev *gspca_dev, __s32 val); + +const struct stv06xx_sensor stv06xx_sensor_vv6410 = { + .name = "ST VV6410", + .i2c_flush = 5, + .i2c_addr = 0x20, + .i2c_len = 1, + .init = vv6410_init, + .probe = vv6410_probe, + .start = vv6410_start, + .stop = vv6410_stop, + .dump = vv6410_dump, + + .nctrls = 3, + .ctrls = { + { + { + .id = V4L2_CID_HFLIP, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "horizontal flip", + .minimum = 0, + .maximum = 1, + .step = 1, + .default_value = 0 + }, + .set = vv6410_set_hflip, + .get = vv6410_get_hflip + }, { + { + .id = V4L2_CID_VFLIP, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "vertical flip", + .minimum = 0, + .maximum = 1, + .step = 1, + .default_value = 0 + }, + .set = vv6410_set_vflip, + .get = vv6410_get_vflip + }, { + { + .id = V4L2_CID_GAIN, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "analog gain", + .minimum = 0, + .maximum = 15, + .step = 1, + .default_value = 0 + }, + .set = vv6410_set_analog_gain, + .get = vv6410_get_analog_gain + } + }, + + .nmodes = 1, + .modes = { + { + 356, + 292, + V4L2_PIX_FMT_SGRBG8, + V4L2_FIELD_NONE, + .sizeimage = + 356 * 292, + .bytesperline = 356, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 0 + } + } +}; + +/* If NULL, only single value to write, stored in len */ +struct stv_init { + const u8 *data; + u16 start; + u8 len; +}; + +static const u8 x1500[] = { /* 0x1500 - 0x150f */ + 0x0b, 0xa7, 0xb7, 0x00, 0x00 +}; + +static const u8 x1536[] = { /* 0x1536 - 0x153b */ + 0x02, 0x00, 0x60, 0x01, 0x20, 0x01 
+}; + +static const u8 x15c1[] = { /* 0x15c1 - 0x15c2 */ + 0xff, 0x03 /* Output word 0x03ff = 1023 (ISO size) */ +}; + +static const struct stv_init stv_bridge_init[] = { + /* This reg is written twice. Some kind of reset? */ + {NULL, 0x1620, 0x80}, + {NULL, 0x1620, 0x00}, + {NULL, 0x1423, 0x04}, + {x1500, 0x1500, ARRAY_SIZE(x1500)}, + {x1536, 0x1536, ARRAY_SIZE(x1536)}, + {x15c1, 0x15c1, ARRAY_SIZE(x15c1)} +}; + +static const u8 vv6410_sensor_init[][2] = { + /* Setup registers */ + {VV6410_SETUP0, VV6410_SOFT_RESET}, + {VV6410_SETUP0, VV6410_LOW_POWER_MODE}, + /* Use shuffled read-out mode */ + {VV6410_SETUP1, BIT(6)}, + /* All modes to 1 */ + {VV6410_FGMODES, BIT(6) | BIT(4) | BIT(2) | BIT(0)}, + {VV6410_PINMAPPING, 0x00}, + /* Pre-clock generator divide off */ + {VV6410_DATAFORMAT, BIT(7) | BIT(0)}, + + /* Exposure registers */ + {VV6410_FINEH, VV6410_FINE_EXPOSURE >> 8}, + {VV6410_FINEL, VV6410_FINE_EXPOSURE & 0xff}, + {VV6410_COARSEH, VV6410_COARSE_EXPOSURE >> 8}, + {VV6410_COARSEL, VV6410_COARSE_EXPOSURE & 0xff}, + {VV6410_ANALOGGAIN, 0xf0 | VV6410_DEFAULT_GAIN}, + {VV6410_CLKDIV, VV6410_CLK_DIV_2}, + + /* System registers */ + /* Enable voltage doubler */ + {VV6410_AS0, BIT(6) | BIT(4) | BIT(3) | BIT(2) | BIT(1)}, + {VV6410_AT0, 0x00}, + /* Power up audio, differential */ + {VV6410_AT1, BIT(4)|BIT(0)}, +}; + +#endif -- cgit v1.2.3-70-g09d2 From da3bcb5d909925397715dff4a7584f21f9857bfa Mon Sep 17 00:00:00 2001 From: Jean-Francois Moine Date: Mon, 29 Dec 2008 09:06:09 -0300 Subject: V4L/DVB (10050): gspca - vc032x: Webcam 046d:0897 added. 
Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 1 + drivers/media/video/gspca/vc032x.c | 1 + 2 files changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index f54281d78c1..1c58a763014 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -56,6 +56,7 @@ stv06xx 046d:0870 Dexxa WebCam USB spca500 046d:0890 Logitech QuickCam traveler vc032x 046d:0892 Logitech Orbicam vc032x 046d:0896 Logitech Orbicam +vc032x 046d:0897 Logitech QuickCam for Dell notebooks zc3xx 046d:089d Logitech QuickCam E2500 zc3xx 046d:08a0 Logitech QC IM zc3xx 046d:08a1 Logitech QC IM 0x08A1 +sound diff --git a/drivers/media/video/gspca/vc032x.c b/drivers/media/video/gspca/vc032x.c index 71335dad433..0525ea51a6d 100644 --- a/drivers/media/video/gspca/vc032x.c +++ b/drivers/media/video/gspca/vc032x.c @@ -2426,6 +2426,7 @@ static const struct sd_desc sd_desc = { static const __devinitdata struct usb_device_id device_table[] = { {USB_DEVICE(0x046d, 0x0892), .driver_info = BRIDGE_VC0321}, {USB_DEVICE(0x046d, 0x0896), .driver_info = BRIDGE_VC0321}, + {USB_DEVICE(0x046d, 0x0897), .driver_info = BRIDGE_VC0321}, {USB_DEVICE(0x0ac8, 0x0321), .driver_info = BRIDGE_VC0321}, {USB_DEVICE(0x0ac8, 0x0323), .driver_info = BRIDGE_VC0323}, {USB_DEVICE(0x0ac8, 0x0328), .driver_info = BRIDGE_VC0321}, -- cgit v1.2.3-70-g09d2 From 1e1addd57bdf56c51dbc292d7760ea3d207fe833 Mon Sep 17 00:00:00 2001 From: Douglas Schilling Landgraf Date: Sat, 27 Dec 2008 21:38:14 -0300 Subject: V4L/DVB (10055): em28xx: Add entry for PixelView PlayTV Box 4 Added board PixelView PlayTV Box 4 Thanks to Vildenei Negrao Pereira for testing and data collection. 
Signed-off-by: Douglas Schilling Landgraf Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.em28xx | 1 + drivers/media/video/em28xx/em28xx-cards.c | 22 ++++++++++++++++++++++ drivers/media/video/em28xx/em28xx.h | 1 + 3 files changed, 24 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index 0c4c721daba..96b1eb8ac87 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx @@ -59,3 +59,4 @@ 58 -> Compro VideoMate ForYou/Stereo (em2820/em2840) [185b:2041] 59 -> Pinnacle PCTV HD Mini (em2874) [2304:023f] 60 -> Hauppauge WinTV HVR 850 (em2883) [2040:651f] + 61 -> Pixelview PlayTV Box 4 USB 2.0 (em2820/em2840) diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index 3759f3a56cb..0acab0d3c00 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -943,6 +943,27 @@ struct em28xx_board em28xx_boards[] = { .amux = EM28XX_AMUX_LINE_IN, } }, }, + [EM2820_BOARD_PROLINK_PLAYTV_BOX4_USB2] = { + .name = "Pixelview PlayTV Box 4 USB 2.0", + .tda9887_conf = TDA9887_PRESENT, + .tuner_type = TUNER_YMEC_TVF_5533MF, + .decoder = EM28XX_SAA711X, + .input = { { + .type = EM28XX_VMUX_TELEVISION, + .vmux = SAA7115_COMPOSITE2, + .amux = EM28XX_AMUX_VIDEO, + .aout = EM28XX_AOUT_MONO | /* I2S */ + EM28XX_AOUT_MASTER, /* Line out pin */ + }, { + .type = EM28XX_VMUX_COMPOSITE1, + .vmux = SAA7115_COMPOSITE0, + .amux = EM28XX_AMUX_LINE_IN, + }, { + .type = EM28XX_VMUX_SVIDEO, + .vmux = SAA7115_SVIDEO3, + .amux = EM28XX_AMUX_LINE_IN, + } }, + }, [EM2820_BOARD_PROLINK_PLAYTV_USB2] = { .name = "Pixelview Prolink PlayTV USB 2.0", .tda9887_conf = TDA9887_PRESENT, @@ -1350,6 +1371,7 @@ MODULE_DEVICE_TABLE(usb, em28xx_id_table); static struct em28xx_hash_table em28xx_eeprom_hash [] = { /* P/N: SA 60002070465 Tuner: TVF7533-MF */ {0x6ce05a8f, 
EM2820_BOARD_PROLINK_PLAYTV_USB2, TUNER_YMEC_TVF_5533MF}, + {0x72cc5a8b, EM2820_BOARD_PROLINK_PLAYTV_BOX4_USB2, TUNER_YMEC_TVF_5533MF}, {0x966a0441, EM2880_BOARD_KWORLD_DVB_310U, TUNER_XC2028}, }; diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h index 58a3675abb6..317512da982 100644 --- a/drivers/media/video/em28xx/em28xx.h +++ b/drivers/media/video/em28xx/em28xx.h @@ -99,6 +99,7 @@ #define EM2820_BOARD_COMPRO_VIDEOMATE_FORYOU 58 #define EM2874_BOARD_PINNACLE_PCTV_80E 59 #define EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850 60 +#define EM2820_BOARD_PROLINK_PLAYTV_BOX4_USB2 61 /* Limits minimum and default number of buffers */ #define EM28XX_MIN_BUF 4 -- cgit v1.2.3-70-g09d2 From e890759220759dfe4f3bea91a2deafb565ec10e9 Mon Sep 17 00:00:00 2001 From: Devin Heitmueller Date: Mon, 29 Dec 2008 22:34:35 -0300 Subject: V4L/DVB (10120): em28xx: remove redundant Pinnacle Dazzle DVC 100 profile The DVC 100 profile is redundant since we already have an existing identical profile named "Pinnacle Dazzle DVC 90/DVC 100" Signed-off-by: Devin Heitmueller Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.em28xx | 1 - drivers/media/video/em28xx/em28xx-cards.c | 14 -------------- drivers/media/video/em28xx/em28xx.h | 1 - 3 files changed, 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index 96b1eb8ac87..79fd7f97a36 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx @@ -27,7 +27,6 @@ 26 -> Hercules Smart TV USB 2.0 (em2820/em2840) 27 -> Pinnacle PCTV USB 2 (Philips FM1216ME) (em2820/em2840) 28 -> Leadtek Winfast USB II Deluxe (em2820/em2840) - 29 -> Pinnacle Dazzle DVC 100 (em2820/em2840) 30 -> Videology 20K14XUSB USB2.0 (em2820/em2840) 31 -> Usbgear VD204v9 (em2821) 32 -> Supercomp USB 2.0 TV (em2821) diff --git a/drivers/media/video/em28xx/em28xx-cards.c 
b/drivers/media/video/em28xx/em28xx-cards.c index c427c0eeba1..41d86e6f69c 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -339,20 +339,6 @@ struct em28xx_board em28xx_boards[] = { .amux = EM28XX_AMUX_LINE_IN, } }, }, - [EM2820_BOARD_PINNACLE_DVC_100] = { - .name = "Pinnacle Dazzle DVC 100", - .decoder = EM28XX_SAA711X, - .tuner_type = TUNER_ABSENT, /* Capture only device */ - .input = { { - .type = EM28XX_VMUX_COMPOSITE1, - .vmux = SAA7115_COMPOSITE0, - .amux = EM28XX_AMUX_LINE_IN, - }, { - .type = EM28XX_VMUX_SVIDEO, - .vmux = SAA7115_SVIDEO3, - .amux = EM28XX_AMUX_LINE_IN, - } }, - }, [EM2820_BOARD_VIDEOLOGY_20K14XUSB] = { .name = "Videology 20K14XUSB USB2.0", .valid = EM28XX_BOARD_NOT_VALIDATED, diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h index 317512da982..650f4ec8218 100644 --- a/drivers/media/video/em28xx/em28xx.h +++ b/drivers/media/video/em28xx/em28xx.h @@ -67,7 +67,6 @@ #define EM2820_BOARD_HERCULES_SMART_TV_USB2 26 #define EM2820_BOARD_PINNACLE_USB_2_FM1216ME 27 #define EM2820_BOARD_LEADTEK_WINFAST_USBII_DELUXE 28 -#define EM2820_BOARD_PINNACLE_DVC_100 29 #define EM2820_BOARD_VIDEOLOGY_20K14XUSB 30 #define EM2821_BOARD_USBGEAR_VD204 31 #define EM2821_BOARD_SUPERCOMP_USB_2 32 -- cgit v1.2.3-70-g09d2 From 7ed3a7a3113a5399a4591fdf1f2a07c9cd954853 Mon Sep 17 00:00:00 2001 From: Devin Heitmueller Date: Mon, 29 Dec 2008 22:39:42 -0300 Subject: V4L/DVB (10121): em28xx: remove worthless Pinnacle PCTV HD Mini 80e device profile The Pinnacle 80e cannot be supported since Micronas yanked their driver support for the drx-j chipset at the last minute. Remove the device profile since it cannot work without the drx driver and it being there is only likely to confuse people into thinking the device is supported but not working. 
Signed-off-by: Devin Heitmueller Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.em28xx | 1 - drivers/media/video/em28xx/em28xx-cards.c | 34 ------------------------------- drivers/media/video/em28xx/em28xx.h | 1 - 3 files changed, 36 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index 79fd7f97a36..75bded8a4aa 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx @@ -56,6 +56,5 @@ 56 -> Pinnacle Hybrid Pro (2) (em2882) [2304:0226] 57 -> Kworld PlusTV HD Hybrid 330 (em2883) [eb1a:a316] 58 -> Compro VideoMate ForYou/Stereo (em2820/em2840) [185b:2041] - 59 -> Pinnacle PCTV HD Mini (em2874) [2304:023f] 60 -> Hauppauge WinTV HVR 850 (em2883) [2040:651f] 61 -> Pixelview PlayTV Box 4 USB 2.0 (em2820/em2840) diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index 41d86e6f69c..84191c9dc54 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -110,17 +110,6 @@ static struct em28xx_reg_seq default_tuner_gpio[] = { { -1, -1, -1, -1}, }; -/* Pinnacle PCTV HD Mini (80e) GPIOs - 0-5: not used - 6: demod reset, active low - 7: LED on, active high */ -static struct em28xx_reg_seq em2874_pinnacle_80e_digital[] = { - {EM28XX_R06_I2C_CLK, 0x45, 0xff, 10}, /*400 KHz*/ - {EM2874_R80_GPIO, 0x80, 0xff, 100},/*Demod reset*/ - {EM2874_R80_GPIO, 0xc0, 0xff, 10}, - { -1, -1, -1, -1}, -}; - /* * Board definitions */ @@ -1228,27 +1217,6 @@ struct em28xx_board em28xx_boards[] = { .amux = EM28XX_AMUX_LINE_IN, } }, }, - - /* em2874 tuners are DVB only */ - - [EM2874_BOARD_PINNACLE_PCTV_80E] = { - .name = "Pinnacle PCTV HD Mini", - .tuner_type = TUNER_ABSENT, - .has_dvb = 1, - .dvb_gpio = em2874_pinnacle_80e_digital, - .ir_codes = ir_codes_pinnacle_pctv_hd, - .decoder = EM28XX_NODECODER, - .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | 
- EM2874_I2C_SECONDARY_BUS_SELECT | - EM28XX_I2C_FREQ_400_KHZ, -#ifdef DJH_DEBUG - .input = { { - .type = EM28XX_VMUX_TELEVISION, - .vmux = TVP5150_COMPOSITE0, - .amux = EM28XX_AMUX_LINE_IN, - } }, -#endif - }, }; const unsigned int em28xx_bcount = ARRAY_SIZE(em28xx_boards); @@ -1342,8 +1310,6 @@ struct usb_device_id em28xx_id_table [] = { .driver_info = EM2882_BOARD_PINNACLE_HYBRID_PRO }, { USB_DEVICE(0x2304, 0x0227), .driver_info = EM2880_BOARD_PINNACLE_PCTV_HD_PRO }, - { USB_DEVICE(0x2304, 0x023f), - .driver_info = EM2874_BOARD_PINNACLE_PCTV_80E }, { USB_DEVICE(0x0413, 0x6023), .driver_info = EM2800_BOARD_LEADTEK_WINFAST_USBII }, { USB_DEVICE(0x093b, 0xa005), diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h index 650f4ec8218..b5eddc26388 100644 --- a/drivers/media/video/em28xx/em28xx.h +++ b/drivers/media/video/em28xx/em28xx.h @@ -96,7 +96,6 @@ #define EM2882_BOARD_PINNACLE_HYBRID_PRO 56 #define EM2883_BOARD_KWORLD_HYBRID_A316 57 #define EM2820_BOARD_COMPRO_VIDEOMATE_FORYOU 58 -#define EM2874_BOARD_PINNACLE_PCTV_80E 59 #define EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850 60 #define EM2820_BOARD_PROLINK_PLAYTV_BOX4_USB2 61 -- cgit v1.2.3-70-g09d2 From aa16c10a347e887ec9505de9eacf3675938be722 Mon Sep 17 00:00:00 2001 From: Németh Márton Date: Mon, 29 Dec 2008 16:37:14 -0300 Subject: V4L/DVB (10128): modify V4L documentation to be a valid XHTML MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify Documentation/video4linux/API.html to be a valid XHTML 1.0 Strict. The result was verified using the http://validator.w3.org/ service.
Signed-off-by: Márton Németh Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/API.html | 43 ++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/API.html b/Documentation/video4linux/API.html index afbe9ae7ee9..d749d41f647 100644 --- a/Documentation/video4linux/API.html +++ b/Documentation/video4linux/API.html @@ -1,16 +1,27 @@ -V4L API -

Video For Linux APIs

- - - -
- -V4L original API - -Obsoleted by V4L2 API -
- -V4L2 API - -Should be used for new projects -
+ + + + + V4L API + + +

Video For Linux APIs

+ + + + + + + + + +
+ V4L original API + + Obsoleted by V4L2 API +
+ V4L2 API + Should be used for new projects +
+ + -- cgit v1.2.3-70-g09d2 From c3673464ebc004a3d82063cd41b9cf74d1b55db2 Mon Sep 17 00:00:00 2001 From: Karen Xie Date: Tue, 9 Dec 2008 14:15:32 -0800 Subject: [SCSI] cxgb3i: Add cxgb3i iSCSI driver. This patch implements the cxgb3i iscsi connection acceleration for the open-iscsi initiator. The cxgb3i driver offers the iscsi PDU based offload: - digest insertion and verification - payload direct-placement into host memory buffer. Signed-off-by: Karen Xie Signed-off-by: James Bottomley --- Documentation/scsi/cxgb3i.txt | 85 ++ drivers/scsi/Kconfig | 2 + drivers/scsi/Makefile | 1 + drivers/scsi/cxgb3i/Kbuild | 4 + drivers/scsi/cxgb3i/Kconfig | 6 + drivers/scsi/cxgb3i/cxgb3i.h | 139 +++ drivers/scsi/cxgb3i/cxgb3i_ddp.c | 770 +++++++++++++++ drivers/scsi/cxgb3i/cxgb3i_ddp.h | 306 ++++++ drivers/scsi/cxgb3i/cxgb3i_init.c | 107 ++ drivers/scsi/cxgb3i/cxgb3i_iscsi.c | 951 ++++++++++++++++++ drivers/scsi/cxgb3i/cxgb3i_offload.c | 1810 ++++++++++++++++++++++++++++++++++ drivers/scsi/cxgb3i/cxgb3i_offload.h | 231 +++++ drivers/scsi/cxgb3i/cxgb3i_pdu.c | 402 ++++++++ drivers/scsi/cxgb3i/cxgb3i_pdu.h | 59 ++ 14 files changed, 4873 insertions(+) create mode 100644 Documentation/scsi/cxgb3i.txt create mode 100644 drivers/scsi/cxgb3i/Kbuild create mode 100644 drivers/scsi/cxgb3i/Kconfig create mode 100644 drivers/scsi/cxgb3i/cxgb3i.h create mode 100644 drivers/scsi/cxgb3i/cxgb3i_ddp.c create mode 100644 drivers/scsi/cxgb3i/cxgb3i_ddp.h create mode 100644 drivers/scsi/cxgb3i/cxgb3i_init.c create mode 100644 drivers/scsi/cxgb3i/cxgb3i_iscsi.c create mode 100644 drivers/scsi/cxgb3i/cxgb3i_offload.c create mode 100644 drivers/scsi/cxgb3i/cxgb3i_offload.h create mode 100644 drivers/scsi/cxgb3i/cxgb3i_pdu.c create mode 100644 drivers/scsi/cxgb3i/cxgb3i_pdu.h (limited to 'Documentation') diff --git a/Documentation/scsi/cxgb3i.txt b/Documentation/scsi/cxgb3i.txt new file mode 100644 index 00000000000..8141fa01978 --- /dev/null +++ b/Documentation/scsi/cxgb3i.txt @@ -0,0 +1,85 @@ 
+Chelsio S3 iSCSI Driver for Linux + +Introduction +============ + +The Chelsio T3 ASIC based Adapters (S310, S320, S302, S304, Mezz cards, etc. +series of products) support iSCSI acceleration and iSCSI Direct Data Placement +(DDP) where the hardware handles the expensive byte touching operations, such +as CRC computation and verification, and direct DMA to the final host memory +destination: + + - iSCSI PDU digest generation and verification + + On transmitting, Chelsio S3 h/w computes and inserts the Header and + Data digest into the PDUs. + On receiving, Chelsio S3 h/w computes and verifies the Header and + Data digest of the PDUs. + + - Direct Data Placement (DDP) + + S3 h/w can directly place the iSCSI Data-In or Data-Out PDU's + payload into pre-posted final destination host-memory buffers based + on the Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT) + in Data-Out PDUs. + + - PDU Transmit and Recovery + + On transmitting, S3 h/w accepts the complete PDU (header + data) + from the host driver, computes and inserts the digests, decomposes + the PDU into multiple TCP segments if necessary, and transmits all + the TCP segments onto the wire. It handles TCP retransmission if + needed. + + On receiving, S3 h/w recovers the iSCSI PDU by reassembling TCP + segments, separating the header and data, calculating and verifying + the digests, then forwards the header to the host. The payload data, + if possible, will be directly placed into the pre-posted host DDP + buffer. Otherwise, the payload data will be sent to the host too. + +The cxgb3i driver interfaces with open-iscsi initiator and provides the iSCSI +acceleration through Chelsio hardware wherever applicable. + +Using the cxgb3i Driver +======================= + +The following steps need to be taken to accelerate the open-iscsi initiator: + +1. Load the cxgb3i driver: "modprobe cxgb3i" + + The cxgb3i module registers a new transport class "cxgb3i" with open-iscsi.
+ + * in the case of recompiling the kernel, the cxgb3i selection is located at + Device Drivers + SCSI device support ---> + [*] SCSI low-level drivers ---> + Chelsio S3xx iSCSI support + +2. Create an interface file located under /etc/iscsi/ifaces/ for the new + transport class "cxgb3i". + + The content of the file should be in the following format: + iface.transport_name = cxgb3i + iface.net_ifacename = + iface.ipaddress = + + * if iface.ipaddress is specified, it needs to be either the + same as the ethX's ip address or an address on the same subnet. Make + sure the ip address is unique in the network. + +3. edit /etc/iscsi/iscsid.conf + The default setting for MaxRecvDataSegmentLength (131072) is too big; + change "node.conn[0].iscsi.MaxRecvDataSegmentLength" to a value no + bigger than 15360 (for example 8192): + + node.conn[0].iscsi.MaxRecvDataSegmentLength = 8192 + + * The login would fail for a normal session if MaxRecvDataSegmentLength is + too big. An error message in the format of + "cxgb3i: ERR! MaxRecvSegmentLength too big. Need to be <= ." + would be logged to dmesg. + +4. To direct open-iscsi traffic to go through cxgb3i's accelerated path, + the "-I" option needs to be specified with most of the + iscsiadm commands. The "-I" argument is the transport interface file created + in step 2.
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 673463e4bbf..0e5e084dfb4 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -352,6 +352,8 @@ config ISCSI_TCP http://open-iscsi.org +source "drivers/scsi/cxgb3i/Kconfig" + config SGIWD93_SCSI tristate "SGI WD93C93 SCSI Driver" depends on SGI_HAS_WD93 && SCSI diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 07d0f58de9b..1410697257c 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -126,6 +126,7 @@ obj-$(CONFIG_SCSI_HPTIOP) += hptiop.o obj-$(CONFIG_SCSI_STEX) += stex.o obj-$(CONFIG_SCSI_MVSAS) += mvsas.o obj-$(CONFIG_PS3_ROM) += ps3rom.o +obj-$(CONFIG_SCSI_CXGB3_ISCSI) += libiscsi.o libiscsi_tcp.o cxgb3i/ obj-$(CONFIG_ARM) += arm/ diff --git a/drivers/scsi/cxgb3i/Kbuild b/drivers/scsi/cxgb3i/Kbuild new file mode 100644 index 00000000000..ee7d6d2f9c3 --- /dev/null +++ b/drivers/scsi/cxgb3i/Kbuild @@ -0,0 +1,4 @@ +EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3 + +cxgb3i-y := cxgb3i_init.o cxgb3i_iscsi.o cxgb3i_pdu.o cxgb3i_offload.o +obj-$(CONFIG_SCSI_CXGB3_ISCSI) += cxgb3i_ddp.o cxgb3i.o diff --git a/drivers/scsi/cxgb3i/Kconfig b/drivers/scsi/cxgb3i/Kconfig new file mode 100644 index 00000000000..276281460ec --- /dev/null +++ b/drivers/scsi/cxgb3i/Kconfig @@ -0,0 +1,6 @@ +config SCSI_CXGB3_ISCSI + tristate "Chelsio S3xx iSCSI support" + select CHELSIO_T3 + select SCSI_ISCSI_ATTRS + ---help--- + This driver supports iSCSI offload for the Chelsio S3 series devices. diff --git a/drivers/scsi/cxgb3i/cxgb3i.h b/drivers/scsi/cxgb3i/cxgb3i.h new file mode 100644 index 00000000000..fde6e4c634e --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i.h @@ -0,0 +1,139 @@ +/* + * cxgb3i.h: Chelsio S3xx iSCSI driver. + * + * Copyright (c) 2008 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. 
+ * + * Written by: Karen Xie (kxie@chelsio.com) + */ + +#ifndef __CXGB3I_H__ +#define __CXGB3I_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* from cxgb3 LLD */ +#include "common.h" +#include "t3_cpl.h" +#include "t3cdev.h" +#include "cxgb3_ctl_defs.h" +#include "cxgb3_offload.h" +#include "firmware_exports.h" + +#include "cxgb3i_offload.h" +#include "cxgb3i_ddp.h" + +#define CXGB3I_SCSI_QDEPTH_DFLT 128 +#define CXGB3I_MAX_TARGET CXGB3I_MAX_CONN +#define CXGB3I_MAX_LUN 512 +#define ISCSI_PDU_NONPAYLOAD_MAX \ + (sizeof(struct iscsi_hdr) + ISCSI_MAX_AHS_SIZE + 2*ISCSI_DIGEST_SIZE) + +struct cxgb3i_adapter; +struct cxgb3i_hba; +struct cxgb3i_endpoint; + +/** + * struct cxgb3i_hba - cxgb3i iscsi structure (per port) + * + * @snic: cxgb3i adapter containing this port + * @ndev: pointer to netdev structure + * @shost: pointer to scsi host structure + */ +struct cxgb3i_hba { + struct cxgb3i_adapter *snic; + struct net_device *ndev; + struct Scsi_Host *shost; +}; + +/** + * struct cxgb3i_adapter - cxgb3i adapter structure (per pci) + * + * @listhead: list head to link elements + * @lock: lock for this structure + * @tdev: pointer to t3cdev used by cxgb3 driver + * @pdev: pointer to pci dev + * @hba_cnt: # of hbas (the same as # of ports) + * @hba: all the hbas on this adapter + * @tx_max_size: max. tx packet size supported + * @rx_max_size: max. rx packet size supported + * @tag_format: ddp tag format settings + */ +struct cxgb3i_adapter { + struct list_head list_head; + spinlock_t lock; + struct t3cdev *tdev; + struct pci_dev *pdev; + unsigned char hba_cnt; + struct cxgb3i_hba *hba[MAX_NPORTS]; + + unsigned int tx_max_size; + unsigned int rx_max_size; + + struct cxgb3i_tag_format tag_format; +}; + +/** + * struct cxgb3i_conn - cxgb3i iscsi connection + * + * @listhead: list head to link elements + * @cep: pointer to iscsi_endpoint structure + * @conn: pointer to iscsi_conn structure + * @hba: pointer to the hba this conn. 
is going through + * @task_idx_bits: # of bits needed for session->cmds_max + */ +struct cxgb3i_conn { + struct list_head list_head; + struct cxgb3i_endpoint *cep; + struct iscsi_conn *conn; + struct cxgb3i_hba *hba; + unsigned int task_idx_bits; +}; + +/** + * struct cxgb3i_endpoint - iscsi tcp endpoint + * + * @c3cn: the h/w tcp connection representation + * @hba: pointer to the hba this conn. is going through + * @cconn: pointer to the associated cxgb3i iscsi connection + */ +struct cxgb3i_endpoint { + struct s3_conn *c3cn; + struct cxgb3i_hba *hba; + struct cxgb3i_conn *cconn; +}; + +int cxgb3i_iscsi_init(void); +void cxgb3i_iscsi_cleanup(void); + +struct cxgb3i_adapter *cxgb3i_adapter_add(struct t3cdev *); +void cxgb3i_adapter_remove(struct t3cdev *); +int cxgb3i_adapter_ulp_init(struct cxgb3i_adapter *); +void cxgb3i_adapter_ulp_cleanup(struct cxgb3i_adapter *); + +struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *); +struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *, + struct net_device *); +void cxgb3i_hba_host_remove(struct cxgb3i_hba *); + +int cxgb3i_pdu_init(void); +void cxgb3i_pdu_cleanup(void); +void cxgb3i_conn_cleanup_task(struct iscsi_task *); +int cxgb3i_conn_alloc_pdu(struct iscsi_task *, u8); +int cxgb3i_conn_init_pdu(struct iscsi_task *, unsigned int, unsigned int); +int cxgb3i_conn_xmit_pdu(struct iscsi_task *); + +void cxgb3i_release_itt(struct iscsi_task *task, itt_t hdr_itt); +int cxgb3i_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt); + +#endif diff --git a/drivers/scsi/cxgb3i/cxgb3i_ddp.c b/drivers/scsi/cxgb3i/cxgb3i_ddp.c new file mode 100644 index 00000000000..1a41f04264f --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i_ddp.c @@ -0,0 +1,770 @@ +/* + * cxgb3i_ddp.c: Chelsio S3xx iSCSI DDP Manager. + * + * Copyright (c) 2008 Chelsio Communications, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + * + * Written by: Karen Xie (kxie@chelsio.com) + */ + +#include + +/* from cxgb3 LLD */ +#include "common.h" +#include "t3_cpl.h" +#include "t3cdev.h" +#include "cxgb3_ctl_defs.h" +#include "cxgb3_offload.h" +#include "firmware_exports.h" + +#include "cxgb3i_ddp.h" + +#define DRV_MODULE_NAME "cxgb3i_ddp" +#define DRV_MODULE_VERSION "1.0.0" +#define DRV_MODULE_RELDATE "Dec. 1, 2008" + +static char version[] = + "Chelsio S3xx iSCSI DDP " DRV_MODULE_NAME + " v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + +MODULE_AUTHOR("Karen Xie "); +MODULE_DESCRIPTION("cxgb3i ddp pagepod manager"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +#define ddp_log_error(fmt...) printk(KERN_ERR "cxgb3i_ddp: ERR! " fmt) +#define ddp_log_warn(fmt...) printk(KERN_WARNING "cxgb3i_ddp: WARN! " fmt) +#define ddp_log_info(fmt...) printk(KERN_INFO "cxgb3i_ddp: " fmt) + +#ifdef __DEBUG_CXGB3I_DDP__ +#define ddp_log_debug(fmt, args...) \ + printk(KERN_INFO "cxgb3i_ddp: %s - " fmt, __func__ , ## args) +#else +#define ddp_log_debug(fmt...) +#endif + +/* + * iSCSI Direct Data Placement + * + * T3 h/w can directly place the iSCSI Data-In or Data-Out PDU's payload into + * pre-posted final destination host-memory buffers based on the Initiator + * Task Tag (ITT) in Data-In or Target Task Tag (TTT) in Data-Out PDUs. + * + * The host memory address is programmed into h/w in the format of pagepod + * entries. + * The location of the pagepod entry is encoded into ddp tag which is used or + * is the base for ITT/TTT. 
+ */ + +#define DDP_PGIDX_MAX 4 +#define DDP_THRESHOLD 2048 +static unsigned char ddp_page_order[DDP_PGIDX_MAX] = {0, 1, 2, 4}; +static unsigned char ddp_page_shift[DDP_PGIDX_MAX] = {12, 13, 14, 16}; +static unsigned char page_idx = DDP_PGIDX_MAX; + +static LIST_HEAD(cxgb3i_ddp_list); +static DEFINE_RWLOCK(cxgb3i_ddp_rwlock); + +/* + * functions to program the pagepod in h/w + */ +static inline void ulp_mem_io_set_hdr(struct sk_buff *skb, unsigned int addr) +{ + struct ulp_mem_io *req = (struct ulp_mem_io *)skb->head; + + req->wr.wr_lo = 0; + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); + req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(addr >> 5) | + V_ULPTX_CMD(ULP_MEM_WRITE)); + req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE >> 5) | + V_ULPTX_NFLITS((PPOD_SIZE >> 3) + 1)); +} + +static int set_ddp_map(struct cxgb3i_ddp_info *ddp, struct pagepod_hdr *hdr, + unsigned int idx, unsigned int npods, + struct cxgb3i_gather_list *gl) +{ + unsigned int pm_addr = (idx << PPOD_SIZE_SHIFT) + ddp->llimit; + int i; + + for (i = 0; i < npods; i++, idx++, pm_addr += PPOD_SIZE) { + struct sk_buff *skb = ddp->gl_skb[idx]; + struct pagepod *ppod; + int j, pidx; + + /* hold on to the skb until we clear the ddp mapping */ + skb_get(skb); + + ulp_mem_io_set_hdr(skb, pm_addr); + ppod = (struct pagepod *) + (skb->head + sizeof(struct ulp_mem_io)); + memcpy(&(ppod->hdr), hdr, sizeof(struct pagepod)); + for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx) + ppod->addr[j] = pidx < gl->nelem ? 
+ cpu_to_be64(gl->phys_addr[pidx]) : 0UL; + + skb->priority = CPL_PRIORITY_CONTROL; + cxgb3_ofld_send(ddp->tdev, skb); + } + return 0; +} + +static int clear_ddp_map(struct cxgb3i_ddp_info *ddp, unsigned int idx, + unsigned int npods) +{ + unsigned int pm_addr = (idx << PPOD_SIZE_SHIFT) + ddp->llimit; + int i; + + for (i = 0; i < npods; i++, idx++, pm_addr += PPOD_SIZE) { + struct sk_buff *skb = ddp->gl_skb[idx]; + + ddp->gl_skb[idx] = NULL; + memset((skb->head + sizeof(struct ulp_mem_io)), 0, PPOD_SIZE); + ulp_mem_io_set_hdr(skb, pm_addr); + skb->priority = CPL_PRIORITY_CONTROL; + cxgb3_ofld_send(ddp->tdev, skb); + } + return 0; +} + +static inline int ddp_find_unused_entries(struct cxgb3i_ddp_info *ddp, + int start, int max, int count, + struct cxgb3i_gather_list *gl) +{ + unsigned int i, j; + + spin_lock(&ddp->map_lock); + for (i = start; i <= max;) { + for (j = 0; j < count; j++) { + if (ddp->gl_map[i + j]) + break; + } + if (j == count) { + for (j = 0; j < count; j++) + ddp->gl_map[i + j] = gl; + spin_unlock(&ddp->map_lock); + return i; + } + i += j + 1; + } + spin_unlock(&ddp->map_lock); + return -EBUSY; +} + +static inline void ddp_unmark_entries(struct cxgb3i_ddp_info *ddp, + int start, int count) +{ + spin_lock(&ddp->map_lock); + memset(&ddp->gl_map[start], 0, + count * sizeof(struct cxgb3i_gather_list *)); + spin_unlock(&ddp->map_lock); +} + +static inline void ddp_free_gl_skb(struct cxgb3i_ddp_info *ddp, + int idx, int count) +{ + int i; + + for (i = 0; i < count; i++, idx++) + if (ddp->gl_skb[idx]) { + kfree_skb(ddp->gl_skb[idx]); + ddp->gl_skb[idx] = NULL; + } +} + +static inline int ddp_alloc_gl_skb(struct cxgb3i_ddp_info *ddp, int idx, + int count, gfp_t gfp) +{ + int i; + + for (i = 0; i < count; i++) { + struct sk_buff *skb = alloc_skb(sizeof(struct ulp_mem_io) + + PPOD_SIZE, gfp); + if (skb) { + ddp->gl_skb[idx + i] = skb; + skb_put(skb, sizeof(struct ulp_mem_io) + PPOD_SIZE); + } else { + ddp_free_gl_skb(ddp, idx, i); + return -ENOMEM; + } + } + 
return 0; +} + +/** + * cxgb3i_ddp_find_page_index - return ddp page index for a given page size. + * @pgsz: page size + * return the ddp page index, if no match is found return DDP_PGIDX_MAX. + */ +int cxgb3i_ddp_find_page_index(unsigned long pgsz) +{ + int i; + + for (i = 0; i < DDP_PGIDX_MAX; i++) { + if (pgsz == (1UL << ddp_page_shift[i])) + return i; + } + ddp_log_debug("ddp page size 0x%lx not supported.\n", pgsz); + return DDP_PGIDX_MAX; +} +EXPORT_SYMBOL_GPL(cxgb3i_ddp_find_page_index); + +static inline void ddp_gl_unmap(struct pci_dev *pdev, + struct cxgb3i_gather_list *gl) +{ + int i; + + for (i = 0; i < gl->nelem; i++) + pci_unmap_page(pdev, gl->phys_addr[i], PAGE_SIZE, + PCI_DMA_FROMDEVICE); +} + +static inline int ddp_gl_map(struct pci_dev *pdev, + struct cxgb3i_gather_list *gl) +{ + int i; + + for (i = 0; i < gl->nelem; i++) { + gl->phys_addr[i] = pci_map_page(pdev, gl->pages[i], 0, + PAGE_SIZE, + PCI_DMA_FROMDEVICE); + if (unlikely(pci_dma_mapping_error(pdev, gl->phys_addr[i]))) + goto unmap; + } + + return i; + +unmap: + if (i) { + unsigned int nelem = gl->nelem; + + gl->nelem = i; + ddp_gl_unmap(pdev, gl); + gl->nelem = nelem; + } + return -ENOMEM; +} + +/** + * cxgb3i_ddp_make_gl - build ddp page buffer list + * @xferlen: total buffer length + * @sgl: page buffer scatter-gather list + * @sgcnt: # of page buffers + * @pdev: pci_dev, used for pci map + * @gfp: allocation mode + * + * construct a ddp page buffer list from the scsi scattergather list. + * coalesce buffers as much as possible, and obtain dma addresses for + * each page. + * + * Return the cxgb3i_gather_list constructed from the page buffers if the + * memory can be used for ddp. Return NULL otherwise. 
+ */ +struct cxgb3i_gather_list *cxgb3i_ddp_make_gl(unsigned int xferlen, + struct scatterlist *sgl, + unsigned int sgcnt, + struct pci_dev *pdev, + gfp_t gfp) +{ + struct cxgb3i_gather_list *gl; + struct scatterlist *sg = sgl; + struct page *sgpage = sg_page(sg); + unsigned int sglen = sg->length; + unsigned int sgoffset = sg->offset; + unsigned int npages = (xferlen + sgoffset + PAGE_SIZE - 1) >> + PAGE_SHIFT; + int i = 1, j = 0; + + if (xferlen < DDP_THRESHOLD) { + ddp_log_debug("xfer %u < threshold %u, no ddp.\n", + xferlen, DDP_THRESHOLD); + return NULL; + } + + gl = kzalloc(sizeof(struct cxgb3i_gather_list) + + npages * (sizeof(dma_addr_t) + sizeof(struct page *)), + gfp); + if (!gl) + return NULL; + + gl->pages = (struct page **)&gl->phys_addr[npages]; + gl->length = xferlen; + gl->offset = sgoffset; + gl->pages[0] = sgpage; + + sg = sg_next(sg); + while (sg) { + struct page *page = sg_page(sg); + + if (sgpage == page && sg->offset == sgoffset + sglen) + sglen += sg->length; + else { + /* make sure the sgl is fit for ddp: + * each has the same page size, and + * all of the middle pages are used completely + */ + if ((j && sgoffset) || + ((i != sgcnt - 1) && + ((sglen + sgoffset) & ~PAGE_MASK))) + goto error_out; + + j++; + if (j == gl->nelem || sg->offset) + goto error_out; + gl->pages[j] = page; + sglen = sg->length; + sgoffset = sg->offset; + sgpage = page; + } + i++; + sg = sg_next(sg); + } + gl->nelem = ++j; + + if (ddp_gl_map(pdev, gl) < 0) + goto error_out; + + return gl; + +error_out: + kfree(gl); + return NULL; +} +EXPORT_SYMBOL_GPL(cxgb3i_ddp_make_gl); + +/** + * cxgb3i_ddp_release_gl - release a page buffer list + * @gl: a ddp page buffer list + * @pdev: pci_dev used for pci_unmap + * free a ddp page buffer list resulted from cxgb3i_ddp_make_gl(). 
+ */ +void cxgb3i_ddp_release_gl(struct cxgb3i_gather_list *gl, + struct pci_dev *pdev) +{ + ddp_gl_unmap(pdev, gl); + kfree(gl); +} +EXPORT_SYMBOL_GPL(cxgb3i_ddp_release_gl); + +/** + * cxgb3i_ddp_tag_reserve - set up ddp for a data transfer + * @tdev: t3cdev adapter + * @tid: connection id + * @tformat: tag format + * @tagp: the s/w tag, if ddp setup is successful, it will be updated with + * ddp/hw tag + * @gl: the page momory list + * @gfp: allocation mode + * + * ddp setup for a given page buffer list and construct the ddp tag. + * return 0 if success, < 0 otherwise. + */ +int cxgb3i_ddp_tag_reserve(struct t3cdev *tdev, unsigned int tid, + struct cxgb3i_tag_format *tformat, u32 *tagp, + struct cxgb3i_gather_list *gl, gfp_t gfp) +{ + struct cxgb3i_ddp_info *ddp = tdev->ulp_iscsi; + struct pagepod_hdr hdr; + unsigned int npods; + int idx = -1, idx_max; + int err = -ENOMEM; + u32 sw_tag = *tagp; + u32 tag; + + if (page_idx >= DDP_PGIDX_MAX || !ddp || !gl || !gl->nelem || + gl->length < DDP_THRESHOLD) { + ddp_log_debug("pgidx %u, xfer %u/%u, NO ddp.\n", + page_idx, gl->length, DDP_THRESHOLD); + return -EINVAL; + } + + npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT; + idx_max = ddp->nppods - npods + 1; + + if (ddp->idx_last == ddp->nppods) + idx = ddp_find_unused_entries(ddp, 0, idx_max, npods, gl); + else { + idx = ddp_find_unused_entries(ddp, ddp->idx_last + 1, + idx_max, npods, gl); + if (idx < 0 && ddp->idx_last >= npods) + idx = ddp_find_unused_entries(ddp, 0, + ddp->idx_last - npods + 1, + npods, gl); + } + if (idx < 0) { + ddp_log_debug("xferlen %u, gl %u, npods %u NO DDP.\n", + gl->length, gl->nelem, npods); + return idx; + } + + err = ddp_alloc_gl_skb(ddp, idx, npods, gfp); + if (err < 0) + goto unmark_entries; + + tag = cxgb3i_ddp_tag_base(tformat, sw_tag); + tag |= idx << PPOD_IDX_SHIFT; + + hdr.rsvd = 0; + hdr.vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid)); + hdr.pgsz_tag_clr = htonl(tag & ddp->rsvd_tag_mask); + hdr.maxoffset = 
htonl(gl->length); + hdr.pgoffset = htonl(gl->offset); + + err = set_ddp_map(ddp, &hdr, idx, npods, gl); + if (err < 0) + goto free_gl_skb; + + ddp->idx_last = idx; + ddp_log_debug("xfer %u, gl %u,%u, tid 0x%x, 0x%x -> 0x%x(%u,%u).\n", + gl->length, gl->nelem, gl->offset, tid, sw_tag, tag, + idx, npods); + *tagp = tag; + return 0; + +free_gl_skb: + ddp_free_gl_skb(ddp, idx, npods); +unmark_entries: + ddp_unmark_entries(ddp, idx, npods); + return err; +} +EXPORT_SYMBOL_GPL(cxgb3i_ddp_tag_reserve); + +/** + * cxgb3i_ddp_tag_release - release a ddp tag + * @tdev: t3cdev adapter + * @tag: ddp tag + * ddp cleanup for a given ddp tag and release all the resources held + */ +void cxgb3i_ddp_tag_release(struct t3cdev *tdev, u32 tag) +{ + struct cxgb3i_ddp_info *ddp = tdev->ulp_iscsi; + u32 idx; + + if (!ddp) { + ddp_log_error("release ddp tag 0x%x, ddp NULL.\n", tag); + return; + } + + idx = (tag >> PPOD_IDX_SHIFT) & ddp->idx_mask; + if (idx < ddp->nppods) { + struct cxgb3i_gather_list *gl = ddp->gl_map[idx]; + unsigned int npods; + + if (!gl) { + ddp_log_error("release ddp 0x%x, idx 0x%x, gl NULL.\n", + tag, idx); + return; + } + npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT; + ddp_log_debug("ddp tag 0x%x, release idx 0x%x, npods %u.\n", + tag, idx, npods); + clear_ddp_map(ddp, idx, npods); + ddp_unmark_entries(ddp, idx, npods); + cxgb3i_ddp_release_gl(gl, ddp->pdev); + } else + ddp_log_error("ddp tag 0x%x, idx 0x%x > max 0x%x.\n", + tag, idx, ddp->nppods); +} +EXPORT_SYMBOL_GPL(cxgb3i_ddp_tag_release); + +static int setup_conn_pgidx(struct t3cdev *tdev, unsigned int tid, int pg_idx, + int reply) +{ + struct sk_buff *skb = alloc_skb(sizeof(struct cpl_set_tcb_field), + GFP_KERNEL); + struct cpl_set_tcb_field *req; + u64 val = pg_idx < DDP_PGIDX_MAX ? 
pg_idx : 0; + + if (!skb) + return -ENOMEM; + + /* set up ulp submode and page size */ + req = (struct cpl_set_tcb_field *)skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); + req->reply = V_NO_REPLY(reply ? 0 : 1); + req->cpu_idx = 0; + req->word = htons(31); + req->mask = cpu_to_be64(0xF0000000); + req->val = cpu_to_be64(val << 28); + skb->priority = CPL_PRIORITY_CONTROL; + + cxgb3_ofld_send(tdev, skb); + return 0; +} + +/** + * cxgb3i_setup_conn_host_pagesize - setup the conn.'s ddp page size + * @tdev: t3cdev adapter + * @tid: connection id + * @reply: request reply from h/w + * set up the ddp page size based on the host PAGE_SIZE for a connection + * identified by tid + */ +int cxgb3i_setup_conn_host_pagesize(struct t3cdev *tdev, unsigned int tid, + int reply) +{ + return setup_conn_pgidx(tdev, tid, page_idx, reply); +} +EXPORT_SYMBOL_GPL(cxgb3i_setup_conn_host_pagesize); + +/** + * cxgb3i_setup_conn_pagesize - setup the conn.'s ddp page size + * @tdev: t3cdev adapter + * @tid: connection id + * @reply: request reply from h/w + * @pgsz: ddp page size + * set up the ddp page size for a connection identified by tid + */ +int cxgb3i_setup_conn_pagesize(struct t3cdev *tdev, unsigned int tid, + int reply, unsigned long pgsz) +{ + int pgidx = cxgb3i_ddp_find_page_index(pgsz); + + return setup_conn_pgidx(tdev, tid, pgidx, reply); +} +EXPORT_SYMBOL_GPL(cxgb3i_setup_conn_pagesize); + +/** + * cxgb3i_setup_conn_digest - setup conn. 
digest setting + * @tdev: t3cdev adapter + * @tid: connection id + * @hcrc: header digest enabled + * @dcrc: data digest enabled + * @reply: request reply from h/w + * set up the iscsi digest settings for a connection identified by tid + */ +int cxgb3i_setup_conn_digest(struct t3cdev *tdev, unsigned int tid, + int hcrc, int dcrc, int reply) +{ + struct sk_buff *skb = alloc_skb(sizeof(struct cpl_set_tcb_field), + GFP_KERNEL); + struct cpl_set_tcb_field *req; + u64 val = (hcrc ? 1 : 0) | (dcrc ? 2 : 0); + + if (!skb) + return -ENOMEM; + + /* set up ulp submode and page size */ + req = (struct cpl_set_tcb_field *)skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); + req->reply = V_NO_REPLY(reply ? 0 : 1); + req->cpu_idx = 0; + req->word = htons(31); + req->mask = cpu_to_be64(0x0F000000); + req->val = cpu_to_be64(val << 24); + skb->priority = CPL_PRIORITY_CONTROL; + + cxgb3_ofld_send(tdev, skb); + return 0; +} +EXPORT_SYMBOL_GPL(cxgb3i_setup_conn_digest); + +static int ddp_init(struct t3cdev *tdev) +{ + struct cxgb3i_ddp_info *ddp; + struct ulp_iscsi_info uinfo; + unsigned int ppmax, bits; + int i, err; + static int vers_printed; + + if (!vers_printed) { + printk(KERN_INFO "%s", version); + vers_printed = 1; + } + + err = tdev->ctl(tdev, ULP_ISCSI_GET_PARAMS, &uinfo); + if (err < 0) { + ddp_log_error("%s, failed to get iscsi param err=%d.\n", + tdev->name, err); + return err; + } + + ppmax = (uinfo.ulimit - uinfo.llimit + 1) >> PPOD_SIZE_SHIFT; + bits = __ilog2_u32(ppmax) + 1; + if (bits > PPOD_IDX_MAX_SIZE) + bits = PPOD_IDX_MAX_SIZE; + ppmax = (1 << (bits - 1)) - 1; + + ddp = cxgb3i_alloc_big_mem(sizeof(struct cxgb3i_ddp_info) + + ppmax * + (sizeof(struct cxgb3i_gather_list *) + + sizeof(struct sk_buff *)), + GFP_KERNEL); + if (!ddp) { + ddp_log_warn("%s unable to alloc ddp 0x%d, ddp disabled.\n", + tdev->name, ppmax); + return 0; + } + ddp->gl_map = (struct 
cxgb3i_gather_list **)(ddp + 1); + ddp->gl_skb = (struct sk_buff **)(((char *)ddp->gl_map) + + ppmax * + sizeof(struct cxgb3i_gather_list *)); + spin_lock_init(&ddp->map_lock); + + ddp->tdev = tdev; + ddp->pdev = uinfo.pdev; + ddp->max_txsz = min_t(unsigned int, uinfo.max_txsz, ULP2_MAX_PKT_SIZE); + ddp->max_rxsz = min_t(unsigned int, uinfo.max_rxsz, ULP2_MAX_PKT_SIZE); + ddp->llimit = uinfo.llimit; + ddp->ulimit = uinfo.ulimit; + ddp->nppods = ppmax; + ddp->idx_last = ppmax; + ddp->idx_bits = bits; + ddp->idx_mask = (1 << bits) - 1; + ddp->rsvd_tag_mask = (1 << (bits + PPOD_IDX_SHIFT)) - 1; + + uinfo.tagmask = ddp->idx_mask << PPOD_IDX_SHIFT; + for (i = 0; i < DDP_PGIDX_MAX; i++) + uinfo.pgsz_factor[i] = ddp_page_order[i]; + uinfo.ulimit = uinfo.llimit + (ppmax << PPOD_SIZE_SHIFT); + + err = tdev->ctl(tdev, ULP_ISCSI_SET_PARAMS, &uinfo); + if (err < 0) { + ddp_log_warn("%s unable to set iscsi param err=%d, " + "ddp disabled.\n", tdev->name, err); + goto free_ddp_map; + } + + tdev->ulp_iscsi = ddp; + + /* add to the list */ + write_lock(&cxgb3i_ddp_rwlock); + list_add_tail(&ddp->list, &cxgb3i_ddp_list); + write_unlock(&cxgb3i_ddp_rwlock); + + ddp_log_info("nppods %u (0x%x ~ 0x%x), bits %u, mask 0x%x,0x%x " + "pkt %u,%u.\n", + ppmax, ddp->llimit, ddp->ulimit, ddp->idx_bits, + ddp->idx_mask, ddp->rsvd_tag_mask, + ddp->max_txsz, ddp->max_rxsz); + return 0; + +free_ddp_map: + cxgb3i_free_big_mem(ddp); + return err; +} + +/** + * cxgb3i_adapter_ddp_init - initialize the adapter's ddp resource + * @tdev: t3cdev adapter + * @tformat: tag format + * @txsz: max tx pkt size, filled in by this func. + * @rxsz: max rx pkt size, filled in by this func. 
+ * initialize the ddp pagepod manager for a given adapter if needed and + * setup the tag format for a given iscsi entity + */ +int cxgb3i_adapter_ddp_init(struct t3cdev *tdev, + struct cxgb3i_tag_format *tformat, + unsigned int *txsz, unsigned int *rxsz) +{ + struct cxgb3i_ddp_info *ddp; + unsigned char idx_bits; + + if (!tformat) + return -EINVAL; + + if (!tdev->ulp_iscsi) { + int err = ddp_init(tdev); + if (err < 0) + return err; + } + ddp = (struct cxgb3i_ddp_info *)tdev->ulp_iscsi; + + idx_bits = 32 - tformat->sw_bits; + tformat->rsvd_bits = ddp->idx_bits; + tformat->rsvd_shift = PPOD_IDX_SHIFT; + tformat->rsvd_mask = (1 << tformat->rsvd_bits) - 1; + + ddp_log_info("tag format: sw %u, rsvd %u,%u, mask 0x%x.\n", + tformat->sw_bits, tformat->rsvd_bits, + tformat->rsvd_shift, tformat->rsvd_mask); + + *txsz = ddp->max_txsz; + *rxsz = ddp->max_rxsz; + ddp_log_info("ddp max pkt size: %u, %u.\n", + ddp->max_txsz, ddp->max_rxsz); + return 0; +} +EXPORT_SYMBOL_GPL(cxgb3i_adapter_ddp_init); + +static void ddp_release(struct cxgb3i_ddp_info *ddp) +{ + int i = 0; + struct t3cdev *tdev = ddp->tdev; + + tdev->ulp_iscsi = NULL; + while (i < ddp->nppods) { + struct cxgb3i_gather_list *gl = ddp->gl_map[i]; + if (gl) { + int npods = (gl->nelem + PPOD_PAGES_MAX - 1) + >> PPOD_PAGES_SHIFT; + + kfree(gl); + ddp_free_gl_skb(ddp, i, npods); + } else + i++; + } + cxgb3i_free_big_mem(ddp); +} + +/** + * cxgb3i_adapter_ddp_cleanup - release the adapter's ddp resource + * @tdev: t3cdev adapter + * release all the resource held by the ddp pagepod manager for a given + * adapter if needed + */ +void cxgb3i_adapter_ddp_cleanup(struct t3cdev *tdev) +{ + struct cxgb3i_ddp_info *ddp; + + /* remove from the list */ + write_lock(&cxgb3i_ddp_rwlock); + list_for_each_entry(ddp, &cxgb3i_ddp_list, list) { + if (ddp->tdev == tdev) { + list_del(&ddp->list); + break; + } + } + write_unlock(&cxgb3i_ddp_rwlock); + + if (ddp) + ddp_release(ddp); +} +EXPORT_SYMBOL_GPL(cxgb3i_adapter_ddp_cleanup); + +/** 
+ * cxgb3i_ddp_init_module - module init entry point + * initialize any driver wide global data structures + */ +static int __init cxgb3i_ddp_init_module(void) +{ + page_idx = cxgb3i_ddp_find_page_index(PAGE_SIZE); + ddp_log_info("system PAGE_SIZE %lu, ddp idx %u.\n", + PAGE_SIZE, page_idx); + return 0; +} + +/** + * cxgb3i_ddp_exit_module - module cleanup/exit entry point + * go through the ddp list and release any resource held. + */ +static void __exit cxgb3i_ddp_exit_module(void) +{ + struct cxgb3i_ddp_info *ddp; + + /* release all ddp manager if there is any */ + write_lock(&cxgb3i_ddp_rwlock); + list_for_each_entry(ddp, &cxgb3i_ddp_list, list) { + list_del(&ddp->list); + ddp_release(ddp); + } + write_unlock(&cxgb3i_ddp_rwlock); +} + +module_init(cxgb3i_ddp_init_module); +module_exit(cxgb3i_ddp_exit_module); diff --git a/drivers/scsi/cxgb3i/cxgb3i_ddp.h b/drivers/scsi/cxgb3i/cxgb3i_ddp.h new file mode 100644 index 00000000000..5c7c4d95c49 --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i_ddp.h @@ -0,0 +1,306 @@ +/* + * cxgb3i_ddp.h: Chelsio S3xx iSCSI DDP Manager. + * + * Copyright (c) 2008 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. 
+ *
+ * Written by: Karen Xie (kxie@chelsio.com)
+ */
+
+#ifndef __CXGB3I_ULP2_DDP_H__
+#define __CXGB3I_ULP2_DDP_H__
+
+/**
+ * struct cxgb3i_tag_format - cxgb3i ulp tag format for an iscsi entity
+ *
+ * @sw_bits:		# of bits used by iscsi software layer
+ * @rsvd_bits:		# of bits used by h/w
+ * @rsvd_shift:		h/w bits shift left
+ * @filler:		unused byte, keeps @rsvd_mask 32-bit aligned
+ * @rsvd_mask:		reserved bit mask
+ */
+struct cxgb3i_tag_format {
+	unsigned char sw_bits;
+	unsigned char rsvd_bits;
+	unsigned char rsvd_shift;
+	unsigned char filler[1];
+	u32 rsvd_mask;
+};
+
+/**
+ * struct cxgb3i_gather_list - cxgb3i direct data placement memory
+ *
+ * @tag:	ddp tag
+ * @length:	total data buffer length
+ * @offset:	initial offset to the 1st page
+ * @nelem:	# of pages
+ * @pages:	page pointers
+ * @phys_addr:	physical address; trailing array sized by the allocator
+ *		(presumably one entry per page - confirm against
+ *		cxgb3i_ddp_make_gl())
+ */
+struct cxgb3i_gather_list {
+	u32 tag;
+	unsigned int length;
+	unsigned int offset;
+	unsigned int nelem;
+	struct page **pages;
+	dma_addr_t phys_addr[0];
+};
+
+/**
+ * struct cxgb3i_ddp_info - cxgb3i direct data placement for pdu payload
+ *
+ * @list:	list head to link elements
+ * @tdev:	pointer to t3cdev used by cxgb3 driver
+ * @pdev:	pci device pointer (NOTE(review): where it is set is not
+ *		visible in this header)
+ * @max_txsz:	max tx packet size for ddp
+ * @max_rxsz:	max rx packet size for ddp
+ * @llimit:	lower bound of the page pod memory
+ * @ulimit:	upper bound of the page pod memory
+ * @nppods:	# of page pod entries
+ * @idx_last:	page pod entry last used
+ * @idx_bits:	# of bits the pagepod index would take
+ * @filler:	unused bytes, keep @idx_mask 32-bit aligned
+ * @idx_mask:	pagepod index mask
+ * @rsvd_tag_mask: tag mask
+ * @map_lock:	lock to synchronize access to the page pod map
+ * @gl_map:	ddp memory gather list
+ * @gl_skb:	skb used to program the pagepod
+ */
+struct cxgb3i_ddp_info {
+	struct list_head list;
+	struct t3cdev *tdev;
+	struct pci_dev *pdev;
+	unsigned int max_txsz;
+	unsigned int max_rxsz;
+	unsigned int llimit;
+	unsigned int ulimit;
+	unsigned int nppods;
+	unsigned int idx_last;
+	unsigned char idx_bits;
+	unsigned char filler[3];
+	u32 idx_mask;
+	u32 rsvd_tag_mask;
+	spinlock_t map_lock;
+	struct cxgb3i_gather_list **gl_map;
+	struct sk_buff **gl_skb;
+};
+
+#define ULP2_MAX_PKT_SIZE	16224
+#define ULP2_MAX_PDU_PAYLOAD	(ULP2_MAX_PKT_SIZE - ISCSI_PDU_NONPAYLOAD_MAX)
+#define PPOD_PAGES_MAX		4
+#define PPOD_PAGES_SHIFT	2	/* 4 pages per pod */
+
+/*
+ * struct pagepod_hdr, pagepod - pagepod format
+ *
+ * a pagepod is the header followed by PPOD_PAGES_MAX + 1 64-bit addresses.
+ */
+struct pagepod_hdr {
+	u32 vld_tid;
+	u32 pgsz_tag_clr;
+	u32 maxoffset;
+	u32 pgoffset;
+	u64 rsvd;
+};
+
+struct pagepod {
+	struct pagepod_hdr hdr;
+	u64 addr[PPOD_PAGES_MAX + 1];
+};
+
+#define PPOD_SIZE		sizeof(struct pagepod)	/* 64 */
+#define PPOD_SIZE_SHIFT		6
+
+#define PPOD_COLOR_SHIFT	0
+#define PPOD_COLOR_SIZE		6
+#define PPOD_COLOR_MASK		((1 << PPOD_COLOR_SIZE) - 1)
+
+#define PPOD_IDX_SHIFT		PPOD_COLOR_SIZE
+#define PPOD_IDX_MAX_SIZE	24
+
+#define S_PPOD_TID	0
+#define M_PPOD_TID	0xFFFFFF
+#define V_PPOD_TID(x)	((x) << S_PPOD_TID)
+
+#define S_PPOD_VALID	24
+#define V_PPOD_VALID(x)	((x) << S_PPOD_VALID)
+#define F_PPOD_VALID	V_PPOD_VALID(1U)
+
+#define S_PPOD_COLOR	0
+#define M_PPOD_COLOR	0x3F
+#define V_PPOD_COLOR(x)	((x) << S_PPOD_COLOR)
+
+#define S_PPOD_TAG	6
+#define M_PPOD_TAG	0xFFFFFF
+#define V_PPOD_TAG(x)	((x) << S_PPOD_TAG)
+
+#define S_PPOD_PGSZ	30
+#define M_PPOD_PGSZ	0x3
+#define V_PPOD_PGSZ(x)	((x) << S_PPOD_PGSZ)
+
+/*
+ * large memory chunk allocation/release
+ * use vmalloc() if kmalloc() fails
+ *
+ * cxgb3i_alloc_big_mem() returns zeroed memory, or NULL when both
+ * allocators fail; @gfp is only passed to the kmalloc() attempt, the
+ * vmalloc() fallback takes no gfp mask.
+ * cxgb3i_free_big_mem() picks vfree() or kfree() via is_vmalloc_addr().
+ */
+static inline void *cxgb3i_alloc_big_mem(unsigned int size,
+					 gfp_t gfp)
+{
+	void *p = kmalloc(size, gfp);
+	if (!p)
+		p = vmalloc(size);
+	if (p)
+		memset(p, 0, size);
+	return p;
+}
+
+static inline void cxgb3i_free_big_mem(void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+
+/*
+ * cxgb3i ddp tag are 32 bits, it consists of reserved bits used by h/w and
+ * non-reserved bits that can be used by the iscsi s/w.
+ * The reserved bits are identified by the rsvd_bits and rsvd_shift fields
+ * in struct cxgb3i_tag_format.
+ * + * The upper most reserved bit can be used to check if a tag is ddp tag or not: + * if the bit is 0, the tag is a valid ddp tag + */ + +/** + * cxgb3i_is_ddp_tag - check if a given tag is a hw/ddp tag + * @tformat: tag format information + * @tag: tag to be checked + * + * return true if the tag is a ddp tag, false otherwise. + */ +static inline int cxgb3i_is_ddp_tag(struct cxgb3i_tag_format *tformat, u32 tag) +{ + return !(tag & (1 << (tformat->rsvd_bits + tformat->rsvd_shift - 1))); +} + +/** + * cxgb3i_sw_tag_usable - check if a given s/w tag has enough bits left for + * the reserved/hw bits + * @tformat: tag format information + * @sw_tag: s/w tag to be checked + * + * return true if the tag is a ddp tag, false otherwise. + */ +static inline int cxgb3i_sw_tag_usable(struct cxgb3i_tag_format *tformat, + u32 sw_tag) +{ + sw_tag >>= (32 - tformat->rsvd_bits); + return !sw_tag; +} + +/** + * cxgb3i_set_non_ddp_tag - mark a given s/w tag as an invalid ddp tag + * @tformat: tag format information + * @sw_tag: s/w tag to be checked + * + * insert 1 at the upper most reserved bit to mark it as an invalid ddp tag. + */ +static inline u32 cxgb3i_set_non_ddp_tag(struct cxgb3i_tag_format *tformat, + u32 sw_tag) +{ + unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1; + u32 mask = (1 << shift) - 1; + + if (sw_tag && (sw_tag & ~mask)) { + u32 v1 = sw_tag & ((1 << shift) - 1); + u32 v2 = (sw_tag >> (shift - 1)) << shift; + + return v2 | v1 | 1 << shift; + } + return sw_tag | 1 << shift; +} + +/** + * cxgb3i_ddp_tag_base - shift the s/w tag bits so that reserved bits are not + * used. 
+ * @tformat: tag format information + * @sw_tag: s/w tag to be checked + */ +static inline u32 cxgb3i_ddp_tag_base(struct cxgb3i_tag_format *tformat, + u32 sw_tag) +{ + u32 mask = (1 << tformat->rsvd_shift) - 1; + + if (sw_tag && (sw_tag & ~mask)) { + u32 v1 = sw_tag & mask; + u32 v2 = sw_tag >> tformat->rsvd_shift; + + v2 <<= tformat->rsvd_shift + tformat->rsvd_bits; + return v2 | v1; + } + return sw_tag; +} + +/** + * cxgb3i_tag_rsvd_bits - get the reserved bits used by the h/w + * @tformat: tag format information + * @tag: tag to be checked + * + * return the reserved bits in the tag + */ +static inline u32 cxgb3i_tag_rsvd_bits(struct cxgb3i_tag_format *tformat, + u32 tag) +{ + if (cxgb3i_is_ddp_tag(tformat, tag)) + return (tag >> tformat->rsvd_shift) & tformat->rsvd_mask; + return 0; +} + +/** + * cxgb3i_tag_nonrsvd_bits - get the non-reserved bits used by the s/w + * @tformat: tag format information + * @tag: tag to be checked + * + * return the non-reserved bits in the tag. + */ +static inline u32 cxgb3i_tag_nonrsvd_bits(struct cxgb3i_tag_format *tformat, + u32 tag) +{ + unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1; + u32 v1, v2; + + if (cxgb3i_is_ddp_tag(tformat, tag)) { + v1 = tag & ((1 << tformat->rsvd_shift) - 1); + v2 = (tag >> (shift + 1)) << tformat->rsvd_shift; + } else { + u32 mask = (1 << shift) - 1; + + tag &= ~(1 << shift); + v1 = tag & mask; + v2 = (tag >> 1) & ~mask; + } + return v1 | v2; +} + +int cxgb3i_ddp_tag_reserve(struct t3cdev *, unsigned int tid, + struct cxgb3i_tag_format *, u32 *tag, + struct cxgb3i_gather_list *, gfp_t gfp); +void cxgb3i_ddp_tag_release(struct t3cdev *, u32 tag); + +struct cxgb3i_gather_list *cxgb3i_ddp_make_gl(unsigned int xferlen, + struct scatterlist *sgl, + unsigned int sgcnt, + struct pci_dev *pdev, + gfp_t gfp); +void cxgb3i_ddp_release_gl(struct cxgb3i_gather_list *gl, + struct pci_dev *pdev); + +int cxgb3i_setup_conn_host_pagesize(struct t3cdev *, unsigned int tid, + int reply); +int 
cxgb3i_setup_conn_pagesize(struct t3cdev *, unsigned int tid, int reply, + unsigned long pgsz); +int cxgb3i_setup_conn_digest(struct t3cdev *, unsigned int tid, + int hcrc, int dcrc, int reply); +int cxgb3i_ddp_find_page_index(unsigned long pgsz); +int cxgb3i_adapter_ddp_init(struct t3cdev *, struct cxgb3i_tag_format *, + unsigned int *txsz, unsigned int *rxsz); +void cxgb3i_adapter_ddp_cleanup(struct t3cdev *); +#endif diff --git a/drivers/scsi/cxgb3i/cxgb3i_init.c b/drivers/scsi/cxgb3i/cxgb3i_init.c new file mode 100644 index 00000000000..091ecb4d9f3 --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i_init.c @@ -0,0 +1,107 @@ +/* cxgb3i_init.c: Chelsio S3xx iSCSI driver. + * + * Copyright (c) 2008 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + * + * Written by: Karen Xie (kxie@chelsio.com) + */ + +#include "cxgb3i.h" + +#define DRV_MODULE_NAME "cxgb3i" +#define DRV_MODULE_VERSION "1.0.0" +#define DRV_MODULE_RELDATE "Jun. 
1, 2008" + +static char version[] = + "Chelsio S3xx iSCSI Driver " DRV_MODULE_NAME + " v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + +MODULE_AUTHOR("Karen Xie "); +MODULE_DESCRIPTION("Chelsio S3xx iSCSI Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +static void open_s3_dev(struct t3cdev *); +static void close_s3_dev(struct t3cdev *); + +static cxgb3_cpl_handler_func cxgb3i_cpl_handlers[NUM_CPL_CMDS]; +static struct cxgb3_client t3c_client = { + .name = "iscsi_cxgb3", + .handlers = cxgb3i_cpl_handlers, + .add = open_s3_dev, + .remove = close_s3_dev, +}; + +/** + * open_s3_dev - register with cxgb3 LLD + * @t3dev: cxgb3 adapter instance + */ +static void open_s3_dev(struct t3cdev *t3dev) +{ + static int vers_printed; + + if (!vers_printed) { + printk(KERN_INFO "%s", version); + vers_printed = 1; + } + + cxgb3i_sdev_add(t3dev, &t3c_client); + cxgb3i_adapter_add(t3dev); +} + +/** + * close_s3_dev - de-register with cxgb3 LLD + * @t3dev: cxgb3 adapter instance + */ +static void close_s3_dev(struct t3cdev *t3dev) +{ + cxgb3i_adapter_remove(t3dev); + cxgb3i_sdev_remove(t3dev); +} + +/** + * cxgb3i_init_module - module init entry point + * + * initialize any driver wide global data structures and register itself + * with the cxgb3 module + */ +static int __init cxgb3i_init_module(void) +{ + int err; + + err = cxgb3i_sdev_init(cxgb3i_cpl_handlers); + if (err < 0) + return err; + + err = cxgb3i_iscsi_init(); + if (err < 0) + return err; + + err = cxgb3i_pdu_init(); + if (err < 0) + return err; + + cxgb3_register_client(&t3c_client); + + return 0; +} + +/** + * cxgb3i_exit_module - module cleanup/exit entry point + * + * go through the driver hba list and for each hba, release any resource held. 
+ * and unregisters iscsi transport and the cxgb3 module + */ +static void __exit cxgb3i_exit_module(void) +{ + cxgb3_unregister_client(&t3c_client); + cxgb3i_pdu_cleanup(); + cxgb3i_iscsi_cleanup(); + cxgb3i_sdev_cleanup(); +} + +module_init(cxgb3i_init_module); +module_exit(cxgb3i_exit_module); diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c new file mode 100644 index 00000000000..d83464b9b3f --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c @@ -0,0 +1,951 @@ +/* cxgb3i_iscsi.c: Chelsio S3xx iSCSI driver. + * + * Copyright (c) 2008 Chelsio Communications, Inc. + * Copyright (c) 2008 Mike Christie + * Copyright (c) 2008 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + * + * Written by: Karen Xie (kxie@chelsio.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxgb3i.h" +#include "cxgb3i_pdu.h" + +#ifdef __DEBUG_CXGB3I_TAG__ +#define cxgb3i_tag_debug cxgb3i_log_debug +#else +#define cxgb3i_tag_debug(fmt...) +#endif + +#ifdef __DEBUG_CXGB3I_API__ +#define cxgb3i_api_debug cxgb3i_log_debug +#else +#define cxgb3i_api_debug(fmt...) 
+#endif + +/* + * align pdu size to multiple of 512 for better performance + */ +#define align_pdu_size(n) do { n = (n) & (~511); } while (0) + +static struct scsi_transport_template *cxgb3i_scsi_transport; +static struct scsi_host_template cxgb3i_host_template; +static struct iscsi_transport cxgb3i_iscsi_transport; +static unsigned char sw_tag_idx_bits; +static unsigned char sw_tag_age_bits; + +static LIST_HEAD(cxgb3i_snic_list); +static DEFINE_RWLOCK(cxgb3i_snic_rwlock); + +/** + * cxgb3i_adapter_add - init a s3 adapter structure and any h/w settings + * @t3dev: t3cdev adapter + * return the resulting cxgb3i_adapter struct + */ +struct cxgb3i_adapter *cxgb3i_adapter_add(struct t3cdev *t3dev) +{ + struct cxgb3i_adapter *snic; + struct adapter *adapter = tdev2adap(t3dev); + int i; + + snic = kzalloc(sizeof(*snic), GFP_KERNEL); + if (!snic) { + cxgb3i_api_debug("cxgb3 %s, OOM.\n", t3dev->name); + return NULL; + } + spin_lock_init(&snic->lock); + + snic->tdev = t3dev; + snic->pdev = adapter->pdev; + snic->tag_format.sw_bits = sw_tag_idx_bits + sw_tag_age_bits; + + if (cxgb3i_adapter_ddp_init(t3dev, &snic->tag_format, + &snic->tx_max_size, + &snic->rx_max_size) < 0) + goto free_snic; + + for_each_port(adapter, i) { + snic->hba[i] = cxgb3i_hba_host_add(snic, adapter->port[i]); + if (!snic->hba[i]) + goto ulp_cleanup; + } + snic->hba_cnt = adapter->params.nports; + + /* add to the list */ + write_lock(&cxgb3i_snic_rwlock); + list_add_tail(&snic->list_head, &cxgb3i_snic_list); + write_unlock(&cxgb3i_snic_rwlock); + + return snic; + +ulp_cleanup: + cxgb3i_adapter_ddp_cleanup(t3dev); +free_snic: + kfree(snic); + return NULL; +} + +/** + * cxgb3i_adapter_remove - release all the resources held and cleanup any + * h/w settings + * @t3dev: t3cdev adapter + */ +void cxgb3i_adapter_remove(struct t3cdev *t3dev) +{ + int i; + struct cxgb3i_adapter *snic; + + /* remove from the list */ + write_lock(&cxgb3i_snic_rwlock); + list_for_each_entry(snic, &cxgb3i_snic_list, list_head) { + 
if (snic->tdev == t3dev) { + list_del(&snic->list_head); + break; + } + } + write_unlock(&cxgb3i_snic_rwlock); + + if (snic) { + for (i = 0; i < snic->hba_cnt; i++) { + if (snic->hba[i]) { + cxgb3i_hba_host_remove(snic->hba[i]); + snic->hba[i] = NULL; + } + } + + /* release ddp resources */ + cxgb3i_adapter_ddp_cleanup(snic->tdev); + kfree(snic); + } +} + +/** + * cxgb3i_hba_find_by_netdev - find the cxgb3i_hba structure with a given + * net_device + * @t3dev: t3cdev adapter + */ +struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev) +{ + struct cxgb3i_adapter *snic; + int i; + + read_lock(&cxgb3i_snic_rwlock); + list_for_each_entry(snic, &cxgb3i_snic_list, list_head) { + for (i = 0; i < snic->hba_cnt; i++) { + if (snic->hba[i]->ndev == ndev) { + read_unlock(&cxgb3i_snic_rwlock); + return snic->hba[i]; + } + } + } + read_unlock(&cxgb3i_snic_rwlock); + return NULL; +} + +/** + * cxgb3i_hba_host_add - register a new host with scsi/iscsi + * @snic: the cxgb3i adapter + * @ndev: associated net_device + */ +struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *snic, + struct net_device *ndev) +{ + struct cxgb3i_hba *hba; + struct Scsi_Host *shost; + int err; + + shost = iscsi_host_alloc(&cxgb3i_host_template, + sizeof(struct cxgb3i_hba), + CXGB3I_SCSI_QDEPTH_DFLT); + if (!shost) { + cxgb3i_log_info("iscsi_host_alloc failed.\n"); + return NULL; + } + + shost->transportt = cxgb3i_scsi_transport; + shost->max_lun = CXGB3I_MAX_LUN; + shost->max_id = CXGB3I_MAX_TARGET; + shost->max_channel = 0; + shost->max_cmd_len = 16; + + hba = iscsi_host_priv(shost); + hba->snic = snic; + hba->ndev = ndev; + hba->shost = shost; + + pci_dev_get(snic->pdev); + err = iscsi_host_add(shost, &snic->pdev->dev); + if (err) { + cxgb3i_log_info("iscsi_host_add failed.\n"); + goto pci_dev_put; + } + + cxgb3i_api_debug("shost 0x%p, hba 0x%p, no %u.\n", + shost, hba, shost->host_no); + + return hba; + +pci_dev_put: + pci_dev_put(snic->pdev); + scsi_host_put(shost); + return 
NULL; +} + +/** + * cxgb3i_hba_host_remove - de-register the host with scsi/iscsi + * @hba: the cxgb3i hba + */ +void cxgb3i_hba_host_remove(struct cxgb3i_hba *hba) +{ + cxgb3i_api_debug("shost 0x%p, hba 0x%p, no %u.\n", + hba->shost, hba, hba->shost->host_no); + iscsi_host_remove(hba->shost); + pci_dev_put(hba->snic->pdev); + iscsi_host_free(hba->shost); +} + +/** + * cxgb3i_ep_connect - establish TCP connection to target portal + * @dst_addr: target IP address + * @non_blocking: blocking or non-blocking call + * + * Initiates a TCP/IP connection to the dst_addr + */ +static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr, + int non_blocking) +{ + struct iscsi_endpoint *ep; + struct cxgb3i_endpoint *cep; + struct cxgb3i_hba *hba; + struct s3_conn *c3cn = NULL; + int err = 0; + + c3cn = cxgb3i_c3cn_create(); + if (!c3cn) { + cxgb3i_log_info("ep connect OOM.\n"); + err = -ENOMEM; + goto release_conn; + } + + err = cxgb3i_c3cn_connect(c3cn, (struct sockaddr_in *)dst_addr); + if (err < 0) { + cxgb3i_log_info("ep connect failed.\n"); + goto release_conn; + } + hba = cxgb3i_hba_find_by_netdev(c3cn->dst_cache->dev); + if (!hba) { + err = -ENOSPC; + cxgb3i_log_info("NOT going through cxgbi device.\n"); + goto release_conn; + } + if (c3cn_is_closing(c3cn)) { + err = -ENOSPC; + cxgb3i_log_info("ep connect unable to connect.\n"); + goto release_conn; + } + + ep = iscsi_create_endpoint(sizeof(*cep)); + if (!ep) { + err = -ENOMEM; + cxgb3i_log_info("iscsi alloc ep, OOM.\n"); + goto release_conn; + } + cep = ep->dd_data; + cep->c3cn = c3cn; + cep->hba = hba; + + cxgb3i_api_debug("ep 0x%p, 0x%p, c3cn 0x%p, hba 0x%p.\n", + ep, cep, c3cn, hba); + return ep; + +release_conn: + cxgb3i_api_debug("conn 0x%p failed, release.\n", c3cn); + if (c3cn) + cxgb3i_c3cn_release(c3cn); + return ERR_PTR(err); +} + +/** + * cxgb3i_ep_poll - polls for TCP connection establishement + * @ep: TCP connection (endpoint) handle + * @timeout_ms: timeout value in milli secs + * + * polls 
for TCP connect request to complete + */ +static int cxgb3i_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) +{ + struct cxgb3i_endpoint *cep = ep->dd_data; + struct s3_conn *c3cn = cep->c3cn; + + if (!c3cn_is_established(c3cn)) + return 0; + cxgb3i_api_debug("ep 0x%p, c3cn 0x%p established.\n", ep, c3cn); + return 1; +} + +/** + * cxgb3i_ep_disconnect - teardown TCP connection + * @ep: TCP connection (endpoint) handle + * + * teardown TCP connection + */ +static void cxgb3i_ep_disconnect(struct iscsi_endpoint *ep) +{ + struct cxgb3i_endpoint *cep = ep->dd_data; + struct cxgb3i_conn *cconn = cep->cconn; + + cxgb3i_api_debug("ep 0x%p, cep 0x%p.\n", ep, cep); + + if (cconn && cconn->conn) { + /* + * stop the xmit path so the xmit_pdu function is + * not being called + */ + iscsi_suspend_tx(cconn->conn); + + write_lock_bh(&cep->c3cn->callback_lock); + cep->c3cn->user_data = NULL; + cconn->cep = NULL; + write_unlock_bh(&cep->c3cn->callback_lock); + } + + cxgb3i_api_debug("ep 0x%p, cep 0x%p, release c3cn 0x%p.\n", + ep, cep, cep->c3cn); + cxgb3i_c3cn_release(cep->c3cn); + iscsi_destroy_endpoint(ep); +} + +/** + * cxgb3i_session_create - create a new iscsi session + * @cmds_max: max # of commands + * @qdepth: scsi queue depth + * @initial_cmdsn: initial iscsi CMDSN for this session + * @host_no: pointer to return host no + * + * Creates a new iSCSI session + */ +static struct iscsi_cls_session * +cxgb3i_session_create(struct iscsi_endpoint *ep, u16 cmds_max, u16 qdepth, + u32 initial_cmdsn, u32 *host_no) +{ + struct cxgb3i_endpoint *cep; + struct cxgb3i_hba *hba; + struct Scsi_Host *shost; + struct iscsi_cls_session *cls_session; + struct iscsi_session *session; + + if (!ep) { + cxgb3i_log_error("%s, missing endpoint.\n", __func__); + return NULL; + } + + cep = ep->dd_data; + hba = cep->hba; + shost = hba->shost; + cxgb3i_api_debug("ep 0x%p, cep 0x%p, hba 0x%p.\n", ep, cep, hba); + BUG_ON(hba != iscsi_host_priv(shost)); + + *host_no = shost->host_no; + + cls_session = 
iscsi_session_setup(&cxgb3i_iscsi_transport, shost, + cmds_max, + sizeof(struct iscsi_tcp_task), + initial_cmdsn, ISCSI_MAX_TARGET); + if (!cls_session) + return NULL; + session = cls_session->dd_data; + if (iscsi_tcp_r2tpool_alloc(session)) + goto remove_session; + + return cls_session; + +remove_session: + iscsi_session_teardown(cls_session); + return NULL; +} + +/** + * cxgb3i_session_destroy - destroys iscsi session + * @cls_session: pointer to iscsi cls session + * + * Destroys an iSCSI session instance and releases its all resources held + */ +static void cxgb3i_session_destroy(struct iscsi_cls_session *cls_session) +{ + cxgb3i_api_debug("sess 0x%p.\n", cls_session); + iscsi_tcp_r2tpool_free(cls_session->dd_data); + iscsi_session_teardown(cls_session); +} + +/** + * cxgb3i_conn_max_xmit_dlength -- check the max. xmit pdu segment size, + * reduce it to be within the hardware limit if needed + * @conn: iscsi connection + */ +static inline int cxgb3i_conn_max_xmit_dlength(struct iscsi_conn *conn) + +{ + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct cxgb3i_conn *cconn = tcp_conn->dd_data; + unsigned int max = min_t(unsigned int, ULP2_MAX_PDU_PAYLOAD, + cconn->hba->snic->tx_max_size - + ISCSI_PDU_NONPAYLOAD_MAX); + + if (conn->max_xmit_dlength) + conn->max_xmit_dlength = min_t(unsigned int, + conn->max_xmit_dlength, max); + else + conn->max_xmit_dlength = max; + align_pdu_size(conn->max_xmit_dlength); + cxgb3i_log_info("conn 0x%p, max xmit %u.\n", + conn, conn->max_xmit_dlength); + return 0; +} + +/** + * cxgb3i_conn_max_recv_dlength -- check the max. recv pdu segment size against + * the hardware limit + * @conn: iscsi connection + * return 0 if the value is valid, < 0 otherwise. 
+ */ +static inline int cxgb3i_conn_max_recv_dlength(struct iscsi_conn *conn) +{ + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct cxgb3i_conn *cconn = tcp_conn->dd_data; + unsigned int max = min_t(unsigned int, ULP2_MAX_PDU_PAYLOAD, + cconn->hba->snic->rx_max_size - + ISCSI_PDU_NONPAYLOAD_MAX); + + align_pdu_size(max); + if (conn->max_recv_dlength) { + if (conn->max_recv_dlength > max) { + cxgb3i_log_error("MaxRecvDataSegmentLength %u too big." + " Need to be <= %u.\n", + conn->max_recv_dlength, max); + return -EINVAL; + } + conn->max_recv_dlength = min_t(unsigned int, + conn->max_recv_dlength, max); + align_pdu_size(conn->max_recv_dlength); + } else + conn->max_recv_dlength = max; + cxgb3i_api_debug("conn 0x%p, max recv %u.\n", + conn, conn->max_recv_dlength); + return 0; +} + +/** + * cxgb3i_conn_create - create iscsi connection instance + * @cls_session: pointer to iscsi cls session + * @cid: iscsi cid + * + * Creates a new iSCSI connection instance for a given session + */ +static struct iscsi_cls_conn *cxgb3i_conn_create(struct iscsi_cls_session + *cls_session, u32 cid) +{ + struct iscsi_cls_conn *cls_conn; + struct iscsi_conn *conn; + struct iscsi_tcp_conn *tcp_conn; + struct cxgb3i_conn *cconn; + + cxgb3i_api_debug("sess 0x%p, cid %u.\n", cls_session, cid); + + cls_conn = iscsi_tcp_conn_setup(cls_session, sizeof(*cconn), cid); + if (!cls_conn) + return NULL; + conn = cls_conn->dd_data; + tcp_conn = conn->dd_data; + cconn = tcp_conn->dd_data; + + cconn->conn = conn; + return cls_conn; +} + +/** + * cxgb3i_conn_bind - binds iscsi sess, conn and endpoint together + * @cls_session: pointer to iscsi cls session + * @cls_conn: pointer to iscsi cls conn + * @transport_eph: 64-bit EP handle + * @is_leading: leading connection on this session? + * + * Binds together an iSCSI session, an iSCSI connection and a + * TCP connection. 
This routine returns error code if the TCP + * connection does not belong on the device iSCSI sess/conn is bound + */ + +static int cxgb3i_conn_bind(struct iscsi_cls_session *cls_session, + struct iscsi_cls_conn *cls_conn, + u64 transport_eph, int is_leading) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct cxgb3i_conn *cconn = tcp_conn->dd_data; + struct cxgb3i_adapter *snic; + struct iscsi_endpoint *ep; + struct cxgb3i_endpoint *cep; + struct s3_conn *c3cn; + int err; + + ep = iscsi_lookup_endpoint(transport_eph); + if (!ep) + return -EINVAL; + + /* setup ddp pagesize */ + cep = ep->dd_data; + c3cn = cep->c3cn; + snic = cep->hba->snic; + err = cxgb3i_setup_conn_host_pagesize(snic->tdev, c3cn->tid, 0); + if (err < 0) + return err; + + cxgb3i_api_debug("ep 0x%p, cls sess 0x%p, cls conn 0x%p.\n", + ep, cls_session, cls_conn); + + err = iscsi_conn_bind(cls_session, cls_conn, is_leading); + if (err) + return -EINVAL; + + /* calculate the tag idx bits needed for this conn based on cmds_max */ + cconn->task_idx_bits = (__ilog2_u32(conn->session->cmds_max - 1)) + 1; + cxgb3i_api_debug("session cmds_max 0x%x, bits %u.\n", + conn->session->cmds_max, cconn->task_idx_bits); + + read_lock(&c3cn->callback_lock); + c3cn->user_data = conn; + cconn->hba = cep->hba; + cconn->cep = cep; + cep->cconn = cconn; + read_unlock(&c3cn->callback_lock); + + cxgb3i_conn_max_xmit_dlength(conn); + cxgb3i_conn_max_recv_dlength(conn); + + spin_lock_bh(&conn->session->lock); + sprintf(conn->portal_address, NIPQUAD_FMT, + NIPQUAD(c3cn->daddr.sin_addr.s_addr)); + conn->portal_port = ntohs(c3cn->daddr.sin_port); + spin_unlock_bh(&conn->session->lock); + + /* init recv engine */ + iscsi_tcp_hdr_recv_prep(tcp_conn); + + return 0; +} + +/** + * cxgb3i_conn_get_param - return iscsi connection parameter to caller + * @cls_conn: pointer to iscsi cls conn + * @param: parameter type identifier + * @buf: buffer pointer + * + * returns iSCSI 
connection parameters + */ +static int cxgb3i_conn_get_param(struct iscsi_cls_conn *cls_conn, + enum iscsi_param param, char *buf) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + int len; + + cxgb3i_api_debug("cls_conn 0x%p, param %d.\n", cls_conn, param); + + switch (param) { + case ISCSI_PARAM_CONN_PORT: + spin_lock_bh(&conn->session->lock); + len = sprintf(buf, "%hu\n", conn->portal_port); + spin_unlock_bh(&conn->session->lock); + break; + case ISCSI_PARAM_CONN_ADDRESS: + spin_lock_bh(&conn->session->lock); + len = sprintf(buf, "%s\n", conn->portal_address); + spin_unlock_bh(&conn->session->lock); + break; + default: + return iscsi_conn_get_param(cls_conn, param, buf); + } + + return len; +} + +/** + * cxgb3i_conn_set_param - set iscsi connection parameter + * @cls_conn: pointer to iscsi cls conn + * @param: parameter type identifier + * @buf: buffer pointer + * @buflen: buffer length + * + * set iSCSI connection parameters + */ +static int cxgb3i_conn_set_param(struct iscsi_cls_conn *cls_conn, + enum iscsi_param param, char *buf, int buflen) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_session *session = conn->session; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct cxgb3i_conn *cconn = tcp_conn->dd_data; + struct cxgb3i_adapter *snic = cconn->hba->snic; + struct s3_conn *c3cn = cconn->cep->c3cn; + int value, err = 0; + + switch (param) { + case ISCSI_PARAM_HDRDGST_EN: + err = iscsi_set_param(cls_conn, param, buf, buflen); + if (!err && conn->hdrdgst_en) + err = cxgb3i_setup_conn_digest(snic->tdev, c3cn->tid, + conn->hdrdgst_en, + conn->datadgst_en, 0); + break; + case ISCSI_PARAM_DATADGST_EN: + err = iscsi_set_param(cls_conn, param, buf, buflen); + if (!err && conn->datadgst_en) + err = cxgb3i_setup_conn_digest(snic->tdev, c3cn->tid, + conn->hdrdgst_en, + conn->datadgst_en, 0); + break; + case ISCSI_PARAM_MAX_R2T: + sscanf(buf, "%d", &value); + if (value <= 0 || !is_power_of_2(value)) + return -EINVAL; + if (session->max_r2t 
== value) + break; + iscsi_tcp_r2tpool_free(session); + err = iscsi_set_param(cls_conn, param, buf, buflen); + if (!err && iscsi_tcp_r2tpool_alloc(session)) + return -ENOMEM; + case ISCSI_PARAM_MAX_RECV_DLENGTH: + err = iscsi_set_param(cls_conn, param, buf, buflen); + if (!err) + err = cxgb3i_conn_max_recv_dlength(conn); + break; + case ISCSI_PARAM_MAX_XMIT_DLENGTH: + err = iscsi_set_param(cls_conn, param, buf, buflen); + if (!err) + err = cxgb3i_conn_max_xmit_dlength(conn); + break; + default: + return iscsi_set_param(cls_conn, param, buf, buflen); + } + return err; +} + +/** + * cxgb3i_host_set_param - configure host (adapter) related parameters + * @shost: scsi host pointer + * @param: parameter type identifier + * @buf: buffer pointer + */ +static int cxgb3i_host_set_param(struct Scsi_Host *shost, + enum iscsi_host_param param, + char *buf, int buflen) +{ + struct cxgb3i_hba *hba = iscsi_host_priv(shost); + + cxgb3i_api_debug("param %d, buf %s.\n", param, buf); + + switch (param) { + case ISCSI_HOST_PARAM_IPADDRESS: + { + __be32 addr = in_aton(buf); + cxgb3i_set_private_ipv4addr(hba->ndev, addr); + return 0; + } + case ISCSI_HOST_PARAM_HWADDRESS: + case ISCSI_HOST_PARAM_NETDEV_NAME: + /* ignore */ + return 0; + default: + return iscsi_host_set_param(shost, param, buf, buflen); + } +} + +/** + * cxgb3i_host_get_param - returns host (adapter) related parameters + * @shost: scsi host pointer + * @param: parameter type identifier + * @buf: buffer pointer + */ +static int cxgb3i_host_get_param(struct Scsi_Host *shost, + enum iscsi_host_param param, char *buf) +{ + struct cxgb3i_hba *hba = iscsi_host_priv(shost); + int len = 0; + + cxgb3i_api_debug("hba %s, param %d.\n", hba->ndev->name, param); + + switch (param) { + case ISCSI_HOST_PARAM_HWADDRESS: + len = sysfs_format_mac(buf, hba->ndev->dev_addr, 6); + break; + case ISCSI_HOST_PARAM_NETDEV_NAME: + len = sprintf(buf, "%s\n", hba->ndev->name); + break; + case ISCSI_HOST_PARAM_IPADDRESS: + { + __be32 addr; + + addr 
= cxgb3i_get_private_ipv4addr(hba->ndev); + len = sprintf(buf, NIPQUAD_FMT, NIPQUAD(addr)); + break; + } + default: + return iscsi_host_get_param(shost, param, buf); + } + return len; +} + +/** + * cxgb3i_conn_get_stats - returns iSCSI stats + * @cls_conn: pointer to iscsi cls conn + * @stats: pointer to iscsi statistic struct + */ +static void cxgb3i_conn_get_stats(struct iscsi_cls_conn *cls_conn, + struct iscsi_stats *stats) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + + stats->txdata_octets = conn->txdata_octets; + stats->rxdata_octets = conn->rxdata_octets; + stats->scsicmd_pdus = conn->scsicmd_pdus_cnt; + stats->dataout_pdus = conn->dataout_pdus_cnt; + stats->scsirsp_pdus = conn->scsirsp_pdus_cnt; + stats->datain_pdus = conn->datain_pdus_cnt; + stats->r2t_pdus = conn->r2t_pdus_cnt; + stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt; + stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt; + stats->digest_err = 0; + stats->timeout_err = 0; + stats->custom_length = 1; + strcpy(stats->custom[0].desc, "eh_abort_cnt"); + stats->custom[0].value = conn->eh_abort_cnt; +} + +/** + * cxgb3i_parse_itt - get the idx and age bits from a given tag + * @conn: iscsi connection + * @itt: itt tag + * @idx: task index, filled in by this function + * @age: session age, filled in by this function + */ +static void cxgb3i_parse_itt(struct iscsi_conn *conn, itt_t itt, + int *idx, int *age) +{ + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct cxgb3i_conn *cconn = tcp_conn->dd_data; + struct cxgb3i_adapter *snic = cconn->hba->snic; + u32 tag = ntohl((__force u32) itt); + u32 sw_bits; + + sw_bits = cxgb3i_tag_nonrsvd_bits(&snic->tag_format, tag); + if (idx) + *idx = sw_bits & ((1 << cconn->task_idx_bits) - 1); + if (age) + *age = (sw_bits >> cconn->task_idx_bits) & ISCSI_AGE_MASK; + + cxgb3i_tag_debug("parse tag 0x%x/0x%x, sw 0x%x, itt 0x%x, age 0x%x.\n", + tag, itt, sw_bits, idx ? *idx : 0xFFFFF, + age ? 
*age : 0xFF);
}

/**
 * cxgb3i_reserve_itt - generate tag for a given task
 * Try to set up ddp for a scsi read task.
 * @task:	iscsi task
 * @hdr_itt:	tag, filled in by this function
 *
 * The software tag is the session age shifted left by the connection's
 * task_idx_bits, OR'ed with the task itt.  For a read or bidirectional
 * command whose sw tag is usable, a gather list is built and a ddp tag
 * reservation is attempted; whenever that does not succeed the tag comes
 * from cxgb3i_set_non_ddp_tag() instead.  Always returns 0.
 */
int cxgb3i_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt)
{
	struct scsi_cmnd *sc = task->sc;
	struct iscsi_conn *conn = task->conn;
	struct iscsi_session *sess = conn->session;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct cxgb3i_conn *cconn = tcp_conn->dd_data;
	struct cxgb3i_adapter *snic = cconn->hba->snic;
	struct cxgb3i_tag_format *tformat = &snic->tag_format;
	u32 sw_tag = (sess->age << cconn->task_idx_bits) | task->itt;
	u32 tag;
	int err = -EINVAL;	/* stays negative unless a ddp tag is reserved */

	if (sc &&
	    (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_FROM_DEVICE) &&
	    cxgb3i_sw_tag_usable(tformat, sw_tag)) {
		struct s3_conn *c3cn = cconn->cep->c3cn;
		struct cxgb3i_gather_list *gl;

		gl = cxgb3i_ddp_make_gl(scsi_in(sc)->length,
					scsi_in(sc)->table.sgl,
					scsi_in(sc)->table.nents,
					snic->pdev,
					GFP_ATOMIC);
		if (gl) {
			tag = sw_tag;
			err = cxgb3i_ddp_tag_reserve(snic->tdev, c3cn->tid,
						     tformat, &tag,
						     gl, GFP_ATOMIC);
			/* reservation failed: the gather list is ours to drop */
			if (err < 0)
				cxgb3i_ddp_release_gl(gl, snic->pdev);
		}
	}

	if (err < 0)
		tag = cxgb3i_set_non_ddp_tag(tformat, sw_tag);
	/* the itt needs to be sent in big-endian order */
	*hdr_itt = (__force itt_t)htonl(tag);

	cxgb3i_tag_debug("new tag 0x%x/0x%x (itt 0x%x, age 0x%x).\n",
			 tag, *hdr_itt, task->itt, sess->age);
	return 0;
}

/**
 * cxgb3i_release_itt - release the tag for a given task
 * if the tag is a ddp tag, release the ddp setup
 * @task:	iscsi task
 * @hdr_itt:	tag (as stored in the header, i.e. big-endian)
 */
void cxgb3i_release_itt(struct iscsi_task *task, itt_t hdr_itt)
{
	struct scsi_cmnd *sc = task->sc;
	struct iscsi_tcp_conn *tcp_conn = task->conn->dd_data;
	struct cxgb3i_conn *cconn = tcp_conn->dd_data;
	struct cxgb3i_adapter *snic = cconn->hba->snic;
	struct cxgb3i_tag_format *tformat = &snic->tag_format;
	u32 tag = ntohl((__force u32)hdr_itt);

	cxgb3i_tag_debug("release tag 0x%x.\n", tag);

	/* same read/bidi condition as used when reserving the tag */
	if (sc &&
	    (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_FROM_DEVICE) &&
	    cxgb3i_is_ddp_tag(tformat, tag))
		cxgb3i_ddp_tag_release(snic->tdev, tag);
}

/**
 * cxgb3i_host_template -- Scsi_Host_Template structure
 *	used when registering with the scsi mid layer
 */
static struct scsi_host_template cxgb3i_host_template = {
	.module			= THIS_MODULE,
	.name			= "Chelsio S3xx iSCSI Initiator",
	.proc_name		= "cxgb3i",
	.queuecommand		= iscsi_queuecommand,
	.change_queue_depth	= iscsi_change_queue_depth,
	.can_queue		= 128 * (ISCSI_DEF_XMIT_CMDS_MAX - 1),
	.sg_tablesize		= SG_ALL,
	.max_sectors		= 0xFFFF,
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_abort_handler	= iscsi_eh_abort,
	.eh_device_reset_handler = iscsi_eh_device_reset,
	.eh_target_reset_handler = iscsi_eh_target_reset,
	.use_clustering		= DISABLE_CLUSTERING,
	.this_id		= -1,
};

/*
 * iSCSI transport operations: a mix of cxgb3i-specific callbacks and the
 * generic iscsi_* / iscsi_tcp_* helpers.
 */
static struct iscsi_transport cxgb3i_iscsi_transport = {
	.owner			= THIS_MODULE,
	.name			= "cxgb3i",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				| CAP_DATADGST | CAP_DIGEST_OFFLOAD |
				CAP_PADDING_OFFLOAD,
	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
				ISCSI_MAX_XMIT_DLENGTH |
				ISCSI_HDRDGST_EN |
				ISCSI_DATADGST_EN |
				ISCSI_INITIAL_R2T_EN |
				ISCSI_MAX_R2T |
				ISCSI_IMM_DATA_EN |
				ISCSI_FIRST_BURST |
				ISCSI_MAX_BURST |
				ISCSI_PDU_INORDER_EN |
				ISCSI_DATASEQ_INORDER_EN |
				ISCSI_ERL |
				ISCSI_CONN_PORT |
				ISCSI_CONN_ADDRESS |
				ISCSI_EXP_STATSN |
				ISCSI_PERSISTENT_PORT |
				ISCSI_PERSISTENT_ADDRESS |
				ISCSI_TARGET_NAME | ISCSI_TPGT |
				ISCSI_USERNAME | ISCSI_PASSWORD |
				ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
				ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
				ISCSI_LU_RESET_TMO |
				ISCSI_PING_TMO | ISCSI_RECV_TMO |
				ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
			ISCSI_HOST_INITIATOR_NAME | ISCSI_HOST_NETDEV_NAME,
	.get_host_param		= cxgb3i_host_get_param,
	.set_host_param		= cxgb3i_host_set_param,
	/* session management */
	.create_session		= cxgb3i_session_create,
	.destroy_session	= cxgb3i_session_destroy,
	.get_session_param	= iscsi_session_get_param,
	/* connection management */
	.create_conn		= cxgb3i_conn_create,
	.bind_conn		= cxgb3i_conn_bind,
	.destroy_conn		= iscsi_tcp_conn_teardown,
	.start_conn		= iscsi_conn_start,
	.stop_conn		= iscsi_conn_stop,
	.get_conn_param		= cxgb3i_conn_get_param,
	.set_param		= cxgb3i_conn_set_param,
	.get_stats		= cxgb3i_conn_get_stats,
	/* pdu xmit req. from user space */
	.send_pdu		= iscsi_conn_send_pdu,
	/* task */
	.init_task		= iscsi_tcp_task_init,
	.xmit_task		= iscsi_tcp_task_xmit,
	.cleanup_task		= cxgb3i_conn_cleanup_task,

	/* pdu */
	.alloc_pdu		= cxgb3i_conn_alloc_pdu,
	.init_pdu		= cxgb3i_conn_init_pdu,
	.xmit_pdu		= cxgb3i_conn_xmit_pdu,
	.parse_pdu_itt		= cxgb3i_parse_itt,

	/* TCP connect/disconnect */
	.ep_connect		= cxgb3i_ep_connect,
	.ep_poll		= cxgb3i_ep_poll,
	.ep_disconnect		= cxgb3i_ep_disconnect,
	/* Error recovery timeout call */
	.session_recovery_timedout = iscsi_session_recovery_timedout,
};

/*
 * Derive the sw tag index/age bit widths from ISCSI_ITT_MASK and
 * ISCSI_AGE_MASK, then register the cxgb3i transport.
 * Returns 0 on success, -ENODEV if registration fails.
 */
int cxgb3i_iscsi_init(void)
{
	sw_tag_idx_bits = (__ilog2_u32(ISCSI_ITT_MASK)) + 1;
	sw_tag_age_bits = (__ilog2_u32(ISCSI_AGE_MASK)) + 1;
	cxgb3i_log_info("tag itt 0x%x, %u bits, age 0x%x, %u bits.\n",
			ISCSI_ITT_MASK, sw_tag_idx_bits,
			ISCSI_AGE_MASK, sw_tag_age_bits);

	cxgb3i_scsi_transport =
	    iscsi_register_transport(&cxgb3i_iscsi_transport);
	if (!cxgb3i_scsi_transport) {
		cxgb3i_log_error("Could not register cxgb3i transport.\n");
		return -ENODEV;
	}
	cxgb3i_api_debug("cxgb3i transport 0x%p.\n", cxgb3i_scsi_transport);
	return 0;
}

/* Unregister the transport, but only if cxgb3i_iscsi_init() registered it. */
void cxgb3i_iscsi_cleanup(void)
{
	if (cxgb3i_scsi_transport) {
		cxgb3i_api_debug("cxgb3i transport 0x%p.\n",
				 cxgb3i_scsi_transport);
		iscsi_unregister_transport(&cxgb3i_iscsi_transport);
	}
}
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c
new file mode 100644
index 00000000000..5f16081b68d --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c @@ -0,0 +1,1810 @@ +/* + * cxgb3i_offload.c: Chelsio S3xx iscsi offloaded tcp connection management + * + * Copyright (C) 2003-2008 Chelsio Communications. All rights reserved. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this + * release for licensing terms and conditions. + * + * Written by: Dimitris Michailidis (dm@chelsio.com) + * Karen Xie (kxie@chelsio.com) + */ + +#include +#include + +#include "cxgb3_defs.h" +#include "cxgb3_ctl_defs.h" +#include "firmware_exports.h" +#include "cxgb3i_offload.h" +#include "cxgb3i_pdu.h" +#include "cxgb3i_ddp.h" + +#ifdef __DEBUG_C3CN_CONN__ +#define c3cn_conn_debug cxgb3i_log_info +#else +#define c3cn_conn_debug(fmt...) +#endif + +#ifdef __DEBUG_C3CN_TX__ +#define c3cn_tx_debug cxgb3i_log_debug +#else +#define c3cn_tx_debug(fmt...) +#endif + +#ifdef __DEBUG_C3CN_RX__ +#define c3cn_rx_debug cxgb3i_log_debug +#else +#define c3cn_rx_debug(fmt...) +#endif + +/* + * module parameters releated to offloaded iscsi connection + */ +static int cxgb3_rcv_win = 256 * 1024; +module_param(cxgb3_rcv_win, int, 0644); +MODULE_PARM_DESC(cxgb3_rcv_win, "TCP receive window in bytes (default=256KB)"); + +static int cxgb3_snd_win = 64 * 1024; +module_param(cxgb3_snd_win, int, 0644); +MODULE_PARM_DESC(cxgb3_snd_win, "TCP send window in bytes (default=64KB)"); + +static int cxgb3_rx_credit_thres = 10 * 1024; +module_param(cxgb3_rx_credit_thres, int, 0644); +MODULE_PARM_DESC(rx_credit_thres, + "RX credits return threshold in bytes (default=10KB)"); + +static unsigned int cxgb3_max_connect = 8 * 1024; +module_param(cxgb3_max_connect, uint, 0644); +MODULE_PARM_DESC(cxgb3_max_connect, "Max. 
# of connections (default=8092)"); + +static unsigned int cxgb3_sport_base = 20000; +module_param(cxgb3_sport_base, uint, 0644); +MODULE_PARM_DESC(cxgb3_sport_base, "starting port number (default=20000)"); + +/* + * cxgb3i tcp connection data(per adapter) list + */ +static LIST_HEAD(cdata_list); +static DEFINE_RWLOCK(cdata_rwlock); + +static int c3cn_push_tx_frames(struct s3_conn *c3cn, int req_completion); +static void c3cn_release_offload_resources(struct s3_conn *c3cn); + +/* + * iscsi source port management + * + * Find a free source port in the port allocation map. We use a very simple + * rotor scheme to look for the next free port. + * + * If a source port has been specified make sure that it doesn't collide with + * our normal source port allocation map. If it's outside the range of our + * allocation/deallocation scheme just let them use it. + * + * If the source port is outside our allocation range, the caller is + * responsible for keeping track of their port usage. + */ +static int c3cn_get_port(struct s3_conn *c3cn, struct cxgb3i_sdev_data *cdata) +{ + unsigned int start; + int idx; + + if (!cdata) + goto error_out; + + if (c3cn->saddr.sin_port != 0) { + idx = ntohs(c3cn->saddr.sin_port) - cxgb3_sport_base; + if (idx < 0 || idx >= cxgb3_max_connect) + return 0; + if (!test_and_set_bit(idx, cdata->sport_map)) + return -EADDRINUSE; + } + + /* the sport_map_next may not be accurate but that is okay, sport_map + should be */ + start = idx = cdata->sport_map_next; + do { + if (++idx >= cxgb3_max_connect) + idx = 0; + if (!(test_and_set_bit(idx, cdata->sport_map))) { + c3cn->saddr.sin_port = htons(cxgb3_sport_base + idx); + cdata->sport_map_next = idx; + c3cn_conn_debug("%s reserve port %u.\n", + cdata->cdev->name, + cxgb3_sport_base + idx); + return 0; + } + } while (idx != start); + +error_out: + return -EADDRNOTAVAIL; +} + +static void c3cn_put_port(struct s3_conn *c3cn) +{ + struct cxgb3i_sdev_data *cdata = CXGB3_SDEV_DATA(c3cn->cdev); + + if 
(c3cn->saddr.sin_port) { + int idx = ntohs(c3cn->saddr.sin_port) - cxgb3_sport_base; + + c3cn->saddr.sin_port = 0; + if (idx < 0 || idx >= cxgb3_max_connect) + return; + clear_bit(idx, cdata->sport_map); + c3cn_conn_debug("%s, release port %u.\n", + cdata->cdev->name, cxgb3_sport_base + idx); + } +} + +static inline void c3cn_set_flag(struct s3_conn *c3cn, enum c3cn_flags flag) +{ + __set_bit(flag, &c3cn->flags); + c3cn_conn_debug("c3cn 0x%p, set %d, s %u, f 0x%lx.\n", + c3cn, flag, c3cn->state, c3cn->flags); +} + +static inline void c3cn_clear_flag(struct s3_conn *c3cn, enum c3cn_flags flag) +{ + __clear_bit(flag, &c3cn->flags); + c3cn_conn_debug("c3cn 0x%p, clear %d, s %u, f 0x%lx.\n", + c3cn, flag, c3cn->state, c3cn->flags); +} + +static inline int c3cn_flag(struct s3_conn *c3cn, enum c3cn_flags flag) +{ + if (c3cn == NULL) + return 0; + return test_bit(flag, &c3cn->flags); +} + +static void c3cn_set_state(struct s3_conn *c3cn, int state) +{ + c3cn_conn_debug("c3cn 0x%p state -> %u.\n", c3cn, state); + c3cn->state = state; +} + +static inline void c3cn_hold(struct s3_conn *c3cn) +{ + atomic_inc(&c3cn->refcnt); +} + +static inline void c3cn_put(struct s3_conn *c3cn) +{ + if (atomic_dec_and_test(&c3cn->refcnt)) { + c3cn_conn_debug("free c3cn 0x%p, s %u, f 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + kfree(c3cn); + } +} + +static void c3cn_closed(struct s3_conn *c3cn) +{ + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + c3cn_put_port(c3cn); + c3cn_release_offload_resources(c3cn); + c3cn_set_state(c3cn, C3CN_STATE_CLOSED); + cxgb3i_conn_closing(c3cn); +} + +/* + * CPL (Chelsio Protocol Language) defines a message passing interface between + * the host driver and T3 asic. + * The section below implments CPLs that related to iscsi tcp connection + * open/close/abort and data send/receive. 
+ */ + +/* + * CPL connection active open request: host -> + */ +static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu) +{ + int i = 0; + + while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) + ++i; + return i; +} + +static unsigned int select_mss(struct s3_conn *c3cn, unsigned int pmtu) +{ + unsigned int idx; + struct dst_entry *dst = c3cn->dst_cache; + struct t3cdev *cdev = c3cn->cdev; + const struct t3c_data *td = T3C_DATA(cdev); + u16 advmss = dst_metric(dst, RTAX_ADVMSS); + + if (advmss > pmtu - 40) + advmss = pmtu - 40; + if (advmss < td->mtus[0] - 40) + advmss = td->mtus[0] - 40; + idx = find_best_mtu(td, advmss + 40); + return idx; +} + +static inline int compute_wscale(int win) +{ + int wscale = 0; + while (wscale < 14 && (65535<mss_idx); +} + +static inline unsigned int calc_opt0l(struct s3_conn *c3cn) +{ + return V_ULP_MODE(ULP_MODE_ISCSI) | + V_RCV_BUFSIZ(cxgb3_rcv_win>>10); +} + +static void make_act_open_req(struct s3_conn *c3cn, struct sk_buff *skb, + unsigned int atid, const struct l2t_entry *e) +{ + struct cpl_act_open_req *req; + + c3cn_conn_debug("c3cn 0x%p, atid 0x%x.\n", c3cn, atid); + + skb->priority = CPL_PRIORITY_SETUP; + req = (struct cpl_act_open_req *)__skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid)); + req->local_port = c3cn->saddr.sin_port; + req->peer_port = c3cn->daddr.sin_port; + req->local_ip = c3cn->saddr.sin_addr.s_addr; + req->peer_ip = c3cn->daddr.sin_addr.s_addr; + req->opt0h = htonl(calc_opt0h(c3cn) | V_L2T_IDX(e->idx) | + V_TX_CHANNEL(e->smt_idx)); + req->opt0l = htonl(calc_opt0l(c3cn)); + req->params = 0; +} + +static void fail_act_open(struct s3_conn *c3cn, int errno) +{ + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + c3cn->err = errno; + c3cn_closed(c3cn); +} + +static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb) +{ + struct s3_conn 
*c3cn = (struct s3_conn *)skb->sk; + + c3cn_conn_debug("c3cn 0x%p, state %u.\n", c3cn, c3cn->state); + + c3cn_hold(c3cn); + spin_lock_bh(&c3cn->lock); + if (c3cn->state == C3CN_STATE_CONNECTING) + fail_act_open(c3cn, EHOSTUNREACH); + spin_unlock_bh(&c3cn->lock); + c3cn_put(c3cn); + __kfree_skb(skb); +} + +/* + * CPL connection close request: host -> + * + * Close a connection by sending a CPL_CLOSE_CON_REQ message and queue it to + * the write queue (i.e., after any unsent txt data). + */ +static void skb_entail(struct s3_conn *c3cn, struct sk_buff *skb, + int flags) +{ + CXGB3_SKB_CB(skb)->seq = c3cn->write_seq; + CXGB3_SKB_CB(skb)->flags = flags; + __skb_queue_tail(&c3cn->write_queue, skb); +} + +static void send_close_req(struct s3_conn *c3cn) +{ + struct sk_buff *skb = c3cn->cpl_close; + struct cpl_close_con_req *req = (struct cpl_close_con_req *)skb->head; + unsigned int tid = c3cn->tid; + + c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + c3cn->cpl_close = NULL; + + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); + req->wr.wr_lo = htonl(V_WR_TID(tid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); + req->rsvd = htonl(c3cn->write_seq); + + skb_entail(c3cn, skb, C3CB_FLAG_NO_APPEND); + if (c3cn->state != C3CN_STATE_CONNECTING) + c3cn_push_tx_frames(c3cn, 1); +} + +/* + * CPL connection abort request: host -> + * + * Send an ABORT_REQ message. Makes sure we do not send multiple ABORT_REQs + * for the same connection and also that we do not try to send a message + * after the connection has closed. 
 */

/*
 * ARP failure handler for an ABORT_REQ: downgrade the request to
 * CPL_ABORT_NO_RST and hand it to the offload send path.
 */
static void abort_arp_failure(struct t3cdev *cdev, struct sk_buff *skb)
{
	struct cpl_abort_req *req = cplhdr(skb);

	c3cn_conn_debug("tdev 0x%p.\n", cdev);

	req->cmd = CPL_ABORT_NO_RST;
	cxgb3_ofld_send(cdev, skb);
}

/* Drop every skb still waiting on the connection's write queue. */
static inline void c3cn_purge_write_queue(struct s3_conn *c3cn)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&c3cn->write_queue)))
		__kfree_skb(skb);
}

/*
 * Build and send the ABORT_REQ from the preallocated c3cn->cpl_abort_req skb.
 * Does nothing if the connection is already aborting, the preallocated skb is
 * gone, or there is no device.
 */
static void send_abort_req(struct s3_conn *c3cn)
{
	struct sk_buff *skb = c3cn->cpl_abort_req;
	struct cpl_abort_req *req;
	unsigned int tid = c3cn->tid;

	if (unlikely(c3cn->state == C3CN_STATE_ABORTING) || !skb ||
		     !c3cn->cdev)
		return;

	c3cn_set_state(c3cn, C3CN_STATE_ABORTING);

	c3cn_conn_debug("c3cn 0x%p, flag ABORT_RPL + ABORT_SHUT.\n", c3cn);

	c3cn_set_flag(c3cn, C3CN_ABORT_RPL_PENDING);

	/* Purge the send queue so we don't send anything after an abort. */
	c3cn_purge_write_queue(c3cn);

	/* the preallocated skb is consumed by this request */
	c3cn->cpl_abort_req = NULL;
	req = (struct cpl_abort_req *)skb->head;

	skb->priority = CPL_PRIORITY_DATA;
	set_arp_failure_handler(skb, abort_arp_failure);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
	req->wr.wr_lo = htonl(V_WR_TID(tid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
	req->rsvd0 = htonl(c3cn->snd_nxt);
	/* rsvd1 is 1 when no TX_DATA has been sent on this connection yet */
	req->rsvd1 = !c3cn_flag(c3cn, C3CN_TX_DATA_SENT);
	req->cmd = CPL_ABORT_SEND_RST;

	l2t_send(c3cn->cdev, skb, c3cn->l2t);
}

/*
 * CPL connection abort reply: host ->
 *
 * Send an ABORT_RPL message in response to the ABORT_REQ received.
 */

/*
 * Uses (and consumes) the preallocated c3cn->cpl_abort_rpl skb; @rst_status
 * goes into the reply's cmd field.
 * NOTE(review): the skb is dereferenced without a NULL check -- presumably
 * callers guarantee cpl_abort_rpl is still present; confirm.
 */
static void send_abort_rpl(struct s3_conn *c3cn, int rst_status)
{
	struct sk_buff *skb = c3cn->cpl_abort_rpl;
	struct cpl_abort_rpl *rpl = (struct cpl_abort_rpl *)skb->head;

	c3cn->cpl_abort_rpl = NULL;

	skb->priority = CPL_PRIORITY_DATA;
	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
	rpl->wr.wr_lo = htonl(V_WR_TID(c3cn->tid));
	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, c3cn->tid));
	rpl->cmd = rst_status;

	cxgb3_ofld_send(c3cn->cdev, skb);
}

/*
 * CPL connection rx data ack: host ->
 * Send RX credits through an RX_DATA_ACK CPL message.  Returns the number of
 * credits sent (0 if the message skb could not be allocated).
 */
static u32 send_rx_credits(struct s3_conn *c3cn, u32 credits, u32 dack)
{
	struct sk_buff *skb;
	struct cpl_rx_data_ack *req;

	skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
	if (!skb)
		return 0;

	req = (struct cpl_rx_data_ack *)__skb_put(skb, sizeof(*req));
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, c3cn->tid));
	req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
	skb->priority = CPL_PRIORITY_ACK;
	cxgb3_ofld_send(c3cn->cdev, skb);
	return credits;
}

/*
 * CPL connection tx data: host ->
 *
 * Send iscsi PDU via TX_DATA CPL message.  Returns the number of
 * credits sent.
 * Each TX_DATA consumes work request credit (wrs), so we need to keep track of
 * how many we've used so far and how many are pending (i.e., not yet ack'ed
 * by T3).
 */

/*
 * For ULP connections HW may insert digest bytes into the pdu. Those digest
 * bytes are not sent by the host but are part of the TCP payload and therefore
 * consume TCP sequence space.
 */
/* extra bytes per pdu, indexed by the low two bits of the skb's ulp mode */
static const unsigned int cxgb3_ulp_extra_len[] = { 0, 4, 4, 8 };
static inline unsigned int ulp_extra_len(const struct sk_buff *skb)
{
	return cxgb3_ulp_extra_len[skb_ulp_mode(skb) & 3];
}

/* set by s3_init_wr_tab() to wr_len * 8 */
static unsigned int wrlen __read_mostly;

/*
 * The number of WRs needed for an skb depends on the number of fragments
 * in the skb and whether it has any payload in its main body.  This maps the
 * length of the gather list represented by an skb into the # of necessary WRs.
 *
 * The max. length of an skb is controlled by the max pdu size which is ~16K.
 * Also, assume the min. fragment length is the sector size (512), then add
 * extra fragment counts for iscsi bhs and payload padding.
 */
#define SKB_WR_LIST_SIZE	(16384/512 + 3)
static unsigned int skb_wrs[SKB_WR_LIST_SIZE] __read_mostly;

/*
 * Fill skb_wrs[1..SKB_WR_LIST_SIZE-1] for a work request of @wr_len and set
 * wrlen.  Runs once: a non-zero skb_wrs[1] marks the table as initialized.
 * skb_wrs[0] is never written here.
 */
static void s3_init_wr_tab(unsigned int wr_len)
{
	int i;

	if (skb_wrs[1])		/* already initialized */
		return;

	for (i = 1; i < SKB_WR_LIST_SIZE; i++) {
		int sgl_len = (3 * i) / 2 + (i & 1);

		sgl_len += 3;
		skb_wrs[i] = (sgl_len <= wr_len
			      ? 1 : 1 + (sgl_len - 2) / (wr_len - 1));
	}

	wrlen = wr_len * 8;
}

/* Mark the pending-WR list empty (only the head is reset). */
static inline void reset_wr_list(struct s3_conn *c3cn)
{
	c3cn->wr_pending_head = NULL;
}

/*
 * Add a WR to a connection's list of pending WRs.  This is a singly-linked
 * list of sk_buffs operating as a FIFO.  The head is kept in wr_pending_head
 * and the tail in wr_pending_tail.  skb->sp is used as the "next" link.
 */
static inline void enqueue_wr(struct s3_conn *c3cn,
			      struct sk_buff *skb)
{
	skb->sp = NULL;

	/*
	 * We want to take an extra reference since both us and the driver
	 * need to free the packet before it's really freed.  We know there's
	 * just one user currently so we use atomic_set rather than skb_get
	 * to avoid the atomic op.
	 */
	atomic_set(&skb->users, 2);

	if (!c3cn->wr_pending_head)
		c3cn->wr_pending_head = skb;
	else
		c3cn->wr_pending_tail->sp = (void *)skb;
	c3cn->wr_pending_tail = skb;
}

/* Oldest pending WR, or NULL if none; the list is left untouched. */
static inline struct sk_buff *peek_wr(const struct s3_conn *c3cn)
{
	return c3cn->wr_pending_head;
}

static inline void free_wr_skb(struct sk_buff *skb)
{
	kfree_skb(skb);
}

/* Remove and return the oldest pending WR, or NULL if the list is empty. */
static inline struct sk_buff *dequeue_wr(struct s3_conn *c3cn)
{
	struct sk_buff *skb = c3cn->wr_pending_head;

	if (likely(skb)) {
		/* Don't bother clearing the tail */
		c3cn->wr_pending_head = (struct sk_buff *)skb->sp;
		skb->sp = NULL;
	}
	return skb;
}

/* Free every WR still on the pending list. */
static void purge_wr_queue(struct s3_conn *c3cn)
{
	struct sk_buff *skb;
	while ((skb = dequeue_wr(c3cn)) != NULL)
		free_wr_skb(skb);
}

/*
 * Prepend a TX_DATA work request header to @skb.  The first header sent on a
 * connection (C3CN_TX_DATA_SENT not yet set) additionally carries the init
 * flags, the cpu index and the send buffer size.
 */
static inline void make_tx_data_wr(struct s3_conn *c3cn, struct sk_buff *skb,
				   int len)
{
	struct tx_data_wr *req;

	skb_reset_transport_header(skb);
	req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req));
	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
	req->wr_lo = htonl(V_WR_TID(c3cn->tid));
	req->sndseq = htonl(c3cn->snd_nxt);
	/* len includes the length of any HW ULP additions */
	req->len = htonl(len);
	req->param = htonl(V_TX_PORT(c3cn->l2t->smt_idx));
	/* V_TX_ULP_SUBMODE sets both the mode and submode */
	req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(skb)) |
			   V_TX_SHOVE((skb_peek(&c3cn->write_queue) ? 0 : 1)));

	if (!c3cn_flag(c3cn, C3CN_TX_DATA_SENT)) {
		req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
				    V_TX_CPU_IDX(c3cn->qset));
		/* Sendbuffer is in units of 32KB. */
		req->param |= htonl(V_TX_SNDBUF(cxgb3_snd_win >> 15));
		c3cn_set_flag(c3cn, C3CN_TX_DATA_SENT);
	}
}

/**
 * c3cn_push_tx_frames -- start transmit
 * @c3cn: the offloaded connection
 * @req_completion: request wr_ack or not
 *
 * Prepends TX_DATA_WR or CPL_CLOSE_CON_REQ headers to buffers waiting in a
 * connection's send queue and sends them on to T3.
Must be called with the + * connection's lock held. Returns the amount of send buffer space that was + * freed as a result of sending queued data to T3. + */ +static void arp_failure_discard(struct t3cdev *cdev, struct sk_buff *skb) +{ + kfree_skb(skb); +} + +static int c3cn_push_tx_frames(struct s3_conn *c3cn, int req_completion) +{ + int total_size = 0; + struct sk_buff *skb; + struct t3cdev *cdev; + struct cxgb3i_sdev_data *cdata; + + if (unlikely(c3cn->state == C3CN_STATE_CONNECTING || + c3cn->state == C3CN_STATE_CLOSE_WAIT_1 || + c3cn->state == C3CN_STATE_ABORTING)) { + c3cn_tx_debug("c3cn 0x%p, in closing state %u.\n", + c3cn, c3cn->state); + return 0; + } + + cdev = c3cn->cdev; + cdata = CXGB3_SDEV_DATA(cdev); + + while (c3cn->wr_avail + && (skb = skb_peek(&c3cn->write_queue)) != NULL) { + int len = skb->len; /* length before skb_push */ + int frags = skb_shinfo(skb)->nr_frags + (len != skb->data_len); + int wrs_needed = skb_wrs[frags]; + + if (wrs_needed > 1 && len + sizeof(struct tx_data_wr) <= wrlen) + wrs_needed = 1; + + WARN_ON(frags >= SKB_WR_LIST_SIZE || wrs_needed < 1); + + if (c3cn->wr_avail < wrs_needed) { + c3cn_tx_debug("c3cn 0x%p, skb len %u/%u, frag %u, " + "wr %d < %u.\n", + c3cn, skb->len, skb->datalen, frags, + wrs_needed, c3cn->wr_avail); + break; + } + + __skb_unlink(skb, &c3cn->write_queue); + skb->priority = CPL_PRIORITY_DATA; + skb->csum = wrs_needed; /* remember this until the WR_ACK */ + c3cn->wr_avail -= wrs_needed; + c3cn->wr_unacked += wrs_needed; + enqueue_wr(c3cn, skb); + + if (likely(CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) { + len += ulp_extra_len(skb); + make_tx_data_wr(c3cn, skb, len); + c3cn->snd_nxt += len; + if ((req_completion + && c3cn->wr_unacked == wrs_needed) + || (CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_COMPL) + || c3cn->wr_unacked >= c3cn->wr_max / 2) { + struct work_request_hdr *wr = cplhdr(skb); + + wr->wr_hi |= htonl(F_WR_COMPL); + c3cn->wr_unacked = 0; + } + CXGB3_SKB_CB(skb)->flags &= ~C3CB_FLAG_NEED_HDR; + 
} + + total_size += skb->truesize; + set_arp_failure_handler(skb, arp_failure_discard); + l2t_send(cdev, skb, c3cn->l2t); + } + return total_size; +} + +/* + * process_cpl_msg: -> host + * Top-level CPL message processing used by most CPL messages that + * pertain to connections. + */ +static inline void process_cpl_msg(void (*fn)(struct s3_conn *, + struct sk_buff *), + struct s3_conn *c3cn, + struct sk_buff *skb) +{ + spin_lock_bh(&c3cn->lock); + fn(c3cn, skb); + spin_unlock_bh(&c3cn->lock); +} + +/* + * process_cpl_msg_ref: -> host + * Similar to process_cpl_msg() but takes an extra connection reference around + * the call to the handler. Should be used if the handler may drop a + * connection reference. + */ +static inline void process_cpl_msg_ref(void (*fn) (struct s3_conn *, + struct sk_buff *), + struct s3_conn *c3cn, + struct sk_buff *skb) +{ + c3cn_hold(c3cn); + process_cpl_msg(fn, c3cn, skb); + c3cn_put(c3cn); +} + +/* + * Process a CPL_ACT_ESTABLISH message: -> host + * Updates connection state from an active establish CPL message. Runs with + * the connection lock held. + */ + +static inline void s3_free_atid(struct t3cdev *cdev, unsigned int tid) +{ + struct s3_conn *c3cn = cxgb3_free_atid(cdev, tid); + if (c3cn) + c3cn_put(c3cn); +} + +static void c3cn_established(struct s3_conn *c3cn, u32 snd_isn, + unsigned int opt) +{ + c3cn_conn_debug("c3cn 0x%p, state %u.\n", c3cn, c3cn->state); + + c3cn->write_seq = c3cn->snd_nxt = c3cn->snd_una = snd_isn; + + /* + * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't + * pass through opt0. 
+ */ + if (cxgb3_rcv_win > (M_RCV_BUFSIZ << 10)) + c3cn->rcv_wup -= cxgb3_rcv_win - (M_RCV_BUFSIZ << 10); + + dst_confirm(c3cn->dst_cache); + + smp_mb(); + + c3cn_set_state(c3cn, C3CN_STATE_ESTABLISHED); +} + +static void process_act_establish(struct s3_conn *c3cn, struct sk_buff *skb) +{ + struct cpl_act_establish *req = cplhdr(skb); + u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */ + + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + if (unlikely(c3cn->state != C3CN_STATE_CONNECTING)) + cxgb3i_log_error("TID %u expected SYN_SENT, got EST., s %u\n", + c3cn->tid, c3cn->state); + + c3cn->copied_seq = c3cn->rcv_wup = c3cn->rcv_nxt = rcv_isn; + c3cn_established(c3cn, ntohl(req->snd_isn), ntohs(req->tcp_opt)); + + __kfree_skb(skb); + + if (unlikely(c3cn_flag(c3cn, C3CN_ACTIVE_CLOSE_NEEDED))) + /* upper layer has requested closing */ + send_abort_req(c3cn); + else if (c3cn_push_tx_frames(c3cn, 1)) + cxgb3i_conn_tx_open(c3cn); +} + +static int do_act_establish(struct t3cdev *cdev, struct sk_buff *skb, + void *ctx) +{ + struct cpl_act_establish *req = cplhdr(skb); + unsigned int tid = GET_TID(req); + unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); + struct s3_conn *c3cn = ctx; + struct cxgb3i_sdev_data *cdata = CXGB3_SDEV_DATA(cdev); + + c3cn_conn_debug("rcv, tid 0x%x, c3cn 0x%p, s %u, f 0x%lx.\n", + tid, c3cn, c3cn->state, c3cn->flags); + + c3cn->tid = tid; + c3cn_hold(c3cn); + cxgb3_insert_tid(cdata->cdev, cdata->client, c3cn, tid); + s3_free_atid(cdev, atid); + + c3cn->qset = G_QNUM(ntohl(skb->csum)); + + process_cpl_msg(process_act_establish, c3cn, skb); + return 0; +} + +/* + * Process a CPL_ACT_OPEN_RPL message: -> host + * Handle active open failures. 
 */

/* Map an ACT_OPEN_RPL status code to a (positive) errno value. */
static int act_open_rpl_status_to_errno(int status)
{
	switch (status) {
	case CPL_ERR_CONN_RESET:
		return ECONNREFUSED;
	case CPL_ERR_ARP_MISS:
		return EHOSTUNREACH;
	case CPL_ERR_CONN_TIMEDOUT:
		return ETIMEDOUT;
	case CPL_ERR_TCAM_FULL:
		return ENOMEM;
	case CPL_ERR_CONN_EXIST:
		cxgb3i_log_error("ACTIVE_OPEN_RPL: 4-tuple in use\n");
		return EADDRINUSE;
	default:
		return EIO;
	}
}

/*
 * Retry timer callback: resend the active open request, or fail the open
 * with ENOMEM if no skb can be allocated.  Drops the connection reference
 * that was taken when the timer was armed.
 */
static void act_open_retry_timer(unsigned long data)
{
	struct sk_buff *skb;
	struct s3_conn *c3cn = (struct s3_conn *)data;

	c3cn_conn_debug("c3cn 0x%p, state %u.\n", c3cn, c3cn->state);

	spin_lock_bh(&c3cn->lock);
	skb = alloc_skb(sizeof(struct cpl_act_open_req), GFP_ATOMIC);
	if (!skb)
		fail_act_open(c3cn, ENOMEM);
	else {
		skb->sk = (struct sock *)c3cn;
		set_arp_failure_handler(skb, act_open_req_arp_failure);
		make_act_open_req(c3cn, skb, c3cn->tid, c3cn->l2t);
		l2t_send(c3cn->cdev, skb, c3cn->l2t);
	}
	spin_unlock_bh(&c3cn->lock);
	c3cn_put(c3cn);
}

/*
 * Locked half of ACT_OPEN_RPL handling.  A first CPL_ERR_CONN_EXIST arms the
 * retry timer (HZ / 2 from now); the timer function pointer doubles as the
 * "already retried" marker, so a second one fails the open like any other
 * status.
 */
static void process_act_open_rpl(struct s3_conn *c3cn, struct sk_buff *skb)
{
	struct cpl_act_open_rpl *rpl = cplhdr(skb);

	c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n",
			c3cn, c3cn->state, c3cn->flags);

	if (rpl->status == CPL_ERR_CONN_EXIST &&
	    c3cn->retry_timer.function != act_open_retry_timer) {
		c3cn->retry_timer.function = act_open_retry_timer;
		/* the reference is for the timer callback, which drops it */
		if (!mod_timer(&c3cn->retry_timer, jiffies + HZ / 2))
			c3cn_hold(c3cn);
	} else
		fail_act_open(c3cn, act_open_rpl_status_to_errno(rpl->status));
	__kfree_skb(skb);
}

/*
 * CPL_ACT_OPEN_RPL entry point.  The tid is queued for release unless the
 * status is TCAM_FULL, CONN_EXIST or ARP_MISS.
 */
static int do_act_open_rpl(struct t3cdev *cdev, struct sk_buff *skb, void *ctx)
{
	struct s3_conn *c3cn = ctx;
	struct cpl_act_open_rpl *rpl = cplhdr(skb);

	c3cn_conn_debug("rcv, status 0x%x, c3cn 0x%p, s %u, f 0x%lx.\n",
			rpl->status, c3cn, c3cn->state, c3cn->flags);

	if (rpl->status != CPL_ERR_TCAM_FULL &&
	    rpl->status != CPL_ERR_CONN_EXIST &&
	    rpl->status != CPL_ERR_ARP_MISS)
		cxgb3_queue_tid_release(cdev, GET_TID(rpl));

	process_cpl_msg_ref(process_act_open_rpl, c3cn, skb);
	return 0;
}

/*
 * Process PEER_CLOSE CPL messages: -> host
 * Handle peer FIN.
 *
 * Ignored while an ABORT_RPL is pending.  Otherwise the state advances
 * ESTABLISHED -> PASSIVE_CLOSE, ACTIVE_CLOSE -> CLOSE_WAIT_2, and
 * CLOSE_WAIT_1 closes the connection; the upper layer is then notified.
 */
static void process_peer_close(struct s3_conn *c3cn, struct sk_buff *skb)
{
	c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n",
			c3cn, c3cn->state, c3cn->flags);

	if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING))
		goto out;

	switch (c3cn->state) {
	case C3CN_STATE_ESTABLISHED:
		c3cn_set_state(c3cn, C3CN_STATE_PASSIVE_CLOSE);
		break;
	case C3CN_STATE_ACTIVE_CLOSE:
		c3cn_set_state(c3cn, C3CN_STATE_CLOSE_WAIT_2);
		break;
	case C3CN_STATE_CLOSE_WAIT_1:
		/*
		 * NOTE(review): c3cn_closed() already calls
		 * cxgb3i_conn_closing(), so on this path the upper layer is
		 * notified twice -- confirm that is harmless.
		 */
		c3cn_closed(c3cn);
		break;
	case C3CN_STATE_ABORTING:
		break;
	default:
		cxgb3i_log_error("%s: peer close, TID %u in bad state %u\n",
				 c3cn->cdev->name, c3cn->tid, c3cn->state);
	}

	cxgb3i_conn_closing(c3cn);
out:
	__kfree_skb(skb);
}

/* PEER_CLOSE entry point; the handler may drop the connection, hence _ref. */
static int do_peer_close(struct t3cdev *cdev, struct sk_buff *skb, void *ctx)
{
	struct s3_conn *c3cn = ctx;

	c3cn_conn_debug("rcv, c3cn 0x%p, s %u, f 0x%lx.\n",
			c3cn, c3cn->state, c3cn->flags);
	process_cpl_msg_ref(process_peer_close, c3cn, skb);
	return 0;
}

/*
 * Process CLOSE_CONN_RPL CPL message: -> host
 * Process a peer ACK to our FIN.
+ */ +static void process_close_con_rpl(struct s3_conn *c3cn, struct sk_buff *skb) +{ + struct cpl_close_con_rpl *rpl = cplhdr(skb); + + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + c3cn->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ + + if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) + goto out; + + switch (c3cn->state) { + case C3CN_STATE_ACTIVE_CLOSE: + c3cn_set_state(c3cn, C3CN_STATE_CLOSE_WAIT_1); + break; + case C3CN_STATE_CLOSE_WAIT_1: + case C3CN_STATE_CLOSE_WAIT_2: + c3cn_closed(c3cn); + break; + case C3CN_STATE_ABORTING: + break; + default: + cxgb3i_log_error("%s: close_rpl, TID %u in bad state %u\n", + c3cn->cdev->name, c3cn->tid, c3cn->state); + } + +out: + kfree_skb(skb); +} + +static int do_close_con_rpl(struct t3cdev *cdev, struct sk_buff *skb, + void *ctx) +{ + struct s3_conn *c3cn = ctx; + + c3cn_conn_debug("rcv, c3cn 0x%p, s %u, f 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + process_cpl_msg_ref(process_close_con_rpl, c3cn, skb); + return 0; +} + +/* + * Process ABORT_REQ_RSS CPL message: -> host + * Process abort requests. If we are waiting for an ABORT_RPL we ignore this + * request except that we need to reply to it. + */ + +static int abort_status_to_errno(struct s3_conn *c3cn, int abort_reason, + int *need_rst) +{ + switch (abort_reason) { + case CPL_ERR_BAD_SYN: /* fall through */ + case CPL_ERR_CONN_RESET: + return c3cn->state > C3CN_STATE_ESTABLISHED ? 
+ EPIPE : ECONNRESET; + case CPL_ERR_XMIT_TIMEDOUT: + case CPL_ERR_PERSIST_TIMEDOUT: + case CPL_ERR_FINWAIT2_TIMEDOUT: + case CPL_ERR_KEEPALIVE_TIMEDOUT: + return ETIMEDOUT; + default: + return EIO; + } +} + +static void process_abort_req(struct s3_conn *c3cn, struct sk_buff *skb) +{ + int rst_status = CPL_ABORT_NO_RST; + const struct cpl_abort_req_rss *req = cplhdr(skb); + + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + if (!c3cn_flag(c3cn, C3CN_ABORT_REQ_RCVD)) { + c3cn_set_flag(c3cn, C3CN_ABORT_REQ_RCVD); + c3cn_set_state(c3cn, C3CN_STATE_ABORTING); + __kfree_skb(skb); + return; + } + + c3cn_clear_flag(c3cn, C3CN_ABORT_REQ_RCVD); + send_abort_rpl(c3cn, rst_status); + + if (!c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) { + c3cn->err = + abort_status_to_errno(c3cn, req->status, &rst_status); + c3cn_closed(c3cn); + } +} + +static int do_abort_req(struct t3cdev *cdev, struct sk_buff *skb, void *ctx) +{ + const struct cpl_abort_req_rss *req = cplhdr(skb); + struct s3_conn *c3cn = ctx; + + c3cn_conn_debug("rcv, c3cn 0x%p, s 0x%x, f 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + if (req->status == CPL_ERR_RTX_NEG_ADVICE || + req->status == CPL_ERR_PERSIST_NEG_ADVICE) { + __kfree_skb(skb); + return 0; + } + + process_cpl_msg_ref(process_abort_req, c3cn, skb); + return 0; +} + +/* + * Process ABORT_RPL_RSS CPL message: -> host + * Process abort replies. We only process these messages if we anticipate + * them as the coordination between SW and HW in this area is somewhat lacking + * and sometimes we get ABORT_RPLs after we are done with the connection that + * originated the ABORT_REQ. 
 */
/*
 * Handle one ABORT_RPL_RSS message; ignored unless C3CN_ABORT_RPL_PENDING is
 * set.  The first reply only sets C3CN_ABORT_RPL_RCVD; the second clears both
 * flags and closes the connection.  The skb is always freed.
 */
static void process_abort_rpl(struct s3_conn *c3cn, struct sk_buff *skb)
{
	c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n",
			c3cn, c3cn->state, c3cn->flags);

	if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) {
		if (!c3cn_flag(c3cn, C3CN_ABORT_RPL_RCVD))
			c3cn_set_flag(c3cn, C3CN_ABORT_RPL_RCVD);
		else {
			c3cn_clear_flag(c3cn, C3CN_ABORT_RPL_RCVD);
			c3cn_clear_flag(c3cn, C3CN_ABORT_RPL_PENDING);
			/* an abort request is still half-processed: log it */
			if (c3cn_flag(c3cn, C3CN_ABORT_REQ_RCVD))
				cxgb3i_log_error("%s tid %u, ABORT_RPL_RSS\n",
						 c3cn->cdev->name, c3cn->tid);
			c3cn_closed(c3cn);
		}
	}
	__kfree_skb(skb);
}

/*
 * CPL_ABORT_RPL_RSS entry point.  @ctx may be NULL here (see below), so the
 * debug print and the dispatch both guard against a missing connection.
 */
static int do_abort_rpl(struct t3cdev *cdev, struct sk_buff *skb, void *ctx)
{
	struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
	struct s3_conn *c3cn = ctx;

	c3cn_conn_debug("rcv, status 0x%x, c3cn 0x%p, s %u, 0x%lx.\n",
			rpl->status, c3cn, c3cn ? c3cn->state : 0,
			c3cn ? c3cn->flags : 0UL);

	/*
	 * Ignore replies to post-close aborts indicating that the abort was
	 * requested too late.  These connections are terminated when we get
	 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
	 * arrives the TID is either no longer used or it has been recycled.
	 */
	if (rpl->status == CPL_ERR_ABORT_FAILED)
		goto discard;

	/*
	 * Sometimes we've already closed the connection, e.g., a post-close
	 * abort races with ABORT_REQ_RSS, the latter frees the connection
	 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
	 * but FW turns the ABORT_REQ into a regular one and so we get
	 * ABORT_RPL_RSS with status 0 and no connection.
	 */
	if (!c3cn)
		goto discard;

	process_cpl_msg_ref(process_abort_rpl, c3cn, skb);
	return 0;

discard:
	__kfree_skb(skb);
	return 0;
}

/*
 * Process RX_ISCSI_HDR CPL message: -> host
 * Handle received PDUs, the payload could be DDP'ed. If not, the payload
 * follows after the bhs.
 */
/*
 * Parse one received iSCSI PDU skb: strip the leading cpl_iscsi_hdr, read the
 * trailing DDP status CPL, record digest/pad errors and the PDU length in the
 * skb control block, then queue the skb and notify the upper layer.  On a
 * malformed skb the connection is aborted and the skb freed.
 */
static void process_rx_iscsi_hdr(struct s3_conn *c3cn, struct sk_buff *skb)
{
	struct cpl_iscsi_hdr *hdr_cpl = cplhdr(skb);
	struct cpl_iscsi_hdr_norss data_cpl;
	struct cpl_rx_data_ddp_norss ddp_cpl;
	unsigned int hdr_len, data_len, status;
	unsigned int len;
	int err;

	/* connection is closing: drop the data, abort unless already aborting */
	if (unlikely(c3cn->state >= C3CN_STATE_PASSIVE_CLOSE)) {
		if (c3cn->state != C3CN_STATE_ABORTING)
			send_abort_req(c3cn);
		__kfree_skb(skb);
		return;
	}

	CXGB3_SKB_CB(skb)->seq = ntohl(hdr_cpl->seq);
	CXGB3_SKB_CB(skb)->flags = 0;

	skb_reset_transport_header(skb);
	__skb_pull(skb, sizeof(struct cpl_iscsi_hdr));

	len = hdr_len = ntohs(hdr_cpl->len);
	/* msg coalesce is off or not enough data received */
	if (skb->len <= hdr_len) {
		cxgb3i_log_error("%s: TID %u, ISCSI_HDR, skb len %u < %u.\n",
				 c3cn->cdev->name, c3cn->tid,
				 skb->len, hdr_len);
		goto abort_conn;
	}

	/* the DDP status CPL occupies the last sizeof(ddp_cpl) bytes of the skb */
	err = skb_copy_bits(skb, skb->len - sizeof(ddp_cpl), &ddp_cpl,
			    sizeof(ddp_cpl));
	if (err < 0)
		goto abort_conn;

	skb_ulp_mode(skb) = ULP2_FLAG_DATA_READY;
	skb_ulp_pdulen(skb) = ntohs(ddp_cpl.len);
	skb_ulp_ddigest(skb) = ntohl(ddp_cpl.ulp_crc);
	status = ntohl(ddp_cpl.ddp_status);

	c3cn_rx_debug("rx skb 0x%p, len %u, pdulen %u, ddp status 0x%x.\n",
		      skb, skb->len, skb_ulp_pdulen(skb), status);

	if (status & (1 << RX_DDP_STATUS_HCRC_SHIFT))
		skb_ulp_mode(skb) |= ULP2_FLAG_HCRC_ERROR;
	if (status & (1 << RX_DDP_STATUS_DCRC_SHIFT))
		skb_ulp_mode(skb) |= ULP2_FLAG_DCRC_ERROR;
	if (status & (1 << RX_DDP_STATUS_PAD_SHIFT))
		skb_ulp_mode(skb) |= ULP2_FLAG_PAD_ERROR;

	/*
	 * More bytes than header + status CPL: a data CPL and payload follow
	 * the header in this skb.  Otherwise the payload may have been DDP'ed.
	 */
	if (skb->len > (hdr_len + sizeof(ddp_cpl))) {
		err = skb_copy_bits(skb, hdr_len, &data_cpl, sizeof(data_cpl));
		if (err < 0)
			goto abort_conn;
		data_len = ntohs(data_cpl.len);
		len += sizeof(data_cpl) + data_len;
	} else if (status & (1 << RX_DDP_STATUS_DDP_SHIFT))
		skb_ulp_mode(skb) |= ULP2_FLAG_DATA_DDPED;

	c3cn->rcv_nxt = ntohl(ddp_cpl.seq) + skb_ulp_pdulen(skb);
	/* trim to header (+ data CPL and payload, when present) */
	__pskb_trim(skb, len);
	__skb_queue_tail(&c3cn->receive_queue, skb);
	cxgb3i_conn_pdu_ready(c3cn);

	return;

abort_conn:
	send_abort_req(c3cn);
	__kfree_skb(skb);
}

/* CPL_ISCSI_HDR entry point. */
static int do_iscsi_hdr(struct t3cdev *t3dev, struct sk_buff *skb, void *ctx)
{
	struct s3_conn *c3cn = ctx;

	process_cpl_msg(process_rx_iscsi_hdr, c3cn, skb);
	return 0;
}

/*
 * Process TX_DATA_ACK CPL messages: -> host
 * Process an acknowledgment of WR completion.  Advance snd_una and send the
 * next batch of work requests from the write queue.
 */
static void process_wr_ack(struct s3_conn *c3cn, struct sk_buff *skb)
{
	struct cpl_wr_ack *hdr = cplhdr(skb);
	unsigned int credits = ntohs(hdr->credits);
	u32 snd_una = ntohl(hdr->snd_una);

	c3cn->wr_avail += credits;
	/* wr_unacked can never exceed the number of WRs still outstanding */
	if (c3cn->wr_unacked > c3cn->wr_max - c3cn->wr_avail)
		c3cn->wr_unacked = c3cn->wr_max - c3cn->wr_avail;

	/* retire pending WR skbs; p->csum is compared against the credits */
	while (credits) {
		struct sk_buff *p = peek_wr(c3cn);

		if (unlikely(!p)) {
			cxgb3i_log_error("%u WR_ACK credits for TID %u with "
					 "nothing pending, state %u\n",
					 credits, c3cn->tid, c3cn->state);
			break;
		}
		if (unlikely(credits < p->csum)) {
			/* partial ack: remember what is still owed */
			p->csum -= credits;
			break;
		} else {
			dequeue_wr(c3cn);
			credits -= p->csum;
			free_wr_skb(p);
		}
	}

	/* stale ack: snd_una must not move backwards */
	if (unlikely(before(snd_una, c3cn->snd_una)))
		goto out_free;

	if (c3cn->snd_una != snd_una) {
		c3cn->snd_una = snd_una;
		dst_confirm(c3cn->dst_cache);
	}

	if (skb_queue_len(&c3cn->write_queue) && c3cn_push_tx_frames(c3cn, 0))
		cxgb3i_conn_tx_open(c3cn);
out_free:
	__kfree_skb(skb);
}

/* CPL_TX_DMA_ACK entry point. */
static int do_wr_ack(struct t3cdev *cdev, struct sk_buff *skb, void *ctx)
{
	struct s3_conn *c3cn = ctx;

	process_cpl_msg(process_wr_ack, c3cn, skb);
	return 0;
}

/*
 * for each connection, pre-allocate skbs needed for close/abort requests. So
 * that we can service the request right away.
+ */ +static void c3cn_free_cpl_skbs(struct s3_conn *c3cn) +{ + if (c3cn->cpl_close) + kfree_skb(c3cn->cpl_close); + if (c3cn->cpl_abort_req) + kfree_skb(c3cn->cpl_abort_req); + if (c3cn->cpl_abort_rpl) + kfree_skb(c3cn->cpl_abort_rpl); +} + +static int c3cn_alloc_cpl_skbs(struct s3_conn *c3cn) +{ + c3cn->cpl_close = alloc_skb(sizeof(struct cpl_close_con_req), + GFP_KERNEL); + if (!c3cn->cpl_close) + return -ENOMEM; + skb_put(c3cn->cpl_close, sizeof(struct cpl_close_con_req)); + + c3cn->cpl_abort_req = alloc_skb(sizeof(struct cpl_abort_req), + GFP_KERNEL); + if (!c3cn->cpl_abort_req) + goto free_cpl_skbs; + skb_put(c3cn->cpl_abort_req, sizeof(struct cpl_abort_req)); + + c3cn->cpl_abort_rpl = alloc_skb(sizeof(struct cpl_abort_rpl), + GFP_KERNEL); + if (!c3cn->cpl_abort_rpl) + goto free_cpl_skbs; + skb_put(c3cn->cpl_abort_rpl, sizeof(struct cpl_abort_rpl)); + + return 0; + +free_cpl_skbs: + c3cn_free_cpl_skbs(c3cn); + return -ENOMEM; +} + +/** + * c3cn_release_offload_resources - release offload resource + * @c3cn: the offloaded iscsi tcp connection. + * Release resources held by an offload connection (TID, L2T entry, etc.) + */ +static void c3cn_release_offload_resources(struct s3_conn *c3cn) +{ + struct t3cdev *cdev = c3cn->cdev; + unsigned int tid = c3cn->tid; + + if (!cdev) + return; + + c3cn->qset = 0; + + c3cn_free_cpl_skbs(c3cn); + + if (c3cn->wr_avail != c3cn->wr_max) { + purge_wr_queue(c3cn); + reset_wr_list(c3cn); + } + + if (c3cn->l2t) { + l2t_release(L2DATA(cdev), c3cn->l2t); + c3cn->l2t = NULL; + } + + if (c3cn->state == C3CN_STATE_CONNECTING) /* we have ATID */ + s3_free_atid(cdev, tid); + else { /* we have TID */ + cxgb3_remove_tid(cdev, (void *)c3cn, tid); + c3cn_put(c3cn); + } + + c3cn->cdev = NULL; +} + +/** + * cxgb3i_c3cn_create - allocate and initialize an s3_conn structure + * returns the s3_conn structure allocated. 
+ */ +struct s3_conn *cxgb3i_c3cn_create(void) +{ + struct s3_conn *c3cn; + + c3cn = kzalloc(sizeof(*c3cn), GFP_KERNEL); + if (!c3cn) + return NULL; + + /* pre-allocate close/abort cpl, so we don't need to wait for memory + when close/abort is requested. */ + if (c3cn_alloc_cpl_skbs(c3cn) < 0) + goto free_c3cn; + + c3cn_conn_debug("alloc c3cn 0x%p.\n", c3cn); + + c3cn->flags = 0; + spin_lock_init(&c3cn->lock); + atomic_set(&c3cn->refcnt, 1); + skb_queue_head_init(&c3cn->receive_queue); + skb_queue_head_init(&c3cn->write_queue); + setup_timer(&c3cn->retry_timer, NULL, (unsigned long)c3cn); + rwlock_init(&c3cn->callback_lock); + + return c3cn; + +free_c3cn: + kfree(c3cn); + return NULL; +} + +static void c3cn_active_close(struct s3_conn *c3cn) +{ + int data_lost; + int close_req = 0; + + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + dst_confirm(c3cn->dst_cache); + + c3cn_hold(c3cn); + spin_lock_bh(&c3cn->lock); + + data_lost = skb_queue_len(&c3cn->receive_queue); + __skb_queue_purge(&c3cn->receive_queue); + + switch (c3cn->state) { + case C3CN_STATE_CLOSED: + case C3CN_STATE_ACTIVE_CLOSE: + case C3CN_STATE_CLOSE_WAIT_1: + case C3CN_STATE_CLOSE_WAIT_2: + case C3CN_STATE_ABORTING: + /* nothing need to be done */ + break; + case C3CN_STATE_CONNECTING: + /* defer until cpl_act_open_rpl or cpl_act_establish */ + c3cn_set_flag(c3cn, C3CN_ACTIVE_CLOSE_NEEDED); + break; + case C3CN_STATE_ESTABLISHED: + close_req = 1; + c3cn_set_state(c3cn, C3CN_STATE_ACTIVE_CLOSE); + break; + case C3CN_STATE_PASSIVE_CLOSE: + close_req = 1; + c3cn_set_state(c3cn, C3CN_STATE_CLOSE_WAIT_2); + break; + } + + if (close_req) { + if (data_lost) + /* Unread data was tossed, zap the connection. 
*/ + send_abort_req(c3cn); + else + send_close_req(c3cn); + } + + spin_unlock_bh(&c3cn->lock); + c3cn_put(c3cn); +} + +/** + * cxgb3i_c3cn_release - close and release an iscsi tcp connection and any + * resource held + * @c3cn: the iscsi tcp connection + */ +void cxgb3i_c3cn_release(struct s3_conn *c3cn) +{ + c3cn_conn_debug("c3cn 0x%p, s %u, f 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + if (likely(c3cn->state != C3CN_STATE_CONNECTING)) + c3cn_active_close(c3cn); + else + c3cn_set_flag(c3cn, C3CN_ACTIVE_CLOSE_NEEDED); + c3cn_put(c3cn); +} + +static int is_cxgb3_dev(struct net_device *dev) +{ + struct cxgb3i_sdev_data *cdata; + + write_lock(&cdata_rwlock); + list_for_each_entry(cdata, &cdata_list, list) { + struct adap_ports *ports = &cdata->ports; + int i; + + for (i = 0; i < ports->nports; i++) + if (dev == ports->lldevs[i]) { + write_unlock(&cdata_rwlock); + return 1; + } + } + write_unlock(&cdata_rwlock); + return 0; +} + +/** + * cxgb3_egress_dev - return the cxgb3 egress device + * @root_dev: the root device anchoring the search + * @c3cn: the connection used to determine egress port in bonding mode + * @context: in bonding mode, indicates a connection set up or failover + * + * Return egress device or NULL if the egress device isn't one of our ports. 
+ */ +static struct net_device *cxgb3_egress_dev(struct net_device *root_dev, + struct s3_conn *c3cn, + int context) +{ + while (root_dev) { + if (root_dev->priv_flags & IFF_802_1Q_VLAN) + root_dev = vlan_dev_real_dev(root_dev); + else if (is_cxgb3_dev(root_dev)) + return root_dev; + else + return NULL; + } + return NULL; +} + +static struct rtable *find_route(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + struct rtable *rt; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip4_u = { + .daddr = daddr, + .saddr = saddr, + .tos = 0 } }, + .proto = IPPROTO_TCP, + .uli_u = { + .ports = { + .sport = sport, + .dport = dport } } }; + + if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0)) + return NULL; + return rt; +} + +/* + * Assign offload parameters to some connection fields. + */ +static void init_offload_conn(struct s3_conn *c3cn, + struct t3cdev *cdev, + struct dst_entry *dst) +{ + BUG_ON(c3cn->cdev != cdev); + c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs; + c3cn->wr_unacked = 0; + c3cn->mss_idx = select_mss(c3cn, dst_mtu(dst)); + + reset_wr_list(c3cn); +} + +static int initiate_act_open(struct s3_conn *c3cn, struct net_device *dev) +{ + struct cxgb3i_sdev_data *cdata = NDEV2CDATA(dev); + struct t3cdev *cdev = cdata->cdev; + struct dst_entry *dst = c3cn->dst_cache; + struct sk_buff *skb; + + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + /* + * Initialize connection data. Note that the flags and ULP mode are + * initialized higher up ... 
+ */ + c3cn->dev = dev; + c3cn->cdev = cdev; + c3cn->tid = cxgb3_alloc_atid(cdev, cdata->client, c3cn); + if (c3cn->tid < 0) + goto out_err; + + c3cn->qset = 0; + c3cn->l2t = t3_l2t_get(cdev, dst->neighbour, dev); + if (!c3cn->l2t) + goto free_tid; + + skb = alloc_skb(sizeof(struct cpl_act_open_req), GFP_KERNEL); + if (!skb) + goto free_l2t; + + skb->sk = (struct sock *)c3cn; + set_arp_failure_handler(skb, act_open_req_arp_failure); + + c3cn_hold(c3cn); + + init_offload_conn(c3cn, cdev, dst); + c3cn->err = 0; + + make_act_open_req(c3cn, skb, c3cn->tid, c3cn->l2t); + l2t_send(cdev, skb, c3cn->l2t); + return 0; + +free_l2t: + l2t_release(L2DATA(cdev), c3cn->l2t); +free_tid: + s3_free_atid(cdev, c3cn->tid); + c3cn->tid = 0; +out_err: + return -1; +} + + +/** + * cxgb3i_c3cn_connect - initiates an iscsi tcp connection to a given address + * @c3cn: the iscsi tcp connection + * @usin: destination address + * + * return 0 if active open request is sent, < 0 otherwise. + */ +int cxgb3i_c3cn_connect(struct s3_conn *c3cn, struct sockaddr_in *usin) +{ + struct rtable *rt; + struct net_device *dev; + struct cxgb3i_sdev_data *cdata; + struct t3cdev *cdev; + __be32 sipv4; + int err; + + if (usin->sin_family != AF_INET) + return -EAFNOSUPPORT; + + c3cn->daddr.sin_port = usin->sin_port; + c3cn->daddr.sin_addr.s_addr = usin->sin_addr.s_addr; + + rt = find_route(c3cn->saddr.sin_addr.s_addr, + c3cn->daddr.sin_addr.s_addr, + c3cn->saddr.sin_port, + c3cn->daddr.sin_port); + if (rt == NULL) { + c3cn_conn_debug("NO route to 0x%x, port %u.\n", + c3cn->daddr.sin_addr.s_addr, + ntohs(c3cn->daddr.sin_port)); + return -ENETUNREACH; + } + + if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { + c3cn_conn_debug("multi-cast route to 0x%x, port %u.\n", + c3cn->daddr.sin_addr.s_addr, + ntohs(c3cn->daddr.sin_port)); + ip_rt_put(rt); + return -ENETUNREACH; + } + + if (!c3cn->saddr.sin_addr.s_addr) + c3cn->saddr.sin_addr.s_addr = rt->rt_src; + + /* now commit destination to connection */ + 
c3cn->dst_cache = &rt->u.dst; + + /* try to establish an offloaded connection */ + dev = cxgb3_egress_dev(c3cn->dst_cache->dev, c3cn, 0); + if (dev == NULL) { + c3cn_conn_debug("c3cn 0x%p, egress dev NULL.\n", c3cn); + return -ENETUNREACH; + } + cdata = NDEV2CDATA(dev); + cdev = cdata->cdev; + + /* get a source port if one hasn't been provided */ + err = c3cn_get_port(c3cn, cdata); + if (err) + return err; + + c3cn_conn_debug("c3cn 0x%p get port %u.\n", + c3cn, ntohs(c3cn->saddr.sin_port)); + + sipv4 = cxgb3i_get_private_ipv4addr(dev); + if (!sipv4) { + c3cn_conn_debug("c3cn 0x%p, iscsi ip not configured.\n", c3cn); + sipv4 = c3cn->saddr.sin_addr.s_addr; + cxgb3i_set_private_ipv4addr(dev, sipv4); + } else + c3cn->saddr.sin_addr.s_addr = sipv4; + + c3cn_conn_debug("c3cn 0x%p, %u.%u.%u.%u,%u-%u.%u.%u.%u,%u SYN_SENT.\n", + c3cn, NIPQUAD(c3cn->saddr.sin_addr.s_addr), + ntohs(c3cn->saddr.sin_port), + NIPQUAD(c3cn->daddr.sin_addr.s_addr), + ntohs(c3cn->daddr.sin_port)); + + c3cn_set_state(c3cn, C3CN_STATE_CONNECTING); + if (!initiate_act_open(c3cn, dev)) + return 0; + + /* + * If we get here, we don't have an offload connection so simply + * return a failure. + */ + err = -ENOTSUPP; + + /* + * This trashes the connection and releases the local port, + * if necessary. + */ + c3cn_conn_debug("c3cn 0x%p -> CLOSED.\n", c3cn); + c3cn_set_state(c3cn, C3CN_STATE_CLOSED); + ip_rt_put(rt); + c3cn_put_port(c3cn); + c3cn->daddr.sin_port = 0; + return err; +} + +/** + * cxgb3i_c3cn_rx_credits - ack received tcp data. + * @c3cn: iscsi tcp connection + * @copied: # of bytes processed + * + * Called after some received data has been read. It returns RX credits + * to the HW for the amount of data processed. 
+ */ +void cxgb3i_c3cn_rx_credits(struct s3_conn *c3cn, int copied) +{ + struct t3cdev *cdev; + int must_send; + u32 credits, dack = 0; + + if (c3cn->state != C3CN_STATE_ESTABLISHED) + return; + + credits = c3cn->copied_seq - c3cn->rcv_wup; + if (unlikely(!credits)) + return; + + cdev = c3cn->cdev; + + if (unlikely(cxgb3_rx_credit_thres == 0)) + return; + + dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); + + /* + * For coalescing to work effectively ensure the receive window has + * at least 16KB left. + */ + must_send = credits + 16384 >= cxgb3_rcv_win; + + if (must_send || credits >= cxgb3_rx_credit_thres) + c3cn->rcv_wup += send_rx_credits(c3cn, credits, dack); +} + +/** + * cxgb3i_c3cn_send_pdus - send the skbs containing iscsi pdus + * @c3cn: iscsi tcp connection + * @skb: skb contains the iscsi pdu + * + * Add a list of skbs to a connection send queue. The skbs must comply with + * the max size limit of the device and have a headroom of at least + * TX_HEADER_LEN bytes. + * Return # of bytes queued. + */ +int cxgb3i_c3cn_send_pdus(struct s3_conn *c3cn, struct sk_buff *skb) +{ + struct sk_buff *next; + int err, copied = 0; + + spin_lock_bh(&c3cn->lock); + + if (c3cn->state != C3CN_STATE_ESTABLISHED) { + c3cn_tx_debug("c3cn 0x%p, not in est. 
state %u.\n", + c3cn, c3cn->state); + err = -EAGAIN; + goto out_err; + } + + err = -EPIPE; + if (c3cn->err) { + c3cn_tx_debug("c3cn 0x%p, err %d.\n", c3cn, c3cn->err); + goto out_err; + } + + while (skb) { + int frags = skb_shinfo(skb)->nr_frags + + (skb->len != skb->data_len); + + if (unlikely(skb_headroom(skb) < TX_HEADER_LEN)) { + c3cn_tx_debug("c3cn 0x%p, skb head.\n", c3cn); + err = -EINVAL; + goto out_err; + } + + if (frags >= SKB_WR_LIST_SIZE) { + cxgb3i_log_error("c3cn 0x%p, tx frags %d, len %u,%u.\n", + c3cn, skb_shinfo(skb)->nr_frags, + skb->len, skb->data_len); + err = -EINVAL; + goto out_err; + } + + next = skb->next; + skb->next = NULL; + skb_entail(c3cn, skb, C3CB_FLAG_NO_APPEND | C3CB_FLAG_NEED_HDR); + copied += skb->len; + c3cn->write_seq += skb->len + ulp_extra_len(skb); + skb = next; + } +done: + if (likely(skb_queue_len(&c3cn->write_queue))) + c3cn_push_tx_frames(c3cn, 1); + spin_unlock_bh(&c3cn->lock); + return copied; + +out_err: + if (copied == 0 && err == -EPIPE) + copied = c3cn->err ? 
c3cn->err : -EPIPE; + goto done; +} + +static void sdev_data_cleanup(struct cxgb3i_sdev_data *cdata) +{ + struct adap_ports *ports = &cdata->ports; + int i; + + for (i = 0; i < ports->nports; i++) + NDEV2CDATA(ports->lldevs[i]) = NULL; + cxgb3i_free_big_mem(cdata); +} + +void cxgb3i_sdev_cleanup(void) +{ + struct cxgb3i_sdev_data *cdata; + + write_lock(&cdata_rwlock); + list_for_each_entry(cdata, &cdata_list, list) { + list_del(&cdata->list); + sdev_data_cleanup(cdata); + } + write_unlock(&cdata_rwlock); +} + +int cxgb3i_sdev_init(cxgb3_cpl_handler_func *cpl_handlers) +{ + cpl_handlers[CPL_ACT_ESTABLISH] = do_act_establish; + cpl_handlers[CPL_ACT_OPEN_RPL] = do_act_open_rpl; + cpl_handlers[CPL_PEER_CLOSE] = do_peer_close; + cpl_handlers[CPL_ABORT_REQ_RSS] = do_abort_req; + cpl_handlers[CPL_ABORT_RPL_RSS] = do_abort_rpl; + cpl_handlers[CPL_CLOSE_CON_RPL] = do_close_con_rpl; + cpl_handlers[CPL_TX_DMA_ACK] = do_wr_ack; + cpl_handlers[CPL_ISCSI_HDR] = do_iscsi_hdr; + + if (cxgb3_max_connect > CXGB3I_MAX_CONN) + cxgb3_max_connect = CXGB3I_MAX_CONN; + return 0; +} + +/** + * cxgb3i_sdev_add - allocate and initialize resources for each adapter found + * @cdev: t3cdev adapter + * @client: cxgb3 driver client + */ +void cxgb3i_sdev_add(struct t3cdev *cdev, struct cxgb3_client *client) +{ + struct cxgb3i_sdev_data *cdata; + struct ofld_page_info rx_page_info; + unsigned int wr_len; + int mapsize = DIV_ROUND_UP(cxgb3_max_connect, + 8 * sizeof(unsigned long)); + int i; + + cdata = cxgb3i_alloc_big_mem(sizeof(*cdata) + mapsize, GFP_KERNEL); + if (!cdata) + return; + + if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0 || + cdev->ctl(cdev, GET_PORTS, &cdata->ports) < 0 || + cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info) < 0) + goto free_cdata; + + s3_init_wr_tab(wr_len); + + INIT_LIST_HEAD(&cdata->list); + cdata->cdev = cdev; + cdata->client = client; + + for (i = 0; i < cdata->ports.nports; i++) + NDEV2CDATA(cdata->ports.lldevs[i]) = cdata; + + write_lock(&cdata_rwlock); + 
list_add_tail(&cdata->list, &cdata_list); + write_unlock(&cdata_rwlock); + + return; + +free_cdata: + cxgb3i_free_big_mem(cdata); +} + +/** + * cxgb3i_sdev_remove - free the allocated resources for the adapter + * @cdev: t3cdev adapter + */ +void cxgb3i_sdev_remove(struct t3cdev *cdev) +{ + struct cxgb3i_sdev_data *cdata = CXGB3_SDEV_DATA(cdev); + + write_lock(&cdata_rwlock); + list_del(&cdata->list); + write_unlock(&cdata_rwlock); + + sdev_data_cleanup(cdata); +} diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.h b/drivers/scsi/cxgb3i/cxgb3i_offload.h new file mode 100644 index 00000000000..5b93d629e5c --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i_offload.h @@ -0,0 +1,231 @@ +/* + * cxgb3i_offload.h: Chelsio S3xx iscsi offloaded tcp connection management + * + * Copyright (C) 2003-2008 Chelsio Communications. All rights reserved. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this + * release for licensing terms and conditions. + * + * Written by: Dimitris Michailidis (dm@chelsio.com) + * Karen Xie (kxie@chelsio.com) + */ + +#ifndef _CXGB3I_OFFLOAD_H +#define _CXGB3I_OFFLOAD_H + +#include +#include + +#include "common.h" +#include "adapter.h" +#include "t3cdev.h" +#include "cxgb3_offload.h" + +#define cxgb3i_log_error(fmt...) printk(KERN_ERR "cxgb3i: ERR! " fmt) +#define cxgb3i_log_warn(fmt...) printk(KERN_WARNING "cxgb3i: WARN! " fmt) +#define cxgb3i_log_info(fmt...) printk(KERN_INFO "cxgb3i: " fmt) +#define cxgb3i_log_debug(fmt, args...) 
\ + printk(KERN_INFO "cxgb3i: %s - " fmt, __func__ , ## args) + +/** + * struct s3_conn - an iscsi tcp connection structure + * + * @dev: net device of with connection + * @cdev: adapter t3cdev for net device + * @flags: see c3cn_flags below + * @tid: connection id assigned by the h/w + * @qset: queue set used by connection + * @mss_idx: Maximum Segment Size table index + * @l2t: ARP resolution entry for offload packets + * @wr_max: maximum in-flight writes + * @wr_avail: number of writes available + * @wr_unacked: writes since last request for completion notification + * @wr_pending_head: head of pending write queue + * @wr_pending_tail: tail of pending write queue + * @cpl_close: skb for cpl_close_req + * @cpl_abort_req: skb for cpl_abort_req + * @cpl_abort_rpl: skb for cpl_abort_rpl + * @lock: connection status lock + * @refcnt: reference count on connection + * @state: connection state + * @saddr: source ip/port address + * @daddr: destination ip/port address + * @dst_cache: reference to destination route + * @receive_queue: received PDUs + * @write_queue: un-pushed pending writes + * @retry_timer: retry timer for various operations + * @err: connection error status + * @callback_lock: lock for opaque user context + * @user_data: opaque user context + * @rcv_nxt: next receive seq. 
#
 * @copied_seq:	head of yet unread data
 * @rcv_wup:	rcv_nxt on last window update sent
 * @snd_nxt:	next sequence we send
 * @snd_una:	first byte we want an ack for
 * @write_seq:	tail+1 of data held in send buffer
 */
struct s3_conn {
	struct net_device *dev;
	struct t3cdev *cdev;
	unsigned long flags;
	int tid;
	int qset;
	int mss_idx;
	struct l2t_entry *l2t;
	int wr_max;
	int wr_avail;
	int wr_unacked;
	struct sk_buff *wr_pending_head;
	struct sk_buff *wr_pending_tail;
	struct sk_buff *cpl_close;
	struct sk_buff *cpl_abort_req;
	struct sk_buff *cpl_abort_rpl;
	spinlock_t lock;
	atomic_t refcnt;
	volatile unsigned int state;
	struct sockaddr_in saddr;
	struct sockaddr_in daddr;
	struct dst_entry *dst_cache;
	struct sk_buff_head receive_queue;
	struct sk_buff_head write_queue;
	struct timer_list retry_timer;
	int err;
	rwlock_t callback_lock;
	void *user_data;

	u32 rcv_nxt;
	u32 copied_seq;
	u32 rcv_wup;
	u32 snd_nxt;
	u32 snd_una;
	u32 write_seq;
};

/*
 * connection state
 *
 * The numeric order matters: c3cn_is_closing() below treats every state from
 * C3CN_STATE_ACTIVE_CLOSE upwards as "closing".
 */
enum conn_states {
	C3CN_STATE_CONNECTING = 1,
	C3CN_STATE_ESTABLISHED,
	C3CN_STATE_ACTIVE_CLOSE,
	C3CN_STATE_PASSIVE_CLOSE,
	C3CN_STATE_CLOSE_WAIT_1,
	C3CN_STATE_CLOSE_WAIT_2,
	C3CN_STATE_ABORTING,
	C3CN_STATE_CLOSED,
};

/* non-zero once the connection has entered any close/abort/closed state */
static inline unsigned int c3cn_is_closing(const struct s3_conn *c3cn)
{
	return c3cn->state >= C3CN_STATE_ACTIVE_CLOSE;
}
/* non-zero only while the connection is in C3CN_STATE_ESTABLISHED */
static inline unsigned int c3cn_is_established(const struct s3_conn *c3cn)
{
	return c3cn->state == C3CN_STATE_ESTABLISHED;
}

/*
 * Connection flags -- many to track some close related events.
 */
enum c3cn_flags {
	C3CN_ABORT_RPL_RCVD,	/* received one ABORT_RPL_RSS message */
	C3CN_ABORT_REQ_RCVD,	/* received one ABORT_REQ_RSS message */
	C3CN_ABORT_RPL_PENDING,	/* expecting an abort reply */
	C3CN_TX_DATA_SENT,	/* already sent a TX_DATA WR */
	C3CN_ACTIVE_CLOSE_NEEDED,	/* need to be closed */
};

/**
 * cxgb3i_sdev_data - Per adapter data.
 * Linked off of each Ethernet device port on the adapter.
 * Also available via the t3cdev structure since we have pointers to our port
 * net_device's there ...
 *
 * @list:	list head to link elements
 * @cdev:	t3cdev adapter
 * @client:	CPL client pointer
 * @ports:	array of adapter ports
 * @sport_map_next: next index into the port map
 * @sport_map:	source port map
 */
struct cxgb3i_sdev_data {
	struct list_head list;
	struct t3cdev *cdev;
	struct cxgb3_client *client;
	struct adap_ports ports;
	unsigned int sport_map_next;
	/* trailing variable-length array; storage is allocated with the struct */
	unsigned long sport_map[0];
};
/* per-adapter data is stored in (and read back from) the net device's ec_ptr */
#define NDEV2CDATA(ndev) (*(struct cxgb3i_sdev_data **)&(ndev)->ec_ptr)
/* same lookup, starting from the t3cdev's lldev */
#define CXGB3_SDEV_DATA(cdev) NDEV2CDATA((cdev)->lldev)

/* adapter registration */
void cxgb3i_sdev_cleanup(void);
int cxgb3i_sdev_init(cxgb3_cpl_handler_func *);
void cxgb3i_sdev_add(struct t3cdev *, struct cxgb3_client *);
void cxgb3i_sdev_remove(struct t3cdev *);

/* connection management */
struct s3_conn *cxgb3i_c3cn_create(void);
int cxgb3i_c3cn_connect(struct s3_conn *, struct sockaddr_in *);
void cxgb3i_c3cn_rx_credits(struct s3_conn *, int);
int cxgb3i_c3cn_send_pdus(struct s3_conn *, struct sk_buff *);
void cxgb3i_c3cn_release(struct s3_conn *);

/**
 * cxgb3_skb_cb - control block for received pdu state and ULP mode management.
+ * + * @flag: see C3CB_FLAG_* below + * @ulp_mode: ULP mode/submode of sk_buff + * @seq: tcp sequence number + * @ddigest: pdu data digest + * @pdulen: recovered pdu length + * @ulp_data: scratch area for ULP + */ +struct cxgb3_skb_cb { + __u8 flags; + __u8 ulp_mode; + __u32 seq; + __u32 ddigest; + __u32 pdulen; + __u8 ulp_data[16]; +}; + +#define CXGB3_SKB_CB(skb) ((struct cxgb3_skb_cb *)&((skb)->cb[0])) + +#define skb_ulp_mode(skb) (CXGB3_SKB_CB(skb)->ulp_mode) +#define skb_ulp_ddigest(skb) (CXGB3_SKB_CB(skb)->ddigest) +#define skb_ulp_pdulen(skb) (CXGB3_SKB_CB(skb)->pdulen) +#define skb_ulp_data(skb) (CXGB3_SKB_CB(skb)->ulp_data) + +enum c3cb_flags { + C3CB_FLAG_NEED_HDR = 1 << 0, /* packet needs a TX_DATA_WR header */ + C3CB_FLAG_NO_APPEND = 1 << 1, /* don't grow this skb */ + C3CB_FLAG_COMPL = 1 << 2, /* request WR completion */ +}; + +/** + * sge_opaque_hdr - + * Opaque version of structure the SGE stores at skb->head of TX_DATA packets + * and for which we must reserve space. + */ +struct sge_opaque_hdr { + void *dev; + dma_addr_t addr[MAX_SKB_FRAGS + 1]; +}; + +/* for TX: a skb must have a headroom of at least TX_HEADER_LEN bytes */ +#define TX_HEADER_LEN \ + (sizeof(struct tx_data_wr) + sizeof(struct sge_opaque_hdr)) + +/* + * get and set private ip for iscsi traffic + */ +#define cxgb3i_get_private_ipv4addr(ndev) \ + (((struct port_info *)(netdev_priv(ndev)))->iscsi_ipv4addr) +#define cxgb3i_set_private_ipv4addr(ndev, addr) \ + (((struct port_info *)(netdev_priv(ndev)))->iscsi_ipv4addr) = addr + +/* max. connections per adapter */ +#define CXGB3I_MAX_CONN 16384 +#endif /* _CXGB3_OFFLOAD_H */ diff --git a/drivers/scsi/cxgb3i/cxgb3i_pdu.c b/drivers/scsi/cxgb3i/cxgb3i_pdu.c new file mode 100644 index 00000000000..ce7ce8c6094 --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i_pdu.c @@ -0,0 +1,402 @@ +/* + * cxgb3i_pdu.c: Chelsio S3xx iSCSI driver. + * + * Copyright (c) 2008 Chelsio Communications, Inc. 
+ * Copyright (c) 2008 Mike Christie + * Copyright (c) 2008 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + * + * Written by: Karen Xie (kxie@chelsio.com) + */ + +#include +#include +#include +#include + +#include "cxgb3i.h" +#include "cxgb3i_pdu.h" + +#ifdef __DEBUG_CXGB3I_RX__ +#define cxgb3i_rx_debug cxgb3i_log_debug +#else +#define cxgb3i_rx_debug(fmt...) +#endif + +#ifdef __DEBUG_CXGB3I_TX__ +#define cxgb3i_tx_debug cxgb3i_log_debug +#else +#define cxgb3i_tx_debug(fmt...) +#endif + +static struct page *pad_page; + +/* + * pdu receive, interact with libiscsi_tcp + */ +static inline int read_pdu_skb(struct iscsi_conn *conn, struct sk_buff *skb, + unsigned int offset, int offloaded) +{ + int status = 0; + int bytes_read; + + bytes_read = iscsi_tcp_recv_skb(conn, skb, offset, offloaded, &status); + switch (status) { + case ISCSI_TCP_CONN_ERR: + return -EIO; + case ISCSI_TCP_SUSPENDED: + /* no transfer - just have caller flush queue */ + return bytes_read; + case ISCSI_TCP_SKB_DONE: + /* + * pdus should always fit in the skb and we should get + * segment done notifcation. 
+ */ + iscsi_conn_printk(KERN_ERR, conn, "Invalid pdu or skb."); + return -EFAULT; + case ISCSI_TCP_SEGMENT_DONE: + return bytes_read; + default: + iscsi_conn_printk(KERN_ERR, conn, "Invalid iscsi_tcp_recv_skb " + "status %d\n", status); + return -EINVAL; + } +} + +static int cxgb3i_conn_read_pdu_skb(struct iscsi_conn *conn, + struct sk_buff *skb) +{ + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + bool offloaded = 0; + unsigned int offset; + int rc; + + cxgb3i_rx_debug("conn 0x%p, skb 0x%p, len %u, flag 0x%x.\n", + conn, skb, skb->len, skb_ulp_mode(skb)); + + if (!iscsi_tcp_recv_segment_is_hdr(tcp_conn)) { + iscsi_conn_failure(conn, ISCSI_ERR_PROTO); + return -EIO; + } + + if (conn->hdrdgst_en && (skb_ulp_mode(skb) & ULP2_FLAG_HCRC_ERROR)) { + iscsi_conn_failure(conn, ISCSI_ERR_HDR_DGST); + return -EIO; + } + + if (conn->datadgst_en && (skb_ulp_mode(skb) & ULP2_FLAG_DCRC_ERROR)) { + iscsi_conn_failure(conn, ISCSI_ERR_DATA_DGST); + return -EIO; + } + + /* iscsi hdr */ + rc = read_pdu_skb(conn, skb, 0, 0); + if (rc <= 0) + return rc; + + if (iscsi_tcp_recv_segment_is_hdr(tcp_conn)) + return 0; + + offset = rc; + if (conn->hdrdgst_en) + offset += ISCSI_DIGEST_SIZE; + + /* iscsi data */ + if (skb_ulp_mode(skb) & ULP2_FLAG_DATA_DDPED) { + cxgb3i_rx_debug("skb 0x%p, opcode 0x%x, data %u, ddp'ed, " + "itt 0x%x.\n", + skb, + tcp_conn->in.hdr->opcode & ISCSI_OPCODE_MASK, + tcp_conn->in.datalen, + ntohl(tcp_conn->in.hdr->itt)); + offloaded = 1; + } else { + cxgb3i_rx_debug("skb 0x%p, opcode 0x%x, data %u, NOT ddp'ed, " + "itt 0x%x.\n", + skb, + tcp_conn->in.hdr->opcode & ISCSI_OPCODE_MASK, + tcp_conn->in.datalen, + ntohl(tcp_conn->in.hdr->itt)); + offset += sizeof(struct cpl_iscsi_hdr_norss); + } + + rc = read_pdu_skb(conn, skb, offset, offloaded); + if (rc < 0) + return rc; + else + return 0; +} + +/* + * pdu transmit, interact with libiscsi_tcp + */ +static inline void tx_skb_setmode(struct sk_buff *skb, int hcrc, int dcrc) +{ + u8 submode = 0; + + if (hcrc) + submode 
|= 1; + if (dcrc) + submode |= 2; + skb_ulp_mode(skb) = (ULP_MODE_ISCSI << 4) | submode; +} + +void cxgb3i_conn_cleanup_task(struct iscsi_task *task) +{ + struct iscsi_tcp_task *tcp_task = task->dd_data; + + /* never reached the xmit task callout */ + if (tcp_task->dd_data) + kfree_skb(tcp_task->dd_data); + tcp_task->dd_data = NULL; + + /* MNC - Do we need a check in case this is called but + * cxgb3i_conn_alloc_pdu has never been called on the task */ + cxgb3i_release_itt(task, task->hdr_itt); + iscsi_tcp_cleanup_task(task); +} + +/* + * We do not support ahs yet + */ +int cxgb3i_conn_alloc_pdu(struct iscsi_task *task, u8 opcode) +{ + struct iscsi_tcp_task *tcp_task = task->dd_data; + struct sk_buff *skb; + + task->hdr = NULL; + /* always allocate rooms for AHS */ + skb = alloc_skb(sizeof(struct iscsi_hdr) + ISCSI_MAX_AHS_SIZE + + TX_HEADER_LEN, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + cxgb3i_tx_debug("task 0x%p, opcode 0x%x, skb 0x%p.\n", + task, opcode, skb); + + tcp_task->dd_data = skb; + skb_reserve(skb, TX_HEADER_LEN); + task->hdr = (struct iscsi_hdr *)skb->data; + task->hdr_max = sizeof(struct iscsi_hdr); + + /* data_out uses scsi_cmd's itt */ + if (opcode != ISCSI_OP_SCSI_DATA_OUT) + cxgb3i_reserve_itt(task, &task->hdr->itt); + + return 0; +} + +int cxgb3i_conn_init_pdu(struct iscsi_task *task, unsigned int offset, + unsigned int count) +{ + struct iscsi_tcp_task *tcp_task = task->dd_data; + struct sk_buff *skb = tcp_task->dd_data; + struct iscsi_conn *conn = task->conn; + struct page *pg; + unsigned int datalen = count; + int i, padlen = iscsi_padding(count); + skb_frag_t *frag; + + cxgb3i_tx_debug("task 0x%p,0x%p, offset %u, count %u, skb 0x%p.\n", + task, task->sc, offset, count, skb); + + skb_put(skb, task->hdr_len); + tx_skb_setmode(skb, conn->hdrdgst_en, datalen ? 
conn->datadgst_en : 0); + if (!count) + return 0; + + if (task->sc) { + struct scatterlist *sg; + struct scsi_data_buffer *sdb; + unsigned int sgoffset = offset; + struct page *sgpg; + unsigned int sglen; + + sdb = scsi_out(task->sc); + sg = sdb->table.sgl; + + for_each_sg(sdb->table.sgl, sg, sdb->table.nents, i) { + cxgb3i_tx_debug("sg %d, page 0x%p, len %u offset %u\n", + i, sg_page(sg), sg->length, sg->offset); + + if (sgoffset < sg->length) + break; + sgoffset -= sg->length; + } + sgpg = sg_page(sg); + sglen = sg->length - sgoffset; + + do { + int j = skb_shinfo(skb)->nr_frags; + unsigned int copy; + + if (!sglen) { + sg = sg_next(sg); + sgpg = sg_page(sg); + sgoffset = 0; + sglen = sg->length; + ++i; + } + copy = min(sglen, datalen); + if (j && skb_can_coalesce(skb, j, sgpg, + sg->offset + sgoffset)) { + skb_shinfo(skb)->frags[j - 1].size += copy; + } else { + get_page(sgpg); + skb_fill_page_desc(skb, j, sgpg, + sg->offset + sgoffset, copy); + } + sgoffset += copy; + sglen -= copy; + datalen -= copy; + } while (datalen); + } else { + pg = virt_to_page(task->data); + + while (datalen) { + i = skb_shinfo(skb)->nr_frags; + frag = &skb_shinfo(skb)->frags[i]; + + get_page(pg); + frag->page = pg; + frag->page_offset = 0; + frag->size = min((unsigned int)PAGE_SIZE, datalen); + + skb_shinfo(skb)->nr_frags++; + datalen -= frag->size; + pg++; + } + } + + if (padlen) { + i = skb_shinfo(skb)->nr_frags; + frag = &skb_shinfo(skb)->frags[i]; + frag->page = pad_page; + frag->page_offset = 0; + frag->size = padlen; + skb_shinfo(skb)->nr_frags++; + } + + datalen = count + padlen; + skb->data_len += datalen; + skb->truesize += datalen; + skb->len += datalen; + return 0; +} + +int cxgb3i_conn_xmit_pdu(struct iscsi_task *task) +{ + struct iscsi_tcp_task *tcp_task = task->dd_data; + struct sk_buff *skb = tcp_task->dd_data; + struct iscsi_tcp_conn *tcp_conn = task->conn->dd_data; + struct cxgb3i_conn *cconn = tcp_conn->dd_data; + unsigned int datalen; + int err; + + if (!skb) + 
return 0; + + datalen = skb->data_len; + tcp_task->dd_data = NULL; + err = cxgb3i_c3cn_send_pdus(cconn->cep->c3cn, skb); + cxgb3i_tx_debug("task 0x%p, skb 0x%p, len %u/%u, rv %d.\n", + task, skb, skb->len, skb->data_len, err); + if (err > 0) { + int pdulen = err; + + if (task->conn->hdrdgst_en) + pdulen += ISCSI_DIGEST_SIZE; + if (datalen && task->conn->datadgst_en) + pdulen += ISCSI_DIGEST_SIZE; + + task->conn->txdata_octets += pdulen; + return 0; + } + + if (err < 0 && err != -EAGAIN) { + kfree_skb(skb); + cxgb3i_tx_debug("itt 0x%x, skb 0x%p, len %u/%u, xmit err %d.\n", + task->itt, skb, skb->len, skb->data_len, err); + iscsi_conn_printk(KERN_ERR, task->conn, "xmit err %d.\n", err); + iscsi_conn_failure(task->conn, ISCSI_ERR_XMIT_FAILED); + return err; + } + /* reset skb to send when we are called again */ + tcp_task->dd_data = skb; + return -EAGAIN; +} + +int cxgb3i_pdu_init(void) +{ + pad_page = alloc_page(GFP_KERNEL); + if (!pad_page) + return -ENOMEM; + memset(page_address(pad_page), 0, PAGE_SIZE); + return 0; +} + +void cxgb3i_pdu_cleanup(void) +{ + if (pad_page) { + __free_page(pad_page); + pad_page = NULL; + } +} + +void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn) +{ + struct sk_buff *skb; + unsigned int read = 0; + struct iscsi_conn *conn = c3cn->user_data; + int err = 0; + + cxgb3i_rx_debug("cn 0x%p.\n", c3cn); + + read_lock(&c3cn->callback_lock); + if (unlikely(!conn || conn->suspend_rx)) { + cxgb3i_rx_debug("conn 0x%p, id %d, suspend_rx %lu!\n", + conn, conn ? conn->id : 0xFF, + conn ? 
conn->suspend_rx : 0xFF); + read_unlock(&c3cn->callback_lock); + return; + } + skb = skb_peek(&c3cn->receive_queue); + while (!err && skb) { + __skb_unlink(skb, &c3cn->receive_queue); + read += skb_ulp_pdulen(skb); + err = cxgb3i_conn_read_pdu_skb(conn, skb); + __kfree_skb(skb); + skb = skb_peek(&c3cn->receive_queue); + } + read_unlock(&c3cn->callback_lock); + if (c3cn) { + c3cn->copied_seq += read; + cxgb3i_c3cn_rx_credits(c3cn, read); + } + conn->rxdata_octets += read; +} + +void cxgb3i_conn_tx_open(struct s3_conn *c3cn) +{ + struct iscsi_conn *conn = c3cn->user_data; + + cxgb3i_tx_debug("cn 0x%p.\n", c3cn); + if (conn) { + cxgb3i_tx_debug("cn 0x%p, cid %d.\n", c3cn, conn->id); + scsi_queue_work(conn->session->host, &conn->xmitwork); + } +} + +void cxgb3i_conn_closing(struct s3_conn *c3cn) +{ + struct iscsi_conn *conn; + + read_lock(&c3cn->callback_lock); + conn = c3cn->user_data; + if (conn && c3cn->state != C3CN_STATE_ESTABLISHED) + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); + read_unlock(&c3cn->callback_lock); +} diff --git a/drivers/scsi/cxgb3i/cxgb3i_pdu.h b/drivers/scsi/cxgb3i/cxgb3i_pdu.h new file mode 100644 index 00000000000..a3f685cc236 --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i_pdu.h @@ -0,0 +1,59 @@ +/* + * cxgb3i_ulp2.h: Chelsio S3xx iSCSI driver. + * + * Copyright (c) 2008 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. 
+ * + * Written by: Karen Xie (kxie@chelsio.com) + */ + +#ifndef __CXGB3I_ULP2_PDU_H__ +#define __CXGB3I_ULP2_PDU_H__ + +struct cpl_iscsi_hdr_norss { + union opcode_tid ot; + u16 pdu_len_ddp; + u16 len; + u32 seq; + u16 urg; + u8 rsvd; + u8 status; +}; + +struct cpl_rx_data_ddp_norss { + union opcode_tid ot; + u16 urg; + u16 len; + u32 seq; + u32 nxt_seq; + u32 ulp_crc; + u32 ddp_status; +}; + +#define RX_DDP_STATUS_IPP_SHIFT 27 /* invalid pagepod */ +#define RX_DDP_STATUS_TID_SHIFT 26 /* tid mismatch */ +#define RX_DDP_STATUS_COLOR_SHIFT 25 /* color mismatch */ +#define RX_DDP_STATUS_OFFSET_SHIFT 24 /* offset mismatch */ +#define RX_DDP_STATUS_ULIMIT_SHIFT 23 /* ulimit error */ +#define RX_DDP_STATUS_TAG_SHIFT 22 /* tag mismatch */ +#define RX_DDP_STATUS_DCRC_SHIFT 21 /* dcrc error */ +#define RX_DDP_STATUS_HCRC_SHIFT 20 /* hcrc error */ +#define RX_DDP_STATUS_PAD_SHIFT 19 /* pad error */ +#define RX_DDP_STATUS_PPP_SHIFT 18 /* pagepod parity error */ +#define RX_DDP_STATUS_LLIMIT_SHIFT 17 /* llimit error */ +#define RX_DDP_STATUS_DDP_SHIFT 16 /* ddp'able */ +#define RX_DDP_STATUS_PMM_SHIFT 15 /* pagepod mismatch */ + +#define ULP2_FLAG_DATA_READY 0x1 +#define ULP2_FLAG_DATA_DDPED 0x2 +#define ULP2_FLAG_HCRC_ERROR 0x10 +#define ULP2_FLAG_DCRC_ERROR 0x20 +#define ULP2_FLAG_PAD_ERROR 0x40 + +void cxgb3i_conn_closing(struct s3_conn *); +void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn); +void cxgb3i_conn_tx_open(struct s3_conn *c3cn); +#endif -- cgit v1.2.3-70-g09d2 From 94409d6e1088517b6d6c8e669c604cc86d08ac1b Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 18 Dec 2008 19:37:23 +0300 Subject: powerpc: Add device tree bindings for BCSR GPIO banks The patch adds bindings for BCSR GPIO banks, the bindings are used to describe particular BCSR registers that act as simple GPIO controllers. These GPIO banks might control power switches, SPI chip-selects, LEDs, etc. While at it, also fix "length" spelling error in the PIXIS FPGA bindings. 
Signed-off-by: Anton Vorontsov Signed-off-by: Kumar Gala --- Documentation/powerpc/dts-bindings/fsl/board.txt | 32 +++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/powerpc/dts-bindings/fsl/board.txt b/Documentation/powerpc/dts-bindings/fsl/board.txt index 81a917ef96e..6c974d28eeb 100644 --- a/Documentation/powerpc/dts-bindings/fsl/board.txt +++ b/Documentation/powerpc/dts-bindings/fsl/board.txt @@ -18,7 +18,7 @@ This is the memory-mapped registers for on board FPGA. Required properities: - compatible : should be "fsl,fpga-pixis". -- reg : should contain the address and the lenght of the FPPGA register +- reg : should contain the address and the length of the FPPGA register set. Example (MPC8610HPCD): @@ -27,3 +27,33 @@ Example (MPC8610HPCD): compatible = "fsl,fpga-pixis"; reg = <0xe8000000 32>; }; + +* Freescale BCSR GPIO banks + +Some BCSR registers act as simple GPIO controllers, each such +register can be represented by the gpio-controller node. + +Required properities: +- compatible : Should be "fsl,<board>-bcsr-gpio". +- reg : Should contain the address and the length of the GPIO bank + register. +- #gpio-cells : Should be two. The first cell is the pin number and the + second cell is used to specify optional paramters (currently unused). +- gpio-controller : Marks the port as GPIO controller.
+ +Example: + + bcsr@1,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,mpc8360mds-bcsr"; + reg = <1 0 0x8000>; + ranges = <0 1 0 0x8000>; + + bcsr13: gpio-controller@d { + #gpio-cells = <2>; + compatible = "fsl,mpc8360mds-bcsr-gpio"; + reg = <0xd 1>; + gpio-controller; + }; + }; -- cgit v1.2.3-70-g09d2 From 80736d41f895bc472b2433a1c27fa6d4afe6ca35 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 30 Dec 2008 17:44:02 +0200 Subject: UBIFS: fix numerous spelling mistakes Signed-off-by: Artem Bityutskiy --- Documentation/filesystems/ubifs.txt | 6 +++--- fs/ubifs/budget.c | 14 +++++++------- fs/ubifs/lpt_commit.c | 8 ++++---- fs/ubifs/ubifs.h | 1 - 4 files changed, 14 insertions(+), 15 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt index 2d0db5482d2..84da2a4ba25 100644 --- a/Documentation/filesystems/ubifs.txt +++ b/Documentation/filesystems/ubifs.txt @@ -95,9 +95,9 @@ no_chk_data_crc skip checking of CRCs on data nodes in order to of this option is that corruption of the contents of a file can go unnoticed. chk_data_crc (*) do not skip checking CRCs on data nodes -compr=none override defoult comressor and set it to "none" -compr=lzo override defoult comressor and set it to "lzo" -compr=zlib override defoult comressor and set it to "zlib" +compr=none override default compressor and set it to "none" +compr=lzo override default compressor and set it to "lzo" +compr=zlib override default compressor and set it to "zlib" Quick usage instructions diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 4d270f0a856..31870d8dab8 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -652,9 +652,9 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, * user-space. User-space application tend to expect that if the file-system * (e.g., via the 'statfs()' call) reports that it has N bytes available, they * are able to write a file of size N. 
UBIFS attaches node headers to each data - * node and it has to write indexind nodes as well. This introduces additional - * overhead, and UBIFS has to report sligtly less free space to meet the above - * expectetions. + * node and it has to write indexing nodes as well. This introduces additional + * overhead, and UBIFS has to report slightly less free space to meet the above + * expectations. * * This function assumes free space is made up of uncompressed data nodes and * full index nodes (one per data node, tripled because we always allow enough @@ -677,7 +677,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) * of data nodes, f - fanout. Because effective UBIFS fanout is twice * as less than maximum fanout, we assume that each data node * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. - * Note, the multiplier 3 is because UBIFS reseves thrice as more space + * Note, the multiplier 3 is because UBIFS reserves thrice as more space * for the index. */ f = c->fanout > 3 ? c->fanout >> 1 : 2; @@ -695,10 +695,10 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) * This function calculates amount of free space to report to user-space. * * Because UBIFS may introduce substantial overhead (the index, node headers, - * alighment, wastage at the end of eraseblocks, etc), it cannot report real + * alignment, wastage at the end of eraseblocks, etc), it cannot report real * amount of free flash space it has (well, because not all dirty space is - * reclamable, UBIFS does not actually know the real amount). If UBIFS did so, - * it would bread user expectetion about what free space is. Users seem to + * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, + * it would bread user expectations about what free space is. Users seem to * accustomed to assume that if the file-system reports N bytes of free space, * they would be able to fit a file of N bytes to the FS. 
This almost works for * traditional file-systems, because they have way less overhead than UBIFS. diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index b8a06079423..96ca9570717 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -753,7 +753,7 @@ static void lpt_tgc_start(struct ubifs_info *c) * LPT trivial garbage collection is where a LPT LEB contains only dirty and * free space and so may be reused as soon as the next commit is completed. * This function is called after the commit is completed (master node has been - * written) and unmaps LPT LEBs that were marked for trivial GC. + * written) and un-maps LPT LEBs that were marked for trivial GC. */ static int lpt_tgc_end(struct ubifs_info *c) { @@ -1467,7 +1467,7 @@ void ubifs_lpt_free(struct ubifs_info *c, int wr_only) #ifdef CONFIG_UBIFS_FS_DEBUG /** - * dbg_is_all_ff - determine if a buffer contains only 0xff bytes. + * dbg_is_all_ff - determine if a buffer contains only 0xFF bytes. * @buf: buffer * @len: buffer length */ @@ -1492,7 +1492,7 @@ static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs) struct ubifs_nnode *nnode; int hght; - /* Entire tree is in memory so first_nnode / next_nnode are ok */ + /* Entire tree is in memory so first_nnode / next_nnode are OK */ nnode = first_nnode(c, &hght); for (; nnode; nnode = next_nnode(c, nnode, &hght)) { struct ubifs_nbranch *branch; @@ -1837,7 +1837,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) * This function dumps an LEB from LPT area. Nodes in this area are very * different to nodes in the main area (e.g., they do not have common headers, * they do not have 8-byte alignments, etc), so we have a separate function to - * dump LPT area LEBs. Note, LPT has to be locked by the coller. + * dump LPT area LEBs. Note, LPT has to be locked by the caller. 
*/ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) { diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 3275c89a358..fc2a4cc66d0 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1168,7 +1168,6 @@ struct ubifs_debug_info; * @mount_opts: UBIFS-specific mount options * * @dbg: debugging-related information - * @dfs: debugfs support-related information */ struct ubifs_info { struct super_block *vfs_sb; -- cgit v1.2.3-70-g09d2 From 277d342fc423fca5e66e677fe629d1b2f8f1b9e2 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 31 Dec 2008 12:54:11 -0500 Subject: selinux: Deprecate and schedule the removal of the the compat_net functionality This patch is the first step towards removing the old "compat_net" code from the kernel. Secmark, the "compat_net" replacement was first introduced in 2.6.18 (September 2006) and the major Linux distributions with SELinux support have transitioned to Secmark so it is time to start deprecating the "compat_net" mechanism. Testing a patched version of 2.6.28-rc6 with the initial release of Fedora Core 5 did not show any problems when running in enforcing mode. This patch adds an entry to the feature-removal-schedule.txt file and removes the SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT configuration option, forcing Secmark on by default although it can still be disabled at runtime. The patch also makes the Secmark permission checks "dynamic" in the sense that they are only executed when Secmark is configured; this should help prevent problems with older distributions that have not yet migrated to Secmark. 
Signed-off-by: Paul Moore Acked-by: James Morris --- Documentation/feature-removal-schedule.txt | 12 ++++++++++++ security/selinux/Kconfig | 27 --------------------------- security/selinux/hooks.c | 6 +++--- security/selinux/selinuxfs.c | 16 ++++++++-------- 4 files changed, 23 insertions(+), 38 deletions(-) (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index dc7c681e532..a0ed3964a21 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -324,3 +324,15 @@ When: 2.6.29 (ideally) or 2.6.30 (more likely) Why: Deprecated by the new (standard) device driver binding model. Use i2c_driver->probe() and ->remove() instead. Who: Jean Delvare + +--------------------------- + +What: SELinux "compat_net" functionality +When: 2.6.30 at the earliest +Why: In 2.6.18 the Secmark concept was introduced to replace the "compat_net" + network access control functionality of SELinux. Secmark offers both + better performance and greater flexibility than the "compat_net" + mechanism. Now that the major Linux distributions have moved to + Secmark, it is time to deprecate the older mechanism and start the + process of removing the old code. +Who: Paul Moore diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig index 26301dd651d..bca1b74a4a2 100644 --- a/security/selinux/Kconfig +++ b/security/selinux/Kconfig @@ -94,33 +94,6 @@ config SECURITY_SELINUX_CHECKREQPROT_VALUE If you are unsure how to answer this question, answer 1. -config SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT - bool "NSA SELinux enable new secmark network controls by default" - depends on SECURITY_SELINUX - default n - help - This option determines whether the new secmark-based network - controls will be enabled by default. If not, the old internal - per-packet controls will be enabled by default, preserving - old behavior. 
- - If you enable the new controls, you will need updated - SELinux userspace libraries, tools and policy. Typically, - your distribution will provide these and enable the new controls - in the kernel they also distribute. - - Note that this option can be overridden at boot with the - selinux_compat_net parameter, and after boot via - /selinux/compat_net. See Documentation/kernel-parameters.txt - for details on this parameter. - - If you enable the new network controls, you will likely - also require the SECMARK and CONNSECMARK targets, as - well as any conntrack helpers for protocols which you - wish to control. - - If you are unsure what to do here, select N. - config SECURITY_SELINUX_POLICYDB_VERSION_MAX bool "NSA SELinux maximum supported policy format version" depends on SECURITY_SELINUX diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index dbeaa783b2a..df30a7555d8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4185,7 +4185,7 @@ static int selinux_sock_rcv_skb_iptables_compat(struct sock *sk, static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, u16 family) { - int err; + int err = 0; struct sk_security_struct *sksec = sk->sk_security; u32 peer_sid; u32 sk_sid = sksec->sid; @@ -4202,7 +4202,7 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, if (selinux_compat_net) err = selinux_sock_rcv_skb_iptables_compat(sk, skb, &ad, family, addrp); - else + else if (selinux_secmark_enabled()) err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, PACKET__RECV, &ad); if (err) @@ -4705,7 +4705,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, if (selinux_ip_postroute_iptables_compat(skb->sk, ifindex, &ad, family, addrp)) return NF_DROP; - } else { + } else if (selinux_secmark_enabled()) { if (avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET, PACKET__SEND, &ad)) return NF_DROP; diff --git a/security/selinux/selinuxfs.c 
b/security/selinux/selinuxfs.c index c8630363823..77fb3c8d926 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -47,13 +47,7 @@ static char *policycap_names[] = { unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; -#ifdef CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT -#define SELINUX_COMPAT_NET_VALUE 0 -#else -#define SELINUX_COMPAT_NET_VALUE 1 -#endif - -int selinux_compat_net = SELINUX_COMPAT_NET_VALUE; +int selinux_compat_net = 0; static int __init checkreqprot_setup(char *str) { @@ -494,7 +488,13 @@ static ssize_t sel_write_compat_net(struct file *file, const char __user *buf, if (sscanf(page, "%d", &new_value) != 1) goto out; - selinux_compat_net = new_value ? 1 : 0; + if (new_value) { + printk(KERN_NOTICE + "SELinux: compat_net is deprecated, please use secmark" + " instead\n"); + selinux_compat_net = 1; + } else + selinux_compat_net = 0; length = count; out: free_page((unsigned long) page); -- cgit v1.2.3-70-g09d2 From be42c4c433c2c0d3f1583c08908fead00d36d222 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Mon, 1 Dec 2008 14:34:58 -0800 Subject: correct wrong function name of d_put in kernel document and source comment no function named d_put(), it should be dput(). Impact: fix document and comment, no functionality changed Signed-off-by: Zhao Lei Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- Documentation/filesystems/vfs.txt | 2 +- fs/dcache.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 5579bda58a6..041cb771d50 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -931,7 +931,7 @@ manipulate dentries: d_lookup: look up a dentry given its parent and path name component It looks up the child of that given name from the dcache hash table. 
If it is found, the reference count is incremented - and the dentry is returned. The caller must use d_put() + and the dentry is returned. The caller must use dput() to free the dentry when it finishes using it. For further information on dentry locking, please refer to the document diff --git a/fs/dcache.c b/fs/dcache.c index eeafc14c2a1..c231a639c2a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1332,7 +1332,7 @@ err_out: * * Searches the children of the parent dentry for the name in question. If * the dentry is found its reference count is incremented and the dentry - * is returned. The caller must use d_put to free the entry when it has + * is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned on failure. * * __d_lookup is dcache_lock free. The hash list is protected using RCU. -- cgit v1.2.3-70-g09d2 From fd659fd6275d3426d7967da1f0e3638bbbd2fedb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Dec 2008 09:35:45 -0800 Subject: fix f_count description in Documentation/filesystems/files.txt Documentation/filesystems/files.txt was not updated when f_count became an atomic_long_t. atomic_long_inc_not_zero() is now used instead of atomic_inc_not_zero() Signed-off-by: Al Viro --- Documentation/filesystems/files.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/files.txt b/Documentation/filesystems/files.txt index bb0142f6108..ac2facc50d2 100644 --- a/Documentation/filesystems/files.txt +++ b/Documentation/filesystems/files.txt @@ -76,13 +76,13 @@ the fdtable structure - 5. Handling of the file structures is special. Since the look-up of the fd (fget()/fget_light()) are lock-free, it is possible that look-up may race with the last put() operation on the - file structure. This is avoided using atomic_inc_not_zero() + file structure. 
This is avoided using atomic_long_inc_not_zero() on ->f_count : rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (atomic_inc_not_zero(&file->f_count)) + if (atomic_long_inc_not_zero(&file->f_count)) *fput_needed = 1; else /* Didn't get the reference, someone's freed */ @@ -92,7 +92,7 @@ the fdtable structure - .... return file; - atomic_inc_not_zero() detects if refcounts is already zero or + atomic_long_inc_not_zero() detects if refcounts is already zero or goes to zero during increment. If it does, we fail fget()/fget_light(). -- cgit v1.2.3-70-g09d2 From 6badd79bd002788aaec27b50a74ab69ef65ab8ee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 26 Dec 2008 00:57:40 -0500 Subject: kill ->dir_notify() Remove the hopelessly misguided ->dir_notify(). The only instance (cifs) has been broken by design from the very beginning; the objects it creates are never destroyed, keep references to struct file they can outlive, nothing that could possibly evict them exists on close(2) path *and* no locking whatsoever is done to prevent races with close(), should the previous, er, deficiencies someday be dealt with. 
Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 - Documentation/filesystems/vfs.txt | 3 - fs/bad_inode.c | 6 -- fs/cifs/Makefile | 2 +- fs/cifs/cifsfs.c | 7 --- fs/cifs/cifsfs.h | 1 - fs/cifs/fcntl.c | 118 -------------------------------------- fs/dnotify.c | 3 - include/linux/fs.h | 1 - 9 files changed, 1 insertion(+), 142 deletions(-) delete mode 100644 fs/cifs/fcntl.c (limited to 'Documentation') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 23d2f4460de..ccec5539438 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -394,7 +394,6 @@ prototypes: unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); - int (*dir_notify)(struct file *, unsigned long); }; locking rules: @@ -424,7 +423,6 @@ sendfile: no sendpage: no get_unmapped_area: no check_flags: no -dir_notify: no ->llseek() locking has moved from llseek to the individual llseek implementations. If your fs is not using generic_file_llseek, you diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 041cb771d50..ef19afa186a 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -733,7 +733,6 @@ struct file_operations { ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); - int (*dir_notify)(struct file *filp, unsigned long arg); int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int); @@ -800,8 +799,6 @@ otherwise noted. 
check_flags: called by the fcntl(2) system call for F_SETFL command - dir_notify: called by the fcntl(2) system call for F_NOTIFY command - flock: called by the flock(2) system call splice_write: called by the VFS to splice data from a pipe to a file. This diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 5f1538c03b1..a05287a23f6 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -132,11 +132,6 @@ static int bad_file_check_flags(int flags) return -EIO; } -static int bad_file_dir_notify(struct file *file, unsigned long arg) -{ - return -EIO; -} - static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl) { return -EIO; @@ -179,7 +174,6 @@ static const struct file_operations bad_file_ops = .sendpage = bad_file_sendpage, .get_unmapped_area = bad_file_get_unmapped_area, .check_flags = bad_file_check_flags, - .dir_notify = bad_file_dir_notify, .flock = bad_file_flock, .splice_write = bad_file_splice_write, .splice_read = bad_file_splice_read, diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 6ba43fb346f..9948c0030e8 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ - md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o \ + md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ readdir.o ioctl.o sess.o export.o cifsacl.o cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0005a194a75..13ea53251dc 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -747,7 +747,6 @@ const struct file_operations cifs_file_ops = { #endif /* CONFIG_CIFS_POSIX */ #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -768,7 +767,6 @@ const struct file_operations cifs_file_direct_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = 
cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -789,7 +787,6 @@ const struct file_operations cifs_file_nobrl_ops = { #endif /* CONFIG_CIFS_POSIX */ #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -809,7 +806,6 @@ const struct file_operations cifs_file_direct_nobrl_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -818,9 +814,6 @@ const struct file_operations cifs_dir_ops = { .readdir = cifs_readdir, .release = cifs_closedir, .read = generic_read_dir, -#ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, -#endif /* CONFIG_CIFS_EXPERIMENTAL */ .unlocked_ioctl = cifs_ioctl, .llseek = generic_file_llseek, }; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 2ce04c73d74..7ac481841f8 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -76,7 +76,6 @@ extern int cifs_file_mmap(struct file * , struct vm_area_struct *); extern const struct file_operations cifs_dir_ops; extern int cifs_dir_open(struct inode *inode, struct file *file); extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); -extern int cifs_dir_notify(struct file *, unsigned long arg); /* Functions related to dir entries */ extern struct dentry_operations cifs_dentry_ops; diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c deleted file mode 100644 index 5a57581eb4b..00000000000 --- a/fs/cifs/fcntl.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * fs/cifs/fcntl.c - * - * vfs operations that deal with the file control API - * - * Copyright (C) International Business Machines Corp., 2003,2004 - * Author(s): Steve French (sfrench@us.ibm.com) - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU 
Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include -#include -#include -#include "cifsglob.h" -#include "cifsproto.h" -#include "cifs_unicode.h" -#include "cifs_debug.h" -#include "cifsfs.h" - -static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags) -{ - __u32 cifs_ntfy_flags = 0; - - /* No way on Linux VFS to ask to monitor xattr - changes (and no stream support either */ - if (fcntl_notify_flags & DN_ACCESS) - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS; - if (fcntl_notify_flags & DN_MODIFY) { - /* What does this mean on directories? 
*/ - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE | - FILE_NOTIFY_CHANGE_SIZE; - } - if (fcntl_notify_flags & DN_CREATE) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | - FILE_NOTIFY_CHANGE_LAST_WRITE; - } - if (fcntl_notify_flags & DN_DELETE) - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE; - if (fcntl_notify_flags & DN_RENAME) { - /* BB review this - checking various server behaviors */ - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | - FILE_NOTIFY_CHANGE_FILE_NAME; - } - if (fcntl_notify_flags & DN_ATTRIB) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY | - FILE_NOTIFY_CHANGE_ATTRIBUTES; - } -/* if (fcntl_notify_flags & DN_MULTISHOT) { - cifs_ntfy_flags |= ; - } */ /* BB fixme - not sure how to handle this with CIFS yet */ - - return cifs_ntfy_flags; -} - -int cifs_dir_notify(struct file *file, unsigned long arg) -{ - int xid; - int rc = -EINVAL; - int oplock = 0; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; - char *full_path = NULL; - __u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES; - __u16 netfid; - - if (experimEnabled == 0) - return 0; - - xid = GetXid(); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - pTcon = cifs_sb->tcon; - - full_path = build_path_from_dentry(file->f_path.dentry); - - if (full_path == NULL) { - rc = -ENOMEM; - } else { - cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg)); - rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, - GENERIC_READ | SYNCHRONIZE, 0 /* create options */, - &netfid, &oplock, NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - /* BB fixme - add this handle to a notify handle list */ - if (rc) { - cFYI(1, ("Could not open directory for notify")); - } else { - filter = convert_to_cifs_notify_flags(arg); - if (filter != 0) { - rc = CIFSSMBNotify(xid, pTcon, - 0 /* no subdirs */, netfid, - filter, file, arg & DN_MULTISHOT, - cifs_sb->local_nls); - } else { - rc = -EINVAL; - } - /* BB add code to close file eventually (at 
unmount - it would close automatically but may be a way - to do it easily when inode freed or when - notify info is cleared/changed */ - cFYI(1, ("notify rc %d", rc)); - } - } - - FreeXid(xid); - return rc; -} diff --git a/fs/dnotify.c b/fs/dnotify.c index 676073b8dda..b0aa2cde80b 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -115,9 +115,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) dn->dn_next = inode->i_dnotify; inode->i_dnotify = dn; spin_unlock(&inode->i_lock); - - if (filp->f_op && filp->f_op->dir_notify) - return filp->f_op->dir_notify(filp, arg); return 0; out_free: diff --git a/include/linux/fs.h b/include/linux/fs.h index fd615986a41..be16ce01fb1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1309,7 +1309,6 @@ struct file_operations { ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); - int (*dir_notify)(struct file *filp, unsigned long arg); int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); -- cgit v1.2.3-70-g09d2 From 93c164af19f608c5f737eb9bed8cb4de3a872329 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 2 Jan 2009 16:12:51 +0100 Subject: remove ide-scsi As planed, this removes ide-scsi. The 2.6 kernel supports direct writing to ide CD drives, which eliminates the need for ide-scsi. ide-scsi has been unmaintained and marked as deprecated. 
Signed-off-by: FUJITA Tomonori Cc: James.Bottomley@HansenPartnership.com Signed-off-by: Bartlomiej Zolnierkiewicz --- Documentation/feature-removal-schedule.txt | 9 - MAINTAINERS | 5 - drivers/ide/Kconfig | 17 - drivers/scsi/Kconfig | 8 +- drivers/scsi/Makefile | 1 - drivers/scsi/ide-scsi.c | 840 ----------------------------- 6 files changed, 4 insertions(+), 876 deletions(-) delete mode 100644 drivers/scsi/ide-scsi.c (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index dc7c681e532..df18d87c483 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -310,15 +310,6 @@ Who: Krzysztof Piotr Oledzki --------------------------- -What: ide-scsi (BLK_DEV_IDESCSI) -When: 2.6.29 -Why: The 2.6 kernel supports direct writing to ide CD drives, which - eliminates the need for ide-scsi. The new method is more - efficient in every way. -Who: FUJITA Tomonori - ---------------------------- - What: i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client() When: 2.6.29 (ideally) or 2.6.30 (more likely) Why: Deprecated by the new (standard) device driver binding model. Use diff --git a/MAINTAINERS b/MAINTAINERS index ceb32ee51f9..144766c0dba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2146,11 +2146,6 @@ M: Gadi Oxman L: linux-kernel@vger.kernel.org S: Maintained -IDE-SCSI DRIVER -L: linux-ide@vger.kernel.org -L: linux-scsi@vger.kernel.org -S: Orphan - IDLE-I7300 P: Andy Henroid M: andrew.d.henroid@intel.com diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index c9f21e3d4ea..937945e471d 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -185,23 +185,6 @@ config BLK_DEV_IDETAPE To compile this driver as a module, choose M here: the module will be called ide-tape. 
-config BLK_DEV_IDESCSI - tristate "SCSI emulation support (DEPRECATED)" - depends on SCSI - select IDE_ATAPI - ---help--- - WARNING: ide-scsi is no longer needed for cd writing applications! - The 2.6 kernel supports direct writing to ide-cd, which eliminates - the need for ide-scsi + the entire scsi stack just for writing a - cd. The new method is more efficient in every way. - - This will provide SCSI host adapter emulation for IDE ATAPI devices, - and will allow you to use a SCSI device driver instead of a native - ATAPI driver. - - If both this SCSI emulation and native ATAPI support are compiled - into the kernel, the native support will be used. - config BLK_DEV_IDEACPI bool "IDE ACPI support" depends on ACPI diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 152d4aa9354..b7322976d2b 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -21,7 +21,7 @@ config SCSI You also need to say Y here if you have a device which speaks the SCSI protocol. Examples of this include the parallel port version of the IOMEGA ZIP drive, USB storage devices, Fibre - Channel, FireWire storage and the IDE-SCSI emulation driver. + Channel, and FireWire storage. To compile this driver as a module, choose M here and read . @@ -101,9 +101,9 @@ config CHR_DEV_OSST ---help--- The OnStream SC-x0 SCSI tape drives cannot be driven by the standard st driver, but instead need this special osst driver and - use the /dev/osstX char device nodes (major 206). Via usb-storage - and ide-scsi, you may be able to drive the USB-x0 and DI-x0 drives - as well. Note that there is also a second generation of OnStream + use the /dev/osstX char device nodes (major 206). Via usb-storage, + you may be able to drive the USB-x0 and DI-x0 drives as well. + Note that there is also a second generation of OnStream tape drives (ADR-x0) that supports the standard SCSI-2 commands for tapes (QIC-157) and can be driven by the standard driver st. 
For more information, you may have a look at the SCSI-HOWTO diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 1410697257c..7461eb09a03 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -105,7 +105,6 @@ obj-$(CONFIG_SCSI_GDTH) += gdth.o obj-$(CONFIG_SCSI_INITIO) += initio.o obj-$(CONFIG_SCSI_INIA100) += a100u2w.o obj-$(CONFIG_SCSI_QLOGICPTI) += qlogicpti.o -obj-$(CONFIG_BLK_DEV_IDESCSI) += ide-scsi.o obj-$(CONFIG_SCSI_MESH) += mesh.o obj-$(CONFIG_SCSI_MAC53C94) += mac53c94.o obj-$(CONFIG_BLK_DEV_3W_XXXX_RAID) += 3w-xxxx.o diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c deleted file mode 100644 index c24140aff8e..00000000000 --- a/drivers/scsi/ide-scsi.c +++ /dev/null @@ -1,840 +0,0 @@ -/* - * Copyright (C) 1996-1999 Gadi Oxman - * Copyright (C) 2004-2005 Bartlomiej Zolnierkiewicz - */ - -/* - * Emulation of a SCSI host adapter for IDE ATAPI devices. - * - * With this driver, one can use the Linux SCSI drivers instead of the - * native IDE ATAPI drivers. - * - * Ver 0.1 Dec 3 96 Initial version. - * Ver 0.2 Jan 26 97 Fixed bug in cleanup_module() and added emulation - * of MODE_SENSE_6/MODE_SELECT_6 for cdroms. Thanks - * to Janos Farkas for pointing this out. - * Avoid using bitfields in structures for m68k. - * Added Scatter/Gather and DMA support. - * Ver 0.4 Dec 7 97 Add support for ATAPI PD/CD drives. - * Use variable timeout for each command. - * Ver 0.5 Jan 2 98 Fix previous PD/CD support. - * Allow disabling of SCSI-6 to SCSI-10 transformation. - * Ver 0.6 Jan 27 98 Allow disabling of SCSI command translation layer - * for access through /dev/sg. - * Fix MODE_SENSE_6/MODE_SELECT_6/INQUIRY translation. - * Ver 0.7 Dec 04 98 Ignore commands where lun != 0 to avoid multiple - * detection of devices with CONFIG_SCSI_MULTI_LUN - * Ver 0.8 Feb 05 99 Optical media need translation too. Reverse 0.7. - * Ver 0.9 Jul 04 99 Fix a bug in SG_SET_TRANSFORM. 
- * Ver 0.91 Jun 10 02 Fix "off by one" error in transforms - * Ver 0.92 Dec 31 02 Implement new SCSI mid level API - */ - -#define IDESCSI_VERSION "0.92" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#define IDESCSI_DEBUG_LOG 0 - -#if IDESCSI_DEBUG_LOG -#define debug_log(fmt, args...) \ - printk(KERN_INFO "ide-scsi: " fmt, ## args) -#else -#define debug_log(fmt, args...) do {} while (0) -#endif - -/* - * SCSI command transformation layer - */ -#define IDESCSI_SG_TRANSFORM 1 /* /dev/sg transformation */ - -/* - * Log flags - */ -#define IDESCSI_LOG_CMD 0 /* Log SCSI commands */ - -typedef struct ide_scsi_obj { - ide_drive_t *drive; - ide_driver_t *driver; - struct gendisk *disk; - struct Scsi_Host *host; - - unsigned long transform; /* SCSI cmd translation layer */ - unsigned long log; /* log flags */ -} idescsi_scsi_t; - -static DEFINE_MUTEX(idescsi_ref_mutex); -/* Set by module param to skip cd */ -static int idescsi_nocd; - -#define ide_scsi_g(disk) \ - container_of((disk)->private_data, struct ide_scsi_obj, driver) - -static struct ide_scsi_obj *ide_scsi_get(struct gendisk *disk) -{ - struct ide_scsi_obj *scsi = NULL; - - mutex_lock(&idescsi_ref_mutex); - scsi = ide_scsi_g(disk); - if (scsi) { - if (ide_device_get(scsi->drive)) - scsi = NULL; - else - scsi_host_get(scsi->host); - } - mutex_unlock(&idescsi_ref_mutex); - return scsi; -} - -static void ide_scsi_put(struct ide_scsi_obj *scsi) -{ - ide_drive_t *drive = scsi->drive; - - mutex_lock(&idescsi_ref_mutex); - scsi_host_put(scsi->host); - ide_device_put(drive); - mutex_unlock(&idescsi_ref_mutex); -} - -static inline idescsi_scsi_t *scsihost_to_idescsi(struct Scsi_Host *host) -{ - return (idescsi_scsi_t*) (&host[1]); -} - -static inline idescsi_scsi_t *drive_to_idescsi(ide_drive_t *ide_drive) -{ - return 
scsihost_to_idescsi(ide_drive->driver_data); -} - -static void ide_scsi_hex_dump(u8 *data, int len) -{ - print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0); -} - -static int idescsi_end_request(ide_drive_t *, int, int); - -static void ide_scsi_callback(ide_drive_t *drive, int dsc) -{ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); - struct ide_atapi_pc *pc = drive->pc; - - if (pc->flags & PC_FLAG_TIMEDOUT) - debug_log("%s: got timed out packet %lu at %lu\n", __func__, - pc->scsi_cmd->serial_number, jiffies); - /* end this request now - scsi should retry it*/ - else if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) - printk(KERN_INFO "Packet command completed, %d bytes" - " transferred\n", pc->xferred); - - idescsi_end_request(drive, 1, 0); -} - -static int idescsi_check_condition(ide_drive_t *drive, - struct request *failed_cmd) -{ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); - struct ide_atapi_pc *pc; - struct request *rq; - u8 *buf; - - /* stuff a sense request in front of our current request */ - pc = kzalloc(sizeof(struct ide_atapi_pc), GFP_ATOMIC); - rq = blk_get_request(drive->queue, READ, GFP_ATOMIC); - buf = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_ATOMIC); - if (!pc || !rq || !buf) { - kfree(buf); - if (rq) - blk_put_request(rq); - kfree(pc); - return -ENOMEM; - } - rq->special = (char *) pc; - pc->rq = rq; - pc->buf = buf; - pc->c[0] = REQUEST_SENSE; - pc->c[4] = pc->req_xfer = pc->buf_size = SCSI_SENSE_BUFFERSIZE; - rq->cmd_type = REQ_TYPE_SENSE; - rq->cmd_flags |= REQ_PREEMPT; - pc->timeout = jiffies + WAIT_READY; - /* NOTE! 
Save the failed packet command in "rq->buffer" */ - rq->buffer = (void *) failed_cmd->special; - pc->scsi_cmd = ((struct ide_atapi_pc *) failed_cmd->special)->scsi_cmd; - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) { - printk ("ide-scsi: %s: queue cmd = ", drive->name); - ide_scsi_hex_dump(pc->c, 6); - } - rq->rq_disk = scsi->disk; - rq->ref_count++; - memcpy(rq->cmd, pc->c, 12); - ide_do_drive_cmd(drive, rq); - return 0; -} - -static ide_startstop_t -idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err) -{ - ide_hwif_t *hwif = drive->hwif; - - if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ)) - /* force an abort */ - hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE); - - rq->errors++; - - idescsi_end_request(drive, 0, 0); - - return ide_stopped; -} - -static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs) -{ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); - struct request *rq = HWGROUP(drive)->rq; - struct ide_atapi_pc *pc = (struct ide_atapi_pc *) rq->special; - int log = test_bit(IDESCSI_LOG_CMD, &scsi->log); - struct Scsi_Host *host; - int errors = rq->errors; - unsigned long flags; - - if (!blk_special_request(rq) && !blk_sense_request(rq)) { - ide_end_request(drive, uptodate, nrsecs); - return 0; - } - ide_end_drive_cmd (drive, 0, 0); - if (blk_sense_request(rq)) { - struct ide_atapi_pc *opc = (struct ide_atapi_pc *) rq->buffer; - if (log) { - printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number); - ide_scsi_hex_dump(pc->buf, 16); - } - memcpy((void *) opc->scsi_cmd->sense_buffer, pc->buf, - SCSI_SENSE_BUFFERSIZE); - kfree(pc->buf); - kfree(pc); - blk_put_request(rq); - pc = opc; - rq = pc->rq; - pc->scsi_cmd->result = (CHECK_CONDITION << 1) | - (((pc->flags & PC_FLAG_TIMEDOUT) ? 
- DID_TIME_OUT : - DID_OK) << 16); - } else if (pc->flags & PC_FLAG_TIMEDOUT) { - if (log) - printk (KERN_WARNING "ide-scsi: %s: timed out for %lu\n", - drive->name, pc->scsi_cmd->serial_number); - pc->scsi_cmd->result = DID_TIME_OUT << 16; - } else if (errors >= ERROR_MAX) { - pc->scsi_cmd->result = DID_ERROR << 16; - if (log) - printk ("ide-scsi: %s: I/O error for %lu\n", drive->name, pc->scsi_cmd->serial_number); - } else if (errors) { - if (log) - printk ("ide-scsi: %s: check condition for %lu\n", drive->name, pc->scsi_cmd->serial_number); - if (!idescsi_check_condition(drive, rq)) - /* we started a request sense, so we'll be back, exit for now */ - return 0; - pc->scsi_cmd->result = (CHECK_CONDITION << 1) | (DID_OK << 16); - } else { - pc->scsi_cmd->result = DID_OK << 16; - } - host = pc->scsi_cmd->device->host; - spin_lock_irqsave(host->host_lock, flags); - pc->done(pc->scsi_cmd); - spin_unlock_irqrestore(host->host_lock, flags); - kfree(pc); - blk_put_request(rq); - drive->pc = NULL; - return 0; -} - -static inline int idescsi_set_direction(struct ide_atapi_pc *pc) -{ - switch (pc->c[0]) { - case READ_6: case READ_10: case READ_12: - pc->flags &= ~PC_FLAG_WRITING; - return 0; - case WRITE_6: case WRITE_10: case WRITE_12: - pc->flags |= PC_FLAG_WRITING; - return 0; - default: - return 1; - } -} - -static int idescsi_map_sg(ide_drive_t *drive, struct ide_atapi_pc *pc) -{ - ide_hwif_t *hwif = drive->hwif; - struct scatterlist *sg, *scsi_sg; - int segments; - - if (!pc->req_xfer || pc->req_xfer % 1024) - return 1; - - if (idescsi_set_direction(pc)) - return 1; - - sg = hwif->sg_table; - scsi_sg = scsi_sglist(pc->scsi_cmd); - segments = scsi_sg_count(pc->scsi_cmd); - - if (segments > hwif->sg_max_nents) - return 1; - - hwif->sg_nents = segments; - memcpy(sg, scsi_sg, sizeof(*sg) * segments); - - return 0; -} - -static ide_startstop_t idescsi_issue_pc(ide_drive_t *drive, - struct ide_atapi_pc *pc) -{ - /* Set the current packet command */ - drive->pc = pc; - - 
return ide_issue_pc(drive, ide_scsi_get_timeout(pc), ide_scsi_expiry); -} - -/* - * idescsi_do_request is our request handling function. - */ -static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block) -{ - debug_log("dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name, - rq->cmd[0], rq->errors); - debug_log("sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n", - rq->sector, rq->nr_sectors, rq->current_nr_sectors); - - if (blk_sense_request(rq) || blk_special_request(rq)) { - struct ide_atapi_pc *pc = (struct ide_atapi_pc *)rq->special; - - if ((drive->dev_flags & IDE_DFLAG_USING_DMA) && - idescsi_map_sg(drive, pc) == 0) - pc->flags |= PC_FLAG_DMA_OK; - - return idescsi_issue_pc(drive, pc); - } - blk_dump_rq_flags(rq, "ide-scsi: unsup command"); - idescsi_end_request (drive, 0, 0); - return ide_stopped; -} - -#ifdef CONFIG_IDE_PROC_FS -static ide_proc_entry_t idescsi_proc[] = { - { "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL }, - { NULL, 0, NULL, NULL } -}; - -#define ide_scsi_devset_get(name, field) \ -static int get_##name(ide_drive_t *drive) \ -{ \ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); \ - return scsi->field; \ -} - -#define ide_scsi_devset_set(name, field) \ -static int set_##name(ide_drive_t *drive, int arg) \ -{ \ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); \ - scsi->field = arg; \ - return 0; \ -} - -#define ide_scsi_devset_rw_field(_name, _field) \ -ide_scsi_devset_get(_name, _field); \ -ide_scsi_devset_set(_name, _field); \ -IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name); - -ide_devset_rw_field(bios_cyl, bios_cyl); -ide_devset_rw_field(bios_head, bios_head); -ide_devset_rw_field(bios_sect, bios_sect); - -ide_scsi_devset_rw_field(transform, transform); -ide_scsi_devset_rw_field(log, log); - -static const struct ide_proc_devset idescsi_settings[] = { - IDE_PROC_DEVSET(bios_cyl, 0, 1023), - IDE_PROC_DEVSET(bios_head, 0, 255), - IDE_PROC_DEVSET(bios_sect, 0, 63), - 
IDE_PROC_DEVSET(log, 0, 1), - IDE_PROC_DEVSET(transform, 0, 3), - { 0 }, -}; - -static ide_proc_entry_t *ide_scsi_proc_entries(ide_drive_t *drive) -{ - return idescsi_proc; -} - -static const struct ide_proc_devset *ide_scsi_proc_devsets(ide_drive_t *drive) -{ - return idescsi_settings; -} -#endif - -/* - * Driver initialization. - */ -static void idescsi_setup (ide_drive_t *drive, idescsi_scsi_t *scsi) -{ - clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform); -#if IDESCSI_DEBUG_LOG - set_bit(IDESCSI_LOG_CMD, &scsi->log); -#endif /* IDESCSI_DEBUG_LOG */ - - drive->pc_callback = ide_scsi_callback; - drive->pc_update_buffers = NULL; - drive->pc_io_buffers = ide_io_buffers; - - ide_proc_register_driver(drive, scsi->driver); -} - -static void ide_scsi_remove(ide_drive_t *drive) -{ - struct Scsi_Host *scsihost = drive->driver_data; - struct ide_scsi_obj *scsi = scsihost_to_idescsi(scsihost); - struct gendisk *g = scsi->disk; - - scsi_remove_host(scsihost); - ide_proc_unregister_driver(drive, scsi->driver); - - ide_unregister_region(g); - - drive->driver_data = NULL; - g->private_data = NULL; - put_disk(g); - - ide_scsi_put(scsi); - - drive->dev_flags &= ~IDE_DFLAG_SCSI; -} - -static int ide_scsi_probe(ide_drive_t *); - -static ide_driver_t idescsi_driver = { - .gen_driver = { - .owner = THIS_MODULE, - .name = "ide-scsi", - .bus = &ide_bus_type, - }, - .probe = ide_scsi_probe, - .remove = ide_scsi_remove, - .version = IDESCSI_VERSION, - .do_request = idescsi_do_request, - .end_request = idescsi_end_request, - .error = idescsi_atapi_error, -#ifdef CONFIG_IDE_PROC_FS - .proc_entries = ide_scsi_proc_entries, - .proc_devsets = ide_scsi_proc_devsets, -#endif -}; - -static int idescsi_ide_open(struct block_device *bdev, fmode_t mode) -{ - struct ide_scsi_obj *scsi = ide_scsi_get(bdev->bd_disk); - - if (!scsi) - return -ENXIO; - - return 0; -} - -static int idescsi_ide_release(struct gendisk *disk, fmode_t mode) -{ - ide_scsi_put(ide_scsi_g(disk)); - return 0; -} - -static int 
idescsi_ide_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct ide_scsi_obj *scsi = ide_scsi_g(bdev->bd_disk); - return generic_ide_ioctl(scsi->drive, bdev, cmd, arg); -} - -static struct block_device_operations idescsi_ops = { - .owner = THIS_MODULE, - .open = idescsi_ide_open, - .release = idescsi_ide_release, - .locked_ioctl = idescsi_ide_ioctl, -}; - -static int idescsi_slave_configure(struct scsi_device * sdp) -{ - /* Configure detected device */ - sdp->use_10_for_rw = 1; - sdp->use_10_for_ms = 1; - scsi_adjust_queue_depth(sdp, MSG_SIMPLE_TAG, sdp->host->cmd_per_lun); - return 0; -} - -static const char *idescsi_info (struct Scsi_Host *host) -{ - return "SCSI host adapter emulation for IDE ATAPI devices"; -} - -static int idescsi_ioctl (struct scsi_device *dev, int cmd, void __user *arg) -{ - idescsi_scsi_t *scsi = scsihost_to_idescsi(dev->host); - - if (cmd == SG_SET_TRANSFORM) { - if (arg) - set_bit(IDESCSI_SG_TRANSFORM, &scsi->transform); - else - clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform); - return 0; - } else if (cmd == SG_GET_TRANSFORM) - return put_user(test_bit(IDESCSI_SG_TRANSFORM, &scsi->transform), (int __user *) arg); - return -EINVAL; -} - -static int idescsi_queue (struct scsi_cmnd *cmd, - void (*done)(struct scsi_cmnd *)) -{ - struct Scsi_Host *host = cmd->device->host; - idescsi_scsi_t *scsi = scsihost_to_idescsi(host); - ide_drive_t *drive = scsi->drive; - struct request *rq = NULL; - struct ide_atapi_pc *pc = NULL; - int write = cmd->sc_data_direction == DMA_TO_DEVICE; - - if (!drive) { - scmd_printk (KERN_ERR, cmd, "drive not present\n"); - goto abort; - } - scsi = drive_to_idescsi(drive); - pc = kmalloc(sizeof(struct ide_atapi_pc), GFP_ATOMIC); - rq = blk_get_request(drive->queue, write, GFP_ATOMIC); - if (rq == NULL || pc == NULL) { - printk (KERN_ERR "ide-scsi: %s: out of memory\n", drive->name); - goto abort; - } - - memset (pc->c, 0, 12); - pc->flags = 0; - if (cmd->sc_data_direction 
== DMA_TO_DEVICE) - pc->flags |= PC_FLAG_WRITING; - pc->rq = rq; - memcpy (pc->c, cmd->cmnd, cmd->cmd_len); - pc->buf = NULL; - pc->sg = scsi_sglist(cmd); - pc->sg_cnt = scsi_sg_count(cmd); - pc->b_count = 0; - pc->req_xfer = pc->buf_size = scsi_bufflen(cmd); - pc->scsi_cmd = cmd; - pc->done = done; - pc->timeout = jiffies + cmd->request->timeout; - - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) { - printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number); - ide_scsi_hex_dump(cmd->cmnd, cmd->cmd_len); - if (memcmp(pc->c, cmd->cmnd, cmd->cmd_len)) { - printk ("ide-scsi: %s: que %lu, tsl = ", drive->name, cmd->serial_number); - ide_scsi_hex_dump(pc->c, 12); - } - } - - rq->special = (char *) pc; - rq->cmd_type = REQ_TYPE_SPECIAL; - spin_unlock_irq(host->host_lock); - rq->ref_count++; - memcpy(rq->cmd, pc->c, 12); - blk_execute_rq_nowait(drive->queue, scsi->disk, rq, 0, NULL); - spin_lock_irq(host->host_lock); - return 0; -abort: - kfree (pc); - if (rq) - blk_put_request(rq); - cmd->result = DID_ERROR << 16; - done(cmd); - return 0; -} - -static int idescsi_eh_abort (struct scsi_cmnd *cmd) -{ - idescsi_scsi_t *scsi = scsihost_to_idescsi(cmd->device->host); - ide_drive_t *drive = scsi->drive; - ide_hwif_t *hwif; - ide_hwgroup_t *hwgroup; - int busy; - int ret = FAILED; - - struct ide_atapi_pc *pc; - - /* In idescsi_eh_abort we try to gently pry our command from the ide subsystem */ - - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) - printk (KERN_WARNING "ide-scsi: abort called for %lu\n", cmd->serial_number); - - if (!drive) { - printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_abort\n"); - WARN_ON(1); - goto no_drive; - } - - hwif = drive->hwif; - hwgroup = hwif->hwgroup; - - /* First give it some more time, how much is "right" is hard to say :-( - FIXME - uses mdelay which causes latency? */ - busy = ide_wait_not_busy(hwif, 100); - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) - printk (KERN_WARNING "ide-scsi: drive did%s become ready\n", busy?" 
not":""); - - spin_lock_irq(&hwgroup->lock); - - /* If there is no pc running we're done (our interrupt took care of it) */ - pc = drive->pc; - if (pc == NULL) { - ret = SUCCESS; - goto ide_unlock; - } - - /* It's somewhere in flight. Does ide subsystem agree? */ - if (pc->scsi_cmd->serial_number == cmd->serial_number && !busy && - elv_queue_empty(drive->queue) && HWGROUP(drive)->rq != pc->rq) { - /* - * FIXME - not sure this condition can ever occur - */ - printk (KERN_ERR "ide-scsi: cmd aborted!\n"); - - if (blk_sense_request(pc->rq)) - kfree(pc->buf); - /* we need to call blk_put_request twice. */ - blk_put_request(pc->rq); - blk_put_request(pc->rq); - kfree(pc); - drive->pc = NULL; - - ret = SUCCESS; - } - -ide_unlock: - spin_unlock_irq(&hwgroup->lock); -no_drive: - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) - printk (KERN_WARNING "ide-scsi: abort returns %s\n", ret == SUCCESS?"success":"failed"); - - return ret; -} - -static int idescsi_eh_reset (struct scsi_cmnd *cmd) -{ - struct request *req; - idescsi_scsi_t *scsi = scsihost_to_idescsi(cmd->device->host); - ide_drive_t *drive = scsi->drive; - ide_hwgroup_t *hwgroup; - int ready = 0; - int ret = SUCCESS; - - struct ide_atapi_pc *pc; - - /* In idescsi_eh_reset we forcefully remove the command from the ide subsystem and reset the device. 
*/ - - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) - printk (KERN_WARNING "ide-scsi: reset called for %lu\n", cmd->serial_number); - - if (!drive) { - printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_reset\n"); - WARN_ON(1); - return FAILED; - } - - hwgroup = drive->hwif->hwgroup; - - spin_lock_irq(cmd->device->host->host_lock); - spin_lock(&hwgroup->lock); - - pc = drive->pc; - if (pc) - req = pc->rq; - - if (pc == NULL || req != hwgroup->rq || hwgroup->handler == NULL) { - printk (KERN_WARNING "ide-scsi: No active request in idescsi_eh_reset\n"); - spin_unlock(&hwgroup->lock); - spin_unlock_irq(cmd->device->host->host_lock); - return FAILED; - } - - /* kill current request */ - if (__blk_end_request(req, -EIO, 0)) - BUG(); - if (blk_sense_request(req)) - kfree(pc->buf); - kfree(pc); - drive->pc = NULL; - blk_put_request(req); - - /* now nuke the drive queue */ - while ((req = elv_next_request(drive->queue))) { - if (__blk_end_request(req, -EIO, 0)) - BUG(); - } - - hwgroup->rq = NULL; - hwgroup->handler = NULL; - hwgroup->busy = 1; /* will set this to zero when ide reset finished */ - spin_unlock(&hwgroup->lock); - - ide_do_reset(drive); - - /* ide_do_reset starts a polling handler which restarts itself every 50ms until the reset finishes */ - - do { - spin_unlock_irq(cmd->device->host->host_lock); - msleep(50); - spin_lock_irq(cmd->device->host->host_lock); - } while ( HWGROUP(drive)->handler ); - - ready = drive_is_ready(drive); - HWGROUP(drive)->busy--; - if (!ready) { - printk (KERN_ERR "ide-scsi: reset failed!\n"); - ret = FAILED; - } - - spin_unlock_irq(cmd->device->host->host_lock); - return ret; -} - -static int idescsi_bios(struct scsi_device *sdev, struct block_device *bdev, - sector_t capacity, int *parm) -{ - idescsi_scsi_t *idescsi = scsihost_to_idescsi(sdev->host); - ide_drive_t *drive = idescsi->drive; - - if (drive->bios_cyl && drive->bios_head && drive->bios_sect) { - parm[0] = drive->bios_head; - parm[1] = drive->bios_sect; - parm[2] = 
drive->bios_cyl; - } - return 0; -} - -static struct scsi_host_template idescsi_template = { - .module = THIS_MODULE, - .name = "idescsi", - .info = idescsi_info, - .slave_configure = idescsi_slave_configure, - .ioctl = idescsi_ioctl, - .queuecommand = idescsi_queue, - .eh_abort_handler = idescsi_eh_abort, - .eh_host_reset_handler = idescsi_eh_reset, - .bios_param = idescsi_bios, - .can_queue = 40, - .this_id = -1, - .sg_tablesize = 256, - .cmd_per_lun = 5, - .max_sectors = 128, - .use_clustering = DISABLE_CLUSTERING, - .emulated = 1, - .proc_name = "ide-scsi", -}; - -static int ide_scsi_probe(ide_drive_t *drive) -{ - idescsi_scsi_t *idescsi; - struct Scsi_Host *host; - struct gendisk *g; - static int warned; - int err = -ENOMEM; - u16 last_lun; - - if (!warned && drive->media == ide_cdrom) { - printk(KERN_WARNING "ide-scsi is deprecated for cd burning! Use ide-cd and give dev=/dev/hdX as device\n"); - warned = 1; - } - - if (idescsi_nocd && drive->media == ide_cdrom) - return -ENODEV; - - if (!strstr("ide-scsi", drive->driver_req) || - drive->media == ide_disk || - !(host = scsi_host_alloc(&idescsi_template,sizeof(idescsi_scsi_t)))) - return -ENODEV; - - drive->dev_flags |= IDE_DFLAG_SCSI; - - g = alloc_disk(1 << PARTN_BITS); - if (!g) - goto out_host_put; - - ide_init_disk(g, drive); - - host->max_id = 1; - - last_lun = drive->id[ATA_ID_LAST_LUN]; - if (last_lun) - debug_log("%s: last_lun=%u\n", drive->name, last_lun); - - if ((last_lun & 7) != 7) - host->max_lun = (last_lun & 7) + 1; - else - host->max_lun = 1; - - drive->driver_data = host; - idescsi = scsihost_to_idescsi(host); - idescsi->drive = drive; - idescsi->driver = &idescsi_driver; - idescsi->host = host; - idescsi->disk = g; - g->private_data = &idescsi->driver; - err = 0; - idescsi_setup(drive, idescsi); - g->fops = &idescsi_ops; - ide_register_region(g); - err = scsi_add_host(host, &drive->gendev); - if (!err) { - scsi_scan_host(host); - return 0; - } - /* fall through on error */ - 
ide_unregister_region(g); - ide_proc_unregister_driver(drive, &idescsi_driver); - - put_disk(g); -out_host_put: - drive->dev_flags &= ~IDE_DFLAG_SCSI; - scsi_host_put(host); - return err; -} - -static int __init init_idescsi_module(void) -{ - return driver_register(&idescsi_driver.gen_driver); -} - -static void __exit exit_idescsi_module(void) -{ - driver_unregister(&idescsi_driver.gen_driver); -} - -module_param(idescsi_nocd, int, 0600); -MODULE_PARM_DESC(idescsi_nocd, "Disable handling of CD-ROMs so they may be driven by ide-cd"); -module_init(init_idescsi_module); -module_exit(exit_idescsi_module); -MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From 784c4d8b1b1e66f8c45e8b889613f4982f525b2b Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Fri, 2 Jan 2009 13:42:34 +0000 Subject: Document usage of multiple-instances of devpts Changelog [v2]: - Add note indicating strict isolation is not possible unless all mounts of devpts use the 'newinstance' mount option. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- Documentation/filesystems/devpts.txt | 132 +++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 Documentation/filesystems/devpts.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt new file mode 100644 index 00000000000..68dffd87f9b --- /dev/null +++ b/Documentation/filesystems/devpts.txt @@ -0,0 +1,132 @@ + +To support containers, we now allow multiple instances of devpts filesystem, +such that indices of ptys allocated in one instance are independent of indices +allocated in other instances of devpts. + +To preserve backward compatibility, this support for multiple instances is +enabled only if: + + - CONFIG_DEVPTS_MULTIPLE_INSTANCES=y, and + - '-o newinstance' mount option is specified while mounting devpts + +IOW, devpts now supports both single-instance and multi-instance semantics. 
+ +If CONFIG_DEVPTS_MULTIPLE_INSTANCES=n, there is no change in behavior and +this is referred to as the "legacy" mode. In this mode, the new mount options +(-o newinstance and -o ptmxmode) will be ignored with a 'bogus option' message +on console. + +If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and devpts is mounted without the +'newinstance' option (as in current start-up scripts) the new mount binds +to the initial kernel mount of devpts. This mode is referred to as the +'single-instance' mode and the current, single-instance semantics are +preserved, i.e. PTYs are common across the system. + +The only difference between this single-instance mode and the legacy mode +is the presence of new, '/dev/pts/ptmx' node with permissions 0000, which +can safely be ignored. + +If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and 'newinstance' option is specified, +the mount is considered to be in the multi-instance mode and a new instance +of the devpts fs is created. Any ptys created in this instance are independent +of ptys in other instances of devpts. Like in the single-instance mode, the +/dev/pts/ptmx node is present. To effectively use the multi-instance mode, +open of /dev/ptmx must be redirected to '/dev/pts/ptmx' using a symlink or +bind-mount. + +Eg: A container startup script could do the following: + + $ chmod 0666 /dev/pts/ptmx + $ rm /dev/ptmx + $ ln -s pts/ptmx /dev/ptmx + $ ns_exec -cm /bin/bash + + # We are now in new container + + $ umount /dev/pts + $ mount -t devpts -o newinstance lxcpts /dev/pts + $ sshd -p 1234 + +where 'ns_exec -cm /bin/bash' calls clone() with CLONE_NEWNS flag and execs +/bin/bash in the child process. A pty created by the sshd is not visible in +the original mount of /dev/pts. + +User-space changes +------------------ + +In multi-instance mode (i.e. '-o newinstance' mount option is specified at least +once), the following user-space issues should be noted. + +1.
If -o newinstance mount option is never used, /dev/pts/ptmx can be ignored + and no change is needed to system-startup scripts. + +2. To effectively use multi-instance mode (i.e -o newinstance is specified) + administrators or startup scripts should "redirect" open of /dev/ptmx to + /dev/pts/ptmx using either a bind mount or symlink. + + $ mount -t devpts -o newinstance devpts /dev/pts + + followed by either + + $ rm /dev/ptmx + $ ln -s pts/ptmx /dev/ptmx + $ chmod 666 /dev/pts/ptmx + or + $ mount -o bind /dev/pts/ptmx /dev/ptmx + +3. The '/dev/ptmx -> pts/ptmx' symlink is the preferred method since it + enables better error-reporting and treats both single-instance and + multi-instance mounts similarly. + + But this method requires that system-startup scripts set the mode of + /dev/pts/ptmx correctly (default mode is 0000). The scripts can set the + mode by, either + + - adding ptmxmode mount option to devpts entry in /etc/fstab, or + - using 'chmod 0666 /dev/pts/ptmx' + +4. If multi-instance mode mount is needed for containers, but the system + startup scripts have not yet been updated, container-startup scripts + should bind mount /dev/ptmx to /dev/pts/ptmx to avoid breaking single- + instance mounts. + + Or, in general, container-startup scripts should use: + + mount -t devpts -o newinstance -o ptmxmode=0666 devpts /dev/pts + if [ ! -L /dev/ptmx ]; then + mount -o bind /dev/pts/ptmx /dev/ptmx + fi + + When all devpts mounts are multi-instance, /dev/ptmx can permanently be + a symlink to pts/ptmx and the bind mount can be ignored. + +5. A multi-instance mount that is not accompanied by the /dev/ptmx to + /dev/pts/ptmx redirection would result in an unusable/unreachable pty. + + mount -t devpts -o newinstance lxcpts /dev/pts + + immediately followed by: + + open("/dev/ptmx") + + would create a pty, say /dev/pts/7, in the initial kernel mount. + But /dev/pts/7 would be invisible in the new mount. + +6. 
The permissions for /dev/pts/ptmx node should be specified when mounting + /dev/pts, using the '-o ptmxmode=%o' mount option (default is 0000). + + mount -t devpts -o newinstance -o ptmxmode=0644 devpts /dev/pts + + The permissions can later be changed as usual with 'chmod'. + + chmod 666 /dev/pts/ptmx + +7. A mount of devpts without the 'newinstance' option results in binding to + the initial kernel mount. This behavior, while preserving legacy semantics, + does not provide strict isolation in a container environment. i.e. by + mounting devpts without the 'newinstance' option, a container could + get visibility into the 'host' or root container's devpts. + + To work around this and have strict isolation, all mounts of devpts, + including the mount in the root container, should use the newinstance + option. -- cgit v1.2.3-70-g09d2 From 50a2a8b35edec09aff900a9b1c629776e11c5c88 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 22 Dec 2008 09:13:11 -0300 Subject: V4L/DVB (10133): v4l2-framework: use correct comment style. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/v4l2-framework.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt index eeae76c22a9..ba9344294d6 100644 --- a/Documentation/video4linux/v4l2-framework.txt +++ b/Documentation/video4linux/v4l2-framework.txt @@ -410,7 +410,7 @@ for you. err = video_register_device(vdev, VFL_TYPE_GRABBER, -1); if (err) { - video_device_release(vdev); // or kfree(my_vdev); + video_device_release(vdev); /* or kfree(my_vdev); */ return err; } -- cgit v1.2.3-70-g09d2 From dfa9a5ae679ff2d23caa995d0f55a19abaf0596e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 23 Dec 2008 12:17:23 -0300 Subject: V4L/DVB (10134): v4l2 doc: set v4l2_dev instead of parent. Update the documentation now that the v4l2_dev field is in. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/v4l2-framework.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt index ba9344294d6..38d054aa0e0 100644 --- a/Documentation/video4linux/v4l2-framework.txt +++ b/Documentation/video4linux/v4l2-framework.txt @@ -390,8 +390,7 @@ allocated memory. You should also set these fields: -- parent: set to the parent device (same device as was used to register - v4l2_device). +- v4l2_dev: set to the v4l2_device parent device. - name: set to something descriptive and unique. - fops: set to the file_operations struct. - ioctl_ops: if you use the v4l2_ioctl_ops to simplify ioctl maintenance @@ -516,5 +515,4 @@ void *video_drvdata(struct file *file); You can go from a video_device struct to the v4l2_device struct using: -struct v4l2_device *v4l2_dev = dev_get_drvdata(vdev->parent); - +struct v4l2_device *v4l2_dev = vdev->v4l2_dev; -- cgit v1.2.3-70-g09d2 From c7dd09dabc278b03980c8e93d0eee3843b5ad514 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 23 Dec 2008 13:42:25 -0300 Subject: V4L/DVB (10136): v4l2 doc: update v4l2-framework.txt Mention the new v4l2_file_operations struct. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/v4l2-framework.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt index 38d054aa0e0..3b483c1e012 100644 --- a/Documentation/video4linux/v4l2-framework.txt +++ b/Documentation/video4linux/v4l2-framework.txt @@ -392,13 +392,16 @@ You should also set these fields: - v4l2_dev: set to the v4l2_device parent device. - name: set to something descriptive and unique. -- fops: set to the file_operations struct. 
+- fops: set to the v4l2_file_operations struct. - ioctl_ops: if you use the v4l2_ioctl_ops to simplify ioctl maintenance (highly recommended to use this and it might become compulsory in the future!), then set this to your v4l2_ioctl_ops struct. -If you use v4l2_ioctl_ops, then you should set .unlocked_ioctl to -__video_ioctl2 or .ioctl to video_ioctl2 in your file_operations struct. +If you use v4l2_ioctl_ops, then you should set either .unlocked_ioctl or +.ioctl to video_ioctl2 in your v4l2_file_operations struct. + +The v4l2_file_operations struct is a subset of file_operations. The main +difference is that the inode argument is omitted since it is never used. video_device registration -- cgit v1.2.3-70-g09d2 From aecde8b53b8ee1330a5a8206200f0d6b8845a6e0 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 30 Dec 2008 07:14:19 -0300 Subject: V4L/DVB (10141): v4l2: debugging API changed to match against driver name instead of ID. Since the i2c driver ID will be removed in the near future we have to modify the v4l2 debugging API to use the driver name instead of driver ID. Note that this API is not used in applications other than v4l2-dbg.cpp as it is for debugging and testing only. Should anyone use the old VIDIOC_G_CHIP_IDENT, then this will be logged with a warning that it is deprecated and will be removed in 2.6.30. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/v4l2-framework.txt | 2 +- drivers/media/video/bt8xx/bttv-driver.c | 9 +++-- drivers/media/video/cafe_ccic.c | 7 ++-- drivers/media/video/cs5345.c | 13 +++---- drivers/media/video/cs53l32a.c | 2 +- drivers/media/video/cx18/cx18-i2c.c | 28 +------------- drivers/media/video/cx18/cx18-i2c.h | 1 - drivers/media/video/cx18/cx18-ioctl.c | 41 ++++++++------------ drivers/media/video/cx23885/cx23885-video.c | 8 ++-- drivers/media/video/cx25840/cx25840-core.c | 13 +++---- drivers/media/video/cx88/cx88-video.c | 13 ++++--- drivers/media/video/em28xx/em28xx-video.c | 28 +++++++------- drivers/media/video/ivtv/ivtv-driver.c | 7 ++-- drivers/media/video/ivtv/ivtv-ioctl.c | 21 +++++----- drivers/media/video/m52790.c | 13 +++---- drivers/media/video/msp3400-driver.c | 2 +- drivers/media/video/mt9m001.c | 19 ++++----- drivers/media/video/mt9m111.c | 19 ++++----- drivers/media/video/mt9t031.c | 18 ++++----- drivers/media/video/mt9v022.c | 19 ++++----- drivers/media/video/ov7670.c | 2 +- drivers/media/video/ov772x.c | 7 ++-- drivers/media/video/pvrusb2/pvrusb2-hdw.c | 11 +++--- drivers/media/video/pvrusb2/pvrusb2-hdw.h | 4 +- drivers/media/video/pvrusb2/pvrusb2-v4l2.c | 6 +-- drivers/media/video/saa7115.c | 13 +++---- drivers/media/video/saa7127.c | 13 +++---- drivers/media/video/saa7134/saa6752hs.c | 2 +- drivers/media/video/saa7134/saa7134-empress.c | 14 +++---- drivers/media/video/saa7134/saa7134-video.c | 9 +++-- drivers/media/video/saa717x.c | 9 +++-- drivers/media/video/soc_camera.c | 6 +-- drivers/media/video/tvaudio.c | 2 +- drivers/media/video/tvp5150.c | 13 +++---- drivers/media/video/tw9910.c | 6 +-- drivers/media/video/upd64031a.c | 13 +++---- drivers/media/video/upd64083.c | 13 +++---- drivers/media/video/usbvision/usbvision-video.c | 9 +++-- drivers/media/video/v4l2-common.c | 29 +++++++++----- drivers/media/video/v4l2-compat-ioctl32.c | 3 +- drivers/media/video/v4l2-ioctl.c 
| 15 +++++--- drivers/media/video/v4l2-subdev.c | 2 +- drivers/media/video/vp27smpx.c | 2 +- drivers/media/video/wm8739.c | 2 +- drivers/media/video/wm8775.c | 2 +- include/linux/videodev2.h | 51 ++++++++++++++++++------- include/media/soc_camera.h | 6 +-- include/media/v4l2-chip-ident.h | 4 +- include/media/v4l2-common.h | 6 +-- include/media/v4l2-int-device.h | 2 +- include/media/v4l2-ioctl.h | 6 +-- include/media/v4l2-subdev.h | 6 +-- 52 files changed, 290 insertions(+), 281 deletions(-) (limited to 'Documentation') diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt index 3b483c1e012..ff124374e9b 100644 --- a/Documentation/video4linux/v4l2-framework.txt +++ b/Documentation/video4linux/v4l2-framework.txt @@ -184,7 +184,7 @@ may be NULL if the subdev driver does not support anything from that category. It looks like this: struct v4l2_subdev_core_ops { - int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip); + int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip); int (*log_status)(struct v4l2_subdev *sd); int (*init)(struct v4l2_subdev *sd, u32 val); ... 
diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c index ebcb8e5e9c4..d2f43bd2f84 100644 --- a/drivers/media/video/bt8xx/bttv-driver.c +++ b/drivers/media/video/bt8xx/bttv-driver.c @@ -2039,7 +2039,7 @@ static int bttv_log_status(struct file *file, void *f) #ifdef CONFIG_VIDEO_ADV_DEBUG static int bttv_g_register(struct file *file, void *f, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct bttv_fh *fh = f; struct bttv *btv = fh->btv; @@ -2047,18 +2047,19 @@ static int bttv_g_register(struct file *file, void *f, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; /* bt848 has a 12-bit register space */ reg->reg &= 0xfff; reg->val = btread(reg->reg); + reg->size = 1; return 0; } static int bttv_s_register(struct file *file, void *f, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct bttv_fh *fh = f; struct bttv *btv = fh->btv; @@ -2066,7 +2067,7 @@ static int bttv_s_register(struct file *file, void *f, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; /* bt848 has a 12-bit register space */ diff --git a/drivers/media/video/cafe_ccic.c b/drivers/media/video/cafe_ccic.c index 476171cf500..34a39d2e470 100644 --- a/drivers/media/video/cafe_ccic.c +++ b/drivers/media/video/cafe_ccic.c @@ -859,7 +859,7 @@ static int __cafe_cam_reset(struct cafe_camera *cam) */ static int cafe_cam_init(struct cafe_camera *cam) { - struct v4l2_chip_ident chip = { V4L2_CHIP_MATCH_I2C_ADDR, 0, 0, 0 }; + struct v4l2_dbg_chip_ident chip; int ret; mutex_lock(&cam->s_mutex); @@ -869,8 +869,9 @@ static int cafe_cam_init(struct cafe_camera *cam) ret = __cafe_cam_reset(cam); if (ret) goto out; - chip.match_chip = cam->sensor->addr; - ret = __cafe_cam_cmd(cam, VIDIOC_G_CHIP_IDENT, &chip); + 
chip.match.type = V4L2_CHIP_MATCH_I2C_ADDR; + chip.match.addr = cam->sensor->addr; + ret = __cafe_cam_cmd(cam, VIDIOC_DBG_G_CHIP_IDENT, &chip); if (ret) goto out; cam->sensor_type = chip.ident; diff --git a/drivers/media/video/cs5345.c b/drivers/media/video/cs5345.c index 70fcd0d5de1..14bebf8a116 100644 --- a/drivers/media/video/cs5345.c +++ b/drivers/media/video/cs5345.c @@ -95,25 +95,24 @@ static int cs5345_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl) } #ifdef CONFIG_VIDEO_ADV_DEBUG -static int cs5345_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int cs5345_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; + reg->size = 1; reg->val = cs5345_read(sd, reg->reg & 0x1f); return 0; } -static int cs5345_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int cs5345_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -122,7 +121,7 @@ static int cs5345_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) } #endif -static int cs5345_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int cs5345_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct i2c_client *client = v4l2_get_subdevdata(sd); diff --git a/drivers/media/video/cs53l32a.c b/drivers/media/video/cs53l32a.c index cb65d519cf7..7292a6316e6 100644 --- a/drivers/media/video/cs53l32a.c +++ b/drivers/media/video/cs53l32a.c @@ -102,7 +102,7 @@ static int cs53l32a_s_ctrl(struct v4l2_subdev 
*sd, struct v4l2_control *ctrl) return 0; } -static int cs53l32a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int cs53l32a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct i2c_client *client = v4l2_get_subdevdata(sd); diff --git a/drivers/media/video/cx18/cx18-i2c.c b/drivers/media/video/cx18/cx18-i2c.c index 8941f58bed7..83e1c633312 100644 --- a/drivers/media/video/cx18/cx18-i2c.c +++ b/drivers/media/video/cx18/cx18-i2c.c @@ -242,7 +242,7 @@ int cx18_call_i2c_client(struct cx18 *cx, int addr, unsigned cmd, void *arg) return retval; } } - if (cmd != VIDIOC_G_CHIP_IDENT) + if (cmd != VIDIOC_DBG_G_CHIP_IDENT) CX18_ERR("i2c addr 0x%02x not found for cmd 0x%x!\n", addr, cmd); return -ENODEV; @@ -268,17 +268,6 @@ static int cx18_i2c_id_addr(struct cx18 *cx, u32 id) return retval; } -/* Find the i2c device name matching the DRIVERID */ -static const char *cx18_i2c_id_name(u32 id) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(hw_driverids); i++) - if (hw_driverids[i] == id) - return hw_devicenames[i]; - return "unknown device"; -} - /* Find the i2c device name matching the CX18_HW_ flag */ static const char *cx18_i2c_hw_name(u32 hw) { @@ -326,21 +315,6 @@ int cx18_i2c_hw(struct cx18 *cx, u32 hw, unsigned int cmd, void *arg) return cx18_call_i2c_client(cx, addr, cmd, arg); } -/* Calls i2c device based on I2C driver ID. 
*/ -int cx18_i2c_id(struct cx18 *cx, u32 id, unsigned int cmd, void *arg) -{ - int addr; - - addr = cx18_i2c_id_addr(cx, id); - if (addr < 0) { - if (cmd != VIDIOC_G_CHIP_IDENT) - CX18_ERR("i2c ID 0x%08x (%s) not found for cmd 0x%x!\n", - id, cx18_i2c_id_name(id), cmd); - return addr; - } - return cx18_call_i2c_client(cx, addr, cmd, arg); -} - /* broadcast cmd for all I2C clients and for the gpio subsystem */ void cx18_call_i2c_clients(struct cx18 *cx, unsigned int cmd, void *arg) { diff --git a/drivers/media/video/cx18/cx18-i2c.h b/drivers/media/video/cx18/cx18-i2c.h index 113c3f9a2cc..4869739013b 100644 --- a/drivers/media/video/cx18/cx18-i2c.h +++ b/drivers/media/video/cx18/cx18-i2c.h @@ -23,7 +23,6 @@ int cx18_i2c_hw_addr(struct cx18 *cx, u32 hw); int cx18_i2c_hw(struct cx18 *cx, u32 hw, unsigned int cmd, void *arg); -int cx18_i2c_id(struct cx18 *cx, u32 id, unsigned int cmd, void *arg); int cx18_call_i2c_client(struct cx18 *cx, int addr, unsigned cmd, void *arg); void cx18_call_i2c_clients(struct cx18 *cx, unsigned int cmd, void *arg); int cx18_i2c_register(struct cx18 *cx, unsigned idx); diff --git a/drivers/media/video/cx18/cx18-ioctl.c b/drivers/media/video/cx18/cx18-ioctl.c index 8aa152b3954..7086aaba77d 100644 --- a/drivers/media/video/cx18/cx18-ioctl.c +++ b/drivers/media/video/cx18/cx18-ioctl.c @@ -254,30 +254,24 @@ static int cx18_s_fmt_sliced_vbi_cap(struct file *file, void *fh, } static int cx18_g_chip_ident(struct file *file, void *fh, - struct v4l2_chip_ident *chip) + struct v4l2_dbg_chip_ident *chip) { struct cx18 *cx = ((struct cx18_open_id *)fh)->cx; chip->ident = V4L2_IDENT_NONE; chip->revision = 0; - if (chip->match_type == V4L2_CHIP_MATCH_HOST) { - if (v4l2_chip_match_host(chip->match_type, chip->match_chip)) - chip->ident = V4L2_IDENT_CX23418; + if (v4l2_chip_match_host(&chip->match)) { + chip->ident = V4L2_IDENT_CX23418; return 0; } - if (chip->match_type == V4L2_CHIP_MATCH_I2C_DRIVER) - return cx18_i2c_id(cx, chip->match_chip, 
VIDIOC_G_CHIP_IDENT, - chip); - if (chip->match_type == V4L2_CHIP_MATCH_I2C_ADDR) - return cx18_call_i2c_client(cx, chip->match_chip, - VIDIOC_G_CHIP_IDENT, chip); - return -EINVAL; + cx18_call_i2c_clients(cx, VIDIOC_DBG_G_CHIP_IDENT, chip); + return 0; } #ifdef CONFIG_VIDEO_ADV_DEBUG static int cx18_cxc(struct cx18 *cx, unsigned int cmd, void *arg) { - struct v4l2_register *regs = arg; + struct v4l2_dbg_register *regs = arg; unsigned long flags; if (!capable(CAP_SYS_ADMIN)) @@ -286,6 +280,7 @@ static int cx18_cxc(struct cx18 *cx, unsigned int cmd, void *arg) return -EINVAL; spin_lock_irqsave(&cx18_cards_lock, flags); + regs->size = 4; if (cmd == VIDIOC_DBG_G_REGISTER) regs->val = cx18_read_enc(cx, regs->reg); else @@ -295,31 +290,25 @@ static int cx18_cxc(struct cx18 *cx, unsigned int cmd, void *arg) } static int cx18_g_register(struct file *file, void *fh, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct cx18 *cx = ((struct cx18_open_id *)fh)->cx; - if (v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (v4l2_chip_match_host(&reg->match)) return cx18_cxc(cx, VIDIOC_DBG_G_REGISTER, reg); - if (reg->match_type == V4L2_CHIP_MATCH_I2C_DRIVER) - return cx18_i2c_id(cx, reg->match_chip, VIDIOC_DBG_G_REGISTER, - reg); - return cx18_call_i2c_client(cx, reg->match_chip, VIDIOC_DBG_G_REGISTER, - reg); + cx18_call_i2c_clients(cx, VIDIOC_DBG_G_REGISTER, reg); + return 0; } static int cx18_s_register(struct file *file, void *fh, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct cx18 *cx = ((struct cx18_open_id *)fh)->cx; - if (v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (v4l2_chip_match_host(&reg->match)) return cx18_cxc(cx, VIDIOC_DBG_S_REGISTER, reg); - if (reg->match_type == V4L2_CHIP_MATCH_I2C_DRIVER) - return cx18_i2c_id(cx, reg->match_chip, VIDIOC_DBG_S_REGISTER, - reg); - return cx18_call_i2c_client(cx, reg->match_chip, VIDIOC_DBG_S_REGISTER, - reg); + cx18_call_i2c_clients(cx, VIDIOC_DBG_S_REGISTER, reg); 
+ return 0; } #endif diff --git a/drivers/media/video/cx23885/cx23885-video.c b/drivers/media/video/cx23885/cx23885-video.c index 637c4d00884..2d81c4d0434 100644 --- a/drivers/media/video/cx23885/cx23885-video.c +++ b/drivers/media/video/cx23885/cx23885-video.c @@ -1326,11 +1326,11 @@ static int vidioc_s_frequency(struct file *file, void *priv, #ifdef CONFIG_VIDEO_ADV_DEBUG static int vidioc_g_register(struct file *file, void *fh, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct cx23885_dev *dev = ((struct cx23885_fh *)fh)->dev; - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; cx23885_call_i2c_clients(&dev->i2c_bus[2], VIDIOC_DBG_G_REGISTER, reg); @@ -1339,11 +1339,11 @@ static int vidioc_g_register(struct file *file, void *fh, } static int vidioc_s_register(struct file *file, void *fh, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct cx23885_dev *dev = ((struct cx23885_fh *)fh)->dev; - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; cx23885_call_i2c_clients(&dev->i2c_bus[2], VIDIOC_DBG_S_REGISTER, reg); diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c index 2ad277189da..88f2fd32bfe 100644 --- a/drivers/media/video/cx25840/cx25840-core.c +++ b/drivers/media/video/cx25840/cx25840-core.c @@ -1120,25 +1120,24 @@ static int cx25840_init(struct v4l2_subdev *sd, u32 val) } #ifdef CONFIG_VIDEO_ADV_DEBUG -static int cx25840_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int cx25840_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; + reg->size = 1; reg->val = 
cx25840_read(client, reg->reg & 0x0fff); return 0; } -static int cx25840_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int cx25840_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1362,7 +1361,7 @@ static int cx25840_reset(struct v4l2_subdev *sd, u32 val) return 0; } -static int cx25840_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int cx25840_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c index b93b7ab99d8..791e69d804f 100644 --- a/drivers/media/video/cx88/cx88-video.c +++ b/drivers/media/video/cx88/cx88-video.c @@ -1447,25 +1447,26 @@ static int vidioc_s_frequency (struct file *file, void *priv, #ifdef CONFIG_VIDEO_ADV_DEBUG static int vidioc_g_register (struct file *file, void *fh, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct cx88_core *core = ((struct cx8800_fh*)fh)->dev->core; - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; /* cx2388x has a 24-bit register space */ - reg->val = cx_read(reg->reg&0xffffff); + reg->val = cx_read(reg->reg & 0xffffff); + reg->size = 4; return 0; } static int vidioc_s_register (struct file *file, void *fh, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct cx88_core *core = ((struct cx8800_fh*)fh)->dev->core; - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; - cx_write(reg->reg&0xffffff, reg->val); + cx_write(reg->reg & 
0xffffff, reg->val); return 0; } #endif diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c index 9cb7c64a88f..416b691c33c 100644 --- a/drivers/media/video/em28xx/em28xx-video.c +++ b/drivers/media/video/em28xx/em28xx-video.c @@ -1154,7 +1154,7 @@ static int em28xx_reg_len(int reg) } static int vidioc_g_chip_ident(struct file *file, void *priv, - struct v4l2_chip_ident *chip) + struct v4l2_dbg_chip_ident *chip) { struct em28xx_fh *fh = priv; struct em28xx *dev = fh->dev; @@ -1162,20 +1162,20 @@ static int vidioc_g_chip_ident(struct file *file, void *priv, chip->ident = V4L2_IDENT_NONE; chip->revision = 0; - em28xx_i2c_call_clients(dev, VIDIOC_G_CHIP_IDENT, chip); + em28xx_i2c_call_clients(dev, VIDIOC_DBG_G_CHIP_IDENT, chip); return 0; } static int vidioc_g_register(struct file *file, void *priv, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct em28xx_fh *fh = priv; struct em28xx *dev = fh->dev; int ret; - switch (reg->match_type) { + switch (reg->match.type) { case V4L2_CHIP_MATCH_AC97: mutex_lock(&dev->lock); ret = em28xx_read_ac97(dev, reg->reg); @@ -1184,6 +1184,7 @@ static int vidioc_g_register(struct file *file, void *priv, return ret; reg->val = ret; + reg->size = 1; return 0; case V4L2_CHIP_MATCH_I2C_DRIVER: em28xx_i2c_call_clients(dev, VIDIOC_DBG_G_REGISTER, reg); @@ -1192,12 +1193,13 @@ static int vidioc_g_register(struct file *file, void *priv, /* Not supported yet */ return -EINVAL; default: - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; } /* Match host */ - if (em28xx_reg_len(reg->reg) == 1) { + reg->size = em28xx_reg_len(reg->reg); + if (reg->size == 1) { mutex_lock(&dev->lock); ret = em28xx_read_reg(dev, reg->reg); mutex_unlock(&dev->lock); @@ -1207,7 +1209,7 @@ static int vidioc_g_register(struct file *file, void *priv, reg->val = ret; } else { - __le64 val = 0; + __le16 val = 0; mutex_lock(&dev->lock); ret = 
em28xx_read_reg_req_len(dev, USB_REQ_GET_STATUS, reg->reg, (char *)&val, 2); @@ -1215,21 +1217,21 @@ static int vidioc_g_register(struct file *file, void *priv, if (ret < 0) return ret; - reg->val = le64_to_cpu(val); + reg->val = le16_to_cpu(val); } return 0; } static int vidioc_s_register(struct file *file, void *priv, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct em28xx_fh *fh = priv; struct em28xx *dev = fh->dev; - __le64 buf; + __le16 buf; int rc; - switch (reg->match_type) { + switch (reg->match.type) { case V4L2_CHIP_MATCH_AC97: mutex_lock(&dev->lock); rc = em28xx_write_ac97(dev, reg->reg, reg->val); @@ -1243,12 +1245,12 @@ static int vidioc_s_register(struct file *file, void *priv, /* Not supported yet */ return -EINVAL; default: - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; } /* Match host */ - buf = cpu_to_le64(reg->val); + buf = cpu_to_le16(reg->val); mutex_lock(&dev->lock); rc = em28xx_write_regs(dev, reg->reg, (char *)&buf, diff --git a/drivers/media/video/ivtv/ivtv-driver.c b/drivers/media/video/ivtv/ivtv-driver.c index 08b76295175..e8e5921cdc3 100644 --- a/drivers/media/video/ivtv/ivtv-driver.c +++ b/drivers/media/video/ivtv/ivtv-driver.c @@ -902,18 +902,19 @@ static void ivtv_load_and_init_modules(struct ivtv *itv) } if (hw & IVTV_HW_SAA711X) { - struct v4l2_chip_ident v = { V4L2_CHIP_MATCH_I2C_DRIVER, I2C_DRIVERID_SAA711X }; + struct v4l2_dbg_chip_ident v; /* determine the exact saa711x model */ itv->hw_flags &= ~IVTV_HW_SAA711X; + v.match.type = V4L2_CHIP_MATCH_I2C_DRIVER; + strlcpy(v.match.name, "saa7115", sizeof(v.match.name)); ivtv_call_hw(itv, IVTV_HW_SAA711X, core, g_chip_ident, &v); if (v.ident == V4L2_IDENT_SAA7114) { itv->hw_flags |= IVTV_HW_SAA7114; /* VBI is not yet supported by the saa7114 driver. 
*/ itv->v4l2_cap &= ~(V4L2_CAP_SLICED_VBI_CAPTURE|V4L2_CAP_VBI_CAPTURE); - } - else { + } else { itv->hw_flags |= IVTV_HW_SAA7115; } itv->vbi.raw_decoder_line_size = 1443; diff --git a/drivers/media/video/ivtv/ivtv-ioctl.c b/drivers/media/video/ivtv/ivtv-ioctl.c index 1f6ca93b984..f6b3ef6e691 100644 --- a/drivers/media/video/ivtv/ivtv-ioctl.c +++ b/drivers/media/video/ivtv/ivtv-ioctl.c @@ -674,19 +674,19 @@ static int ivtv_s_fmt_vid_out_overlay(struct file *file, void *fh, struct v4l2_f return ret; } -static int ivtv_g_chip_ident(struct file *file, void *fh, struct v4l2_chip_ident *chip) +static int ivtv_g_chip_ident(struct file *file, void *fh, struct v4l2_dbg_chip_ident *chip) { struct ivtv *itv = ((struct ivtv_open_id *)fh)->itv; chip->ident = V4L2_IDENT_NONE; chip->revision = 0; - if (chip->match_type == V4L2_CHIP_MATCH_HOST) { - if (v4l2_chip_match_host(chip->match_type, chip->match_chip)) + if (chip->match.type == V4L2_CHIP_MATCH_HOST) { + if (v4l2_chip_match_host(&chip->match)) chip->ident = itv->has_cx23415 ? V4L2_IDENT_CX23415 : V4L2_IDENT_CX23416; return 0; } - if (chip->match_type != V4L2_CHIP_MATCH_I2C_DRIVER && - chip->match_type != V4L2_CHIP_MATCH_I2C_ADDR) + if (chip->match.type != V4L2_CHIP_MATCH_I2C_DRIVER && + chip->match.type != V4L2_CHIP_MATCH_I2C_ADDR) return -EINVAL; /* TODO: is this correct? 
*/ return ivtv_call_all_err(itv, core, g_chip_ident, chip); @@ -695,7 +695,7 @@ static int ivtv_g_chip_ident(struct file *file, void *fh, struct v4l2_chip_ident #ifdef CONFIG_VIDEO_ADV_DEBUG static int ivtv_itvc(struct ivtv *itv, unsigned int cmd, void *arg) { - struct v4l2_register *regs = arg; + struct v4l2_dbg_register *regs = arg; volatile u8 __iomem *reg_start; if (!capable(CAP_SYS_ADMIN)) @@ -710,6 +710,7 @@ static int ivtv_itvc(struct ivtv *itv, unsigned int cmd, void *arg) else return -EINVAL; + regs->size = 4; if (cmd == VIDIOC_DBG_G_REGISTER) regs->val = readl(regs->reg + reg_start); else @@ -717,11 +718,11 @@ static int ivtv_itvc(struct ivtv *itv, unsigned int cmd, void *arg) return 0; } -static int ivtv_g_register(struct file *file, void *fh, struct v4l2_register *reg) +static int ivtv_g_register(struct file *file, void *fh, struct v4l2_dbg_register *reg) { struct ivtv *itv = ((struct ivtv_open_id *)fh)->itv; - if (v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (v4l2_chip_match_host(&reg->match)) return ivtv_itvc(itv, VIDIOC_DBG_G_REGISTER, reg); /* TODO: subdev errors should not be ignored, this should become a subdev helper function. */ @@ -729,11 +730,11 @@ static int ivtv_g_register(struct file *file, void *fh, struct v4l2_register *re return 0; } -static int ivtv_s_register(struct file *file, void *fh, struct v4l2_register *reg) +static int ivtv_s_register(struct file *file, void *fh, struct v4l2_dbg_register *reg) { struct ivtv *itv = ((struct ivtv_open_id *)fh)->itv; - if (v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (v4l2_chip_match_host(&reg->match)) return ivtv_itvc(itv, VIDIOC_DBG_S_REGISTER, reg); /* TODO: subdev errors should not be ignored, this should become a subdev helper function. 
*/ diff --git a/drivers/media/video/m52790.c b/drivers/media/video/m52790.c index 07be14a9fe7..de397ef57b4 100644 --- a/drivers/media/video/m52790.c +++ b/drivers/media/video/m52790.c @@ -80,29 +80,28 @@ static int m52790_s_routing(struct v4l2_subdev *sd, const struct v4l2_routing *r } #ifdef CONFIG_VIDEO_ADV_DEBUG -static int m52790_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int m52790_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct m52790_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (reg->reg != 0) return -EINVAL; + reg->size = 1; reg->val = state->input | state->output; return 0; } -static int m52790_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int m52790_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct m52790_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -115,7 +114,7 @@ static int m52790_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) } #endif -static int m52790_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int m52790_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct i2c_client *client = v4l2_get_subdevdata(sd); diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c index b8577ade405..4d7a9185211 100644 --- a/drivers/media/video/msp3400-driver.c +++ b/drivers/media/video/msp3400-driver.c @@ -733,7 +733,7 @@ static int msp_queryctrl(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc) return 0; } 
-static int msp_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int msp_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct msp_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); diff --git a/drivers/media/video/mt9m001.c b/drivers/media/video/mt9m001.c index 1a1a1245367..c1bf75ef274 100644 --- a/drivers/media/video/mt9m001.c +++ b/drivers/media/video/mt9m001.c @@ -343,14 +343,14 @@ static int mt9m001_try_fmt(struct soc_camera_device *icd, } static int mt9m001_get_chip_id(struct soc_camera_device *icd, - struct v4l2_chip_ident *id) + struct v4l2_dbg_chip_ident *id) { struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd); - if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR) + if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR) return -EINVAL; - if (id->match_chip != mt9m001->client->addr) + if (id->match.addr != mt9m001->client->addr) return -ENODEV; id->ident = mt9m001->model; @@ -361,16 +361,17 @@ static int mt9m001_get_chip_id(struct soc_camera_device *icd, #ifdef CONFIG_VIDEO_ADV_DEBUG static int mt9m001_get_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd); - if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) + if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) return -EINVAL; - if (reg->match_chip != mt9m001->client->addr) + if (reg->match.addr != mt9m001->client->addr) return -ENODEV; + reg->size = 2; reg->val = reg_read(icd, reg->reg); if (reg->val > 0xffff) @@ -380,14 +381,14 @@ static int mt9m001_get_register(struct soc_camera_device *icd, } static int mt9m001_set_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd); - if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) + if 
(reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) return -EINVAL; - if (reg->match_chip != mt9m001->client->addr) + if (reg->match.addr != mt9m001->client->addr) return -ENODEV; if (reg_write(icd, reg->reg, reg->val) < 0) diff --git a/drivers/media/video/mt9m111.c b/drivers/media/video/mt9m111.c index c89ea41fe25..5b8e20979cc 100644 --- a/drivers/media/video/mt9m111.c +++ b/drivers/media/video/mt9m111.c @@ -514,14 +514,14 @@ static int mt9m111_try_fmt(struct soc_camera_device *icd, } static int mt9m111_get_chip_id(struct soc_camera_device *icd, - struct v4l2_chip_ident *id) + struct v4l2_dbg_chip_ident *id) { struct mt9m111 *mt9m111 = container_of(icd, struct mt9m111, icd); - if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR) + if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR) return -EINVAL; - if (id->match_chip != mt9m111->client->addr) + if (id->match.addr != mt9m111->client->addr) return -ENODEV; id->ident = mt9m111->model; @@ -532,18 +532,19 @@ static int mt9m111_get_chip_id(struct soc_camera_device *icd, #ifdef CONFIG_VIDEO_ADV_DEBUG static int mt9m111_get_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { int val; struct mt9m111 *mt9m111 = container_of(icd, struct mt9m111, icd); - if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff) + if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff) return -EINVAL; - if (reg->match_chip != mt9m111->client->addr) + if (reg->match.addr != mt9m111->client->addr) return -ENODEV; val = mt9m111_reg_read(icd, reg->reg); + reg->size = 2; reg->val = (u64)val; if (reg->val > 0xffff) @@ -553,14 +554,14 @@ static int mt9m111_get_register(struct soc_camera_device *icd, } static int mt9m111_set_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct mt9m111 *mt9m111 = container_of(icd, struct mt9m111, icd); - if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff) + if 
(reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff) return -EINVAL; - if (reg->match_chip != mt9m111->client->addr) + if (reg->match.addr != mt9m111->client->addr) return -ENODEV; if (mt9m111_reg_write(icd, reg->reg, reg->val) < 0) diff --git a/drivers/media/video/mt9t031.c b/drivers/media/video/mt9t031.c index 1a9d53966d0..349d8e36553 100644 --- a/drivers/media/video/mt9t031.c +++ b/drivers/media/video/mt9t031.c @@ -326,14 +326,14 @@ static int mt9t031_try_fmt(struct soc_camera_device *icd, } static int mt9t031_get_chip_id(struct soc_camera_device *icd, - struct v4l2_chip_ident *id) + struct v4l2_dbg_chip_ident *id) { struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd); - if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR) + if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR) return -EINVAL; - if (id->match_chip != mt9t031->client->addr) + if (id->match.addr != mt9t031->client->addr) return -ENODEV; id->ident = mt9t031->model; @@ -344,14 +344,14 @@ static int mt9t031_get_chip_id(struct soc_camera_device *icd, #ifdef CONFIG_VIDEO_ADV_DEBUG static int mt9t031_get_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd); - if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) + if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) return -EINVAL; - if (reg->match_chip != mt9t031->client->addr) + if (reg->match.addr != mt9t031->client->addr) return -ENODEV; reg->val = reg_read(icd, reg->reg); @@ -363,14 +363,14 @@ static int mt9t031_get_register(struct soc_camera_device *icd, } static int mt9t031_set_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd); - if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) + if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) return 
-EINVAL; - if (reg->match_chip != mt9t031->client->addr) + if (reg->match.addr != mt9t031->client->addr) return -ENODEV; if (reg_write(icd, reg->reg, reg->val) < 0) diff --git a/drivers/media/video/mt9v022.c b/drivers/media/video/mt9v022.c index 14a5f9c21ff..b04c8cb1644 100644 --- a/drivers/media/video/mt9v022.c +++ b/drivers/media/video/mt9v022.c @@ -422,14 +422,14 @@ static int mt9v022_try_fmt(struct soc_camera_device *icd, } static int mt9v022_get_chip_id(struct soc_camera_device *icd, - struct v4l2_chip_ident *id) + struct v4l2_dbg_chip_ident *id) { struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd); - if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR) + if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR) return -EINVAL; - if (id->match_chip != mt9v022->client->addr) + if (id->match.addr != mt9v022->client->addr) return -ENODEV; id->ident = mt9v022->model; @@ -440,16 +440,17 @@ static int mt9v022_get_chip_id(struct soc_camera_device *icd, #ifdef CONFIG_VIDEO_ADV_DEBUG static int mt9v022_get_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd); - if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) + if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) return -EINVAL; - if (reg->match_chip != mt9v022->client->addr) + if (reg->match.addr != mt9v022->client->addr) return -ENODEV; + reg->size = 2; reg->val = reg_read(icd, reg->reg); if (reg->val > 0xffff) @@ -459,14 +460,14 @@ static int mt9v022_get_register(struct soc_camera_device *icd, } static int mt9v022_set_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd); - if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) + if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff) return -EINVAL; - if (reg->match_chip != 
mt9v022->client->addr) + if (reg->match.addr != mt9v022->client->addr) return -ENODEV; if (reg_write(icd, reg->reg, reg->val) < 0) diff --git a/drivers/media/video/ov7670.c b/drivers/media/video/ov7670.c index ea032f5f2f4..ca26b0c50cf 100644 --- a/drivers/media/video/ov7670.c +++ b/drivers/media/video/ov7670.c @@ -1310,7 +1310,7 @@ static int ov7670_command(struct i2c_client *client, unsigned int cmd, void *arg) { switch (cmd) { - case VIDIOC_G_CHIP_IDENT: + case VIDIOC_DBG_G_CHIP_IDENT: return v4l2_chip_ident_i2c_client(client, arg, V4L2_IDENT_OV7670, 0); case VIDIOC_INT_RESET: diff --git a/drivers/media/video/ov772x.c b/drivers/media/video/ov772x.c index 54b736fcc07..3c9e0ba974e 100644 --- a/drivers/media/video/ov772x.c +++ b/drivers/media/video/ov772x.c @@ -724,7 +724,7 @@ static unsigned long ov772x_query_bus_param(struct soc_camera_device *icd) } static int ov772x_get_chip_id(struct soc_camera_device *icd, - struct v4l2_chip_ident *id) + struct v4l2_dbg_chip_ident *id) { struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd); @@ -736,11 +736,12 @@ static int ov772x_get_chip_id(struct soc_camera_device *icd, #ifdef CONFIG_VIDEO_ADV_DEBUG static int ov772x_get_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd); int ret; + reg->size = 1; if (reg->reg > 0xff) return -EINVAL; @@ -754,7 +755,7 @@ static int ov772x_get_register(struct soc_camera_device *icd, } static int ov772x_set_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd); diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw.c b/drivers/media/video/pvrusb2/pvrusb2-hdw.c index 4358079f196..8fb92ac78c7 100644 --- a/drivers/media/video/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/video/pvrusb2/pvrusb2-hdw.c @@ -4732,26 +4732,25 @@ static int 
pvr2_hdw_get_eeprom_addr(struct pvr2_hdw *hdw) int pvr2_hdw_register_access(struct pvr2_hdw *hdw, - u32 match_type, u32 match_chip, u64 reg_id, - int setFl,u64 *val_ptr) + struct v4l2_dbg_match *match, u64 reg_id, + int setFl, u64 *val_ptr) { #ifdef CONFIG_VIDEO_ADV_DEBUG struct pvr2_i2c_client *cp; - struct v4l2_register req; + struct v4l2_dbg_register req; int stat = 0; int okFl = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - req.match_type = match_type; - req.match_chip = match_chip; + req.match = *match; req.reg = reg_id; if (setFl) req.val = *val_ptr; mutex_lock(&hdw->i2c_list_lock); do { list_for_each_entry(cp, &hdw->i2c_clients, list) { if (!v4l2_chip_match_i2c_client( cp->client, - req.match_type, req.match_chip)) { + &req.match)) { continue; } stat = pvr2_i2c_client_cmd( diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw.h b/drivers/media/video/pvrusb2/pvrusb2-hdw.h index 49482d1f2b2..1b4fec337c6 100644 --- a/drivers/media/video/pvrusb2/pvrusb2-hdw.h +++ b/drivers/media/video/pvrusb2/pvrusb2-hdw.h @@ -242,8 +242,8 @@ void pvr2_hdw_v4l_store_minor_number(struct pvr2_hdw *, setFl - true to set the register, false to read it val_ptr - storage location for source / result. */ int pvr2_hdw_register_access(struct pvr2_hdw *, - u32 match_type, u32 match_chip,u64 reg_id, - int setFl,u64 *val_ptr); + struct v4l2_dbg_match *match, u64 reg_id, + int setFl, u64 *val_ptr); /* The following entry points are all lower level things you normally don't want to worry about. 
*/ diff --git a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c index b9aedceb2c4..878fd52a73b 100644 --- a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c +++ b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c @@ -851,11 +851,11 @@ static long pvr2_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg) case VIDIOC_DBG_G_REGISTER: { u64 val; - struct v4l2_register *req = (struct v4l2_register *)arg; + struct v4l2_dbg_register *req = (struct v4l2_dbg_register *)arg; if (cmd == VIDIOC_DBG_S_REGISTER) val = req->val; ret = pvr2_hdw_register_access( - hdw,req->match_type,req->match_chip,req->reg, - cmd == VIDIOC_DBG_S_REGISTER,&val); + hdw, &req->match, req->reg, + cmd == VIDIOC_DBG_S_REGISTER, &val); if (cmd == VIDIOC_DBG_G_REGISTER) req->val = val; break; } diff --git a/drivers/media/video/saa7115.c b/drivers/media/video/saa7115.c index 22708ecdf1b..46c796c3fec 100644 --- a/drivers/media/video/saa7115.c +++ b/drivers/media/video/saa7115.c @@ -1371,25 +1371,24 @@ static int saa711x_g_vbi_data(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_dat } #ifdef CONFIG_VIDEO_ADV_DEBUG -static int saa711x_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int saa711x_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; reg->val = saa711x_read(sd, reg->reg & 0xff); + reg->size = 1; return 0; } -static int saa711x_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int saa711x_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if
(!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1398,7 +1397,7 @@ static int saa711x_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) } #endif -static int saa711x_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int saa711x_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct saa711x_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); diff --git a/drivers/media/video/saa7127.c b/drivers/media/video/saa7127.c index bfc85654795..d6848f7a503 100644 --- a/drivers/media/video/saa7127.c +++ b/drivers/media/video/saa7127.c @@ -623,25 +623,24 @@ static int saa7127_s_vbi_data(struct v4l2_subdev *sd, const struct v4l2_sliced_v } #ifdef CONFIG_VIDEO_ADV_DEBUG -static int saa7127_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int saa7127_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; reg->val = saa7127_read(sd, reg->reg & 0xff); + reg->size = 1; return 0; } -static int saa7127_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int saa7127_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -650,7 +649,7 @@ static int saa7127_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) } #endif -static int saa7127_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int saa7127_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct saa7127_state *state = to_state(sd); struct
i2c_client *client = v4l2_get_subdevdata(sd); diff --git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c index 1fb6eccdade..1fee6e84a51 100644 --- a/drivers/media/video/saa7134/saa6752hs.c +++ b/drivers/media/video/saa7134/saa6752hs.c @@ -838,7 +838,7 @@ saa6752hs_command(struct i2c_client *client, unsigned int cmd, void *arg) h->standard = *((v4l2_std_id *) arg); break; - case VIDIOC_G_CHIP_IDENT: + case VIDIOC_DBG_G_CHIP_IDENT: return v4l2_chip_ident_i2c_client(client, arg, h->chip, h->revision); diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c index 3beba480137..c9d8beb87a6 100644 --- a/drivers/media/video/saa7134/saa7134-empress.c +++ b/drivers/media/video/saa7134/saa7134-empress.c @@ -405,7 +405,7 @@ static int empress_querymenu(struct file *file, void *priv, } static int empress_g_chip_ident(struct file *file, void *fh, - struct v4l2_chip_ident *chip) + struct v4l2_dbg_chip_ident *chip) { struct saa7134_dev *dev = file->private_data; @@ -413,12 +413,12 @@ static int empress_g_chip_ident(struct file *file, void *fh, chip->revision = 0; if (dev->mpeg_i2c_client == NULL) return -EINVAL; - if (chip->match_type == V4L2_CHIP_MATCH_I2C_DRIVER && - chip->match_chip == I2C_DRIVERID_SAA6752HS) - return saa7134_i2c_call_saa6752(dev, VIDIOC_G_CHIP_IDENT, chip); - if (chip->match_type == V4L2_CHIP_MATCH_I2C_ADDR && - chip->match_chip == dev->mpeg_i2c_client->addr) - return saa7134_i2c_call_saa6752(dev, VIDIOC_G_CHIP_IDENT, chip); + if (chip->match.type == V4L2_CHIP_MATCH_I2C_DRIVER && + !strcmp(chip->match.name, "saa6752hs")) + return saa7134_i2c_call_saa6752(dev, VIDIOC_DBG_G_CHIP_IDENT, chip); + if (chip->match.type == V4L2_CHIP_MATCH_I2C_ADDR && + chip->match.addr == dev->mpeg_i2c_client->addr) + return saa7134_i2c_call_saa6752(dev, VIDIOC_DBG_G_CHIP_IDENT, chip); return -EINVAL; } diff --git a/drivers/media/video/saa7134/saa7134-video.c 
b/drivers/media/video/saa7134/saa7134-video.c index 6b2ab57538e..a1f7e351f57 100644 --- a/drivers/media/video/saa7134/saa7134-video.c +++ b/drivers/media/video/saa7134/saa7134-video.c @@ -2247,24 +2247,25 @@ static int saa7134_g_parm(struct file *file, void *fh, #ifdef CONFIG_VIDEO_ADV_DEBUG static int vidioc_g_register (struct file *file, void *priv, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct saa7134_fh *fh = priv; struct saa7134_dev *dev = fh->dev; - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; reg->val = saa_readb(reg->reg); + reg->size = 1; return 0; } static int vidioc_s_register (struct file *file, void *priv, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct saa7134_fh *fh = priv; struct saa7134_dev *dev = fh->dev; - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; saa_writeb(reg->reg&0xffffff, reg->val); return 0; diff --git a/drivers/media/video/saa717x.c b/drivers/media/video/saa717x.c index 9befca65905..454ad1dd750 100644 --- a/drivers/media/video/saa717x.c +++ b/drivers/media/video/saa717x.c @@ -1171,25 +1171,26 @@ static int saa717x_queryctrl(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc) } #ifdef CONFIG_VIDEO_ADV_DEBUG -static int saa717x_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int saa717x_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; reg->val = saa717x_read(sd, reg->reg); + reg->size = 1; return 0; } -static int saa717x_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int saa717x_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct
i2c_client *client = v4l2_get_subdevdata(sd); u16 addr = reg->reg & 0xffff; u8 val = reg->val & 0xff; - if (!v4l2_chip_match_i2c_client(client, reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c index 9986e02bcf1..fcb05f06de8 100644 --- a/drivers/media/video/soc_camera.c +++ b/drivers/media/video/soc_camera.c @@ -699,7 +699,7 @@ static int soc_camera_s_crop(struct file *file, void *fh, } static int soc_camera_g_chip_ident(struct file *file, void *fh, - struct v4l2_chip_ident *id) + struct v4l2_dbg_chip_ident *id) { struct soc_camera_file *icf = file->private_data; struct soc_camera_device *icd = icf->icd; @@ -712,7 +712,7 @@ static int soc_camera_g_chip_ident(struct file *file, void *fh, #ifdef CONFIG_VIDEO_ADV_DEBUG static int soc_camera_g_register(struct file *file, void *fh, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct soc_camera_file *icf = file->private_data; struct soc_camera_device *icd = icf->icd; @@ -724,7 +724,7 @@ static int soc_camera_g_register(struct file *file, void *fh, } static int soc_camera_s_register(struct file *file, void *fh, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct soc_camera_file *icf = file->private_data; struct soc_camera_device *icd = icf->icd; diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index d0c794da735..5aeccb301ce 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -1762,7 +1762,7 @@ static int tvaudio_s_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *fr return 0; } -static int tvaudio_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int tvaudio_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct i2c_client *client = v4l2_get_subdevdata(sd); diff --git
a/drivers/media/video/tvp5150.c b/drivers/media/video/tvp5150.c index a388a9f0cb1..2cd64ef27b9 100644 --- a/drivers/media/video/tvp5150.c +++ b/drivers/media/video/tvp5150.c @@ -963,7 +963,7 @@ static int tvp5150_g_fmt(struct v4l2_subdev *sd, struct v4l2_format *fmt) static int tvp5150_g_chip_ident(struct v4l2_subdev *sd, - struct v4l2_chip_ident *chip) + struct v4l2_dbg_chip_ident *chip) { int rev; struct i2c_client *client = v4l2_get_subdevdata(sd); @@ -977,25 +977,24 @@ static int tvp5150_g_chip_ident(struct v4l2_subdev *sd, #ifdef CONFIG_VIDEO_ADV_DEBUG -static int tvp5150_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int tvp5150_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; reg->val = tvp5150_read(sd, reg->reg & 0xff); + reg->size = 1; return 0; } -static int tvp5150_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int tvp5150_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; diff --git a/drivers/media/video/tw9910.c b/drivers/media/video/tw9910.c index d5cdc4be1a3..52c0357faa5 100644 --- a/drivers/media/video/tw9910.c +++ b/drivers/media/video/tw9910.c @@ -575,7 +575,7 @@ static unsigned long tw9910_query_bus_param(struct soc_camera_device *icd) } static int tw9910_get_chip_id(struct soc_camera_device *icd, - struct v4l2_chip_ident *id) + struct v4l2_dbg_chip_ident *id) { id->ident = V4L2_IDENT_TW9910; id->revision = 0; @@ -606,7 +606,7 @@ static int tw9910_enum_input(struct soc_camera_device
*icd, #ifdef CONFIG_VIDEO_ADV_DEBUG static int tw9910_get_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd); int ret; @@ -627,7 +627,7 @@ static int tw9910_get_register(struct soc_camera_device *icd, } static int tw9910_set_register(struct soc_camera_device *icd, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd); diff --git a/drivers/media/video/upd64031a.c b/drivers/media/video/upd64031a.c index 7a609a3a6db..4f16effb530 100644 --- a/drivers/media/video/upd64031a.c +++ b/drivers/media/video/upd64031a.c @@ -147,7 +147,7 @@ static int upd64031a_s_routing(struct v4l2_subdev *sd, const struct v4l2_routing return upd64031a_s_frequency(sd, NULL); } -static int upd64031a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int upd64031a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct i2c_client *client = v4l2_get_subdevdata(sd); @@ -162,25 +162,24 @@ static int upd64031a_log_status(struct v4l2_subdev *sd) } #ifdef CONFIG_VIDEO_ADV_DEBUG -static int upd64031a_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int upd64031a_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; reg->val = upd64031a_read(sd, reg->reg & 0xff); + reg->size = 1; return 0; } -static int upd64031a_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int upd64031a_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type,
reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; diff --git a/drivers/media/video/upd64083.c b/drivers/media/video/upd64083.c index 58412cb9c01..4b712f69d1b 100644 --- a/drivers/media/video/upd64083.c +++ b/drivers/media/video/upd64083.c @@ -120,25 +120,24 @@ static int upd64083_s_routing(struct v4l2_subdev *sd, const struct v4l2_routing } #ifdef CONFIG_VIDEO_ADV_DEBUG -static int upd64083_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int upd64083_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; reg->val = upd64083_read(sd, reg->reg & 0xff); + reg->size = 1; return 0; } -static int upd64083_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg) +static int upd64083_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); - if (!v4l2_chip_match_i2c_client(client, - reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_i2c_client(client, &reg->match)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -147,7 +146,7 @@ static int upd64083_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg } #endif -static int upd64083_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int upd64083_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct i2c_client *client = v4l2_get_subdevdata(sd); diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c index 7c61c6d5ced..2be5e47ed08 100644 --- a/drivers/media/video/usbvision/usbvision-video.c +++ b/drivers/media/video/usbvision/usbvision-video.c @@ -477,12 +477,12 @@ static int
usbvision_v4l2_close(struct file *file) */ #ifdef CONFIG_VIDEO_ADV_DEBUG static int vidioc_g_register (struct file *file, void *priv, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct usb_usbvision *usbvision = video_drvdata(file); int errCode; - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; /* NT100x has a 8-bit register space */ errCode = usbvision_read_reg(usbvision, reg->reg&0xff); @@ -492,16 +492,17 @@ static int vidioc_g_register (struct file *file, void *priv, return errCode; } reg->val = errCode; + reg->size = 1; return 0; } static int vidioc_s_register (struct file *file, void *priv, - struct v4l2_register *reg) + struct v4l2_dbg_register *reg) { struct usb_usbvision *usbvision = video_drvdata(file); int errCode; - if (!v4l2_chip_match_host(reg->match_type, reg->match_chip)) + if (!v4l2_chip_match_host(&reg->match)) return -EINVAL; /* NT100x has a 8-bit register space */ errCode = usbvision_write_reg(usbvision, reg->reg&0xff, reg->val); diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c index c676b0b0f70..b8f2be8d5c0 100644 --- a/drivers/media/video/v4l2-common.c +++ b/drivers/media/video/v4l2-common.c @@ -797,11 +797,11 @@ u32 v4l2_ctrl_next(const u32 * const * ctrl_classes, u32 id) } EXPORT_SYMBOL(v4l2_ctrl_next); -int v4l2_chip_match_host(u32 match_type, u32 match_chip) +int v4l2_chip_match_host(const struct v4l2_dbg_match *match) { - switch (match_type) { + switch (match->type) { case V4L2_CHIP_MATCH_HOST: - return match_chip == 0; + return match->addr == 0; default: return 0; } @@ -809,23 +809,34 @@ int v4l2_chip_match_host(u32 match_type, u32 match_chip) EXPORT_SYMBOL(v4l2_chip_match_host); #if defined(CONFIG_I2C) || (defined(CONFIG_I2C_MODULE) && defined(MODULE)) -int v4l2_chip_match_i2c_client(struct i2c_client *c, u32 match_type, u32 match_chip) +int v4l2_chip_match_i2c_client(struct i2c_client *c, const struct v4l2_dbg_match
*match) { - switch (match_type) { + int len; + + if (c == NULL || match == NULL) + return 0; + + switch (match->type) { case V4L2_CHIP_MATCH_I2C_DRIVER: - return (c != NULL && c->driver != NULL && c->driver->id == match_chip); + if (c->driver == NULL || c->driver->driver.name == NULL) + return 0; + len = strlen(c->driver->driver.name); + /* legacy drivers have a ' suffix, don't try to match that */ + if (len && c->driver->driver.name[len - 1] == '\'') + len--; + return len && !strncmp(c->driver->driver.name, match->name, len); case V4L2_CHIP_MATCH_I2C_ADDR: - return (c != NULL && c->addr == match_chip); + return c->addr == match->addr; default: return 0; } } EXPORT_SYMBOL(v4l2_chip_match_i2c_client); -int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_chip_ident *chip, +int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_dbg_chip_ident *chip, u32 ident, u32 revision) { - if (!v4l2_chip_match_i2c_client(c, chip->match_type, chip->match_chip)) + if (!v4l2_chip_match_i2c_client(c, &chip->match)) return 0; if (chip->ident == V4L2_IDENT_NONE) { chip->ident = ident; diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c index ec81b9737bd..110376be5d2 100644 --- a/drivers/media/video/v4l2-compat-ioctl32.c +++ b/drivers/media/video/v4l2-compat-ioctl32.c @@ -1046,7 +1046,8 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg) case VIDIOC_TRY_ENCODER_CMD: case VIDIOC_DBG_S_REGISTER: case VIDIOC_DBG_G_REGISTER: - case VIDIOC_G_CHIP_IDENT: + case VIDIOC_DBG_G_CHIP_IDENT: + case VIDIOC_G_CHIP_IDENT_OLD: case VIDIOC_S_HW_FREQ_SEEK: ret = do_video_ioctl(file, cmd, arg); break; diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c index 8f629ef5b9e..52d687b165e 100644 --- a/drivers/media/video/v4l2-ioctl.c +++ b/drivers/media/video/v4l2-ioctl.c @@ -266,7 +266,7 @@ static const char *v4l2_ioctls[] = { [_IOC_NR(VIDIOC_DBG_S_REGISTER)] = 
"VIDIOC_DBG_S_REGISTER", [_IOC_NR(VIDIOC_DBG_G_REGISTER)] = "VIDIOC_DBG_G_REGISTER", - [_IOC_NR(VIDIOC_G_CHIP_IDENT)] = "VIDIOC_G_CHIP_IDENT", + [_IOC_NR(VIDIOC_DBG_G_CHIP_IDENT)] = "VIDIOC_DBG_G_CHIP_IDENT", [_IOC_NR(VIDIOC_S_HW_FREQ_SEEK)] = "VIDIOC_S_HW_FREQ_SEEK", #endif }; @@ -1720,7 +1720,7 @@ static long __video_do_ioctl(struct file *file, #ifdef CONFIG_VIDEO_ADV_DEBUG case VIDIOC_DBG_G_REGISTER: { - struct v4l2_register *p = arg; + struct v4l2_dbg_register *p = arg; if (!capable(CAP_SYS_ADMIN)) ret = -EPERM; @@ -1730,7 +1730,7 @@ static long __video_do_ioctl(struct file *file, } case VIDIOC_DBG_S_REGISTER: { - struct v4l2_register *p = arg; + struct v4l2_dbg_register *p = arg; if (!capable(CAP_SYS_ADMIN)) ret = -EPERM; @@ -1739,9 +1739,9 @@ static long __video_do_ioctl(struct file *file, break; } #endif - case VIDIOC_G_CHIP_IDENT: + case VIDIOC_DBG_G_CHIP_IDENT: { - struct v4l2_chip_ident *p = arg; + struct v4l2_dbg_chip_ident *p = arg; if (!ops->vidioc_g_chip_ident) break; @@ -1750,6 +1750,11 @@ static long __video_do_ioctl(struct file *file, dbgarg(cmd, "chip_ident=%u, revision=0x%x\n", p->ident, p->revision); break; } + case VIDIOC_G_CHIP_IDENT_OLD: + printk(KERN_ERR "VIDIOC_G_CHIP_IDENT has been deprecated and will disappear in 2.6.30.\n"); + printk(KERN_ERR "It is a debugging ioctl and must not be used in applications!\n"); + return -EINVAL; + case VIDIOC_S_HW_FREQ_SEEK: { struct v4l2_hw_freq_seek *p = arg; diff --git a/drivers/media/video/v4l2-subdev.c b/drivers/media/video/v4l2-subdev.c index e3612f29d0d..fbe9cc0d433 100644 --- a/drivers/media/video/v4l2-subdev.c +++ b/drivers/media/video/v4l2-subdev.c @@ -37,7 +37,7 @@ int v4l2_subdev_command(struct v4l2_subdev *sd, unsigned cmd, void *arg) return v4l2_subdev_call(sd, core, queryctrl, arg); case VIDIOC_LOG_STATUS: return v4l2_subdev_call(sd, core, log_status); - case VIDIOC_G_CHIP_IDENT: + case VIDIOC_DBG_G_CHIP_IDENT: return v4l2_subdev_call(sd, core, g_chip_ident, arg); case VIDIOC_INT_S_STANDBY: 
return v4l2_subdev_call(sd, core, s_standby, arg ? (*(u32 *)arg) : 0); diff --git a/drivers/media/video/vp27smpx.c b/drivers/media/video/vp27smpx.c index f72b859486a..5d73f66d9f5 100644 --- a/drivers/media/video/vp27smpx.c +++ b/drivers/media/video/vp27smpx.c @@ -113,7 +113,7 @@ static int vp27smpx_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *vt) return 0; } -static int vp27smpx_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int vp27smpx_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct i2c_client *client = v4l2_get_subdevdata(sd); diff --git a/drivers/media/video/wm8739.c b/drivers/media/video/wm8739.c index 12a31e7a5f6..f2864d5cd18 100644 --- a/drivers/media/video/wm8739.c +++ b/drivers/media/video/wm8739.c @@ -233,7 +233,7 @@ static int wm8739_queryctrl(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc) return -EINVAL; } -static int wm8739_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int wm8739_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct i2c_client *client = v4l2_get_subdevdata(sd); diff --git a/drivers/media/video/wm8775.c b/drivers/media/video/wm8775.c index d0220b0ec0b..53fcd42843e 100644 --- a/drivers/media/video/wm8775.c +++ b/drivers/media/video/wm8775.c @@ -130,7 +130,7 @@ static int wm8775_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl) return 0; } -static int wm8775_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip) +static int wm8775_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { struct i2c_client *client = v4l2_get_subdevdata(sd); diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 1f126e30766..5571dbe1c0a 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -1370,25 +1370,41 @@ struct v4l2_streamparm { /* * A D V A N C E D D E B U G G I N G * - * NOTE: EXPERIMENTAL API + * NOTE: EXPERIMENTAL API, NEVER RELY ON THIS IN 
APPLICATIONS! + * FOR DEBUGGING, TESTING AND INTERNAL USE ONLY! */ /* VIDIOC_DBG_G_REGISTER and VIDIOC_DBG_S_REGISTER */ #define V4L2_CHIP_MATCH_HOST 0 /* Match against chip ID on host (0 for the host) */ -#define V4L2_CHIP_MATCH_I2C_DRIVER 1 /* Match against I2C driver ID */ +#define V4L2_CHIP_MATCH_I2C_DRIVER 1 /* Match against I2C driver name */ #define V4L2_CHIP_MATCH_I2C_ADDR 2 /* Match against I2C 7-bit address */ #define V4L2_CHIP_MATCH_AC97 3 /* Match against anciliary AC97 chip */ -struct v4l2_register { - __u32 match_type; /* Match type */ - __u32 match_chip; /* Match this chip, meaning determined by match_type */ +struct v4l2_dbg_match { + __u32 type; /* Match type */ + union { /* Match this chip, meaning determined by type */ + __u32 addr; + char name[32]; + }; +} __attribute__ ((packed)); + +struct v4l2_dbg_register { + struct v4l2_dbg_match match; + __u32 size; /* register size in bytes */ __u64 reg; __u64 val; -}; +} __attribute__ ((packed)); + +/* VIDIOC_DBG_G_CHIP_IDENT */ +struct v4l2_dbg_chip_ident { + struct v4l2_dbg_match match; + __u32 ident; /* chip identifier as specified in */ + __u32 revision; /* chip revision, chip specific */ +} __attribute__ ((packed)); -/* VIDIOC_G_CHIP_IDENT */ -struct v4l2_chip_ident { +/* VIDIOC_G_CHIP_IDENT_OLD: Deprecated, do not use */ +struct v4l2_chip_ident_old { __u32 match_type; /* Match type */ __u32 match_chip; /* Match this chip, meaning determined by match_type */ __u32 ident; /* chip identifier as specified in */ @@ -1460,13 +1476,22 @@ struct v4l2_chip_ident { #define VIDIOC_G_ENC_INDEX _IOR('V', 76, struct v4l2_enc_idx) #define VIDIOC_ENCODER_CMD _IOWR('V', 77, struct v4l2_encoder_cmd) #define VIDIOC_TRY_ENCODER_CMD _IOWR('V', 78, struct v4l2_encoder_cmd) +#endif -/* Experimental, only implemented if CONFIG_VIDEO_ADV_DEBUG is defined */ -#define VIDIOC_DBG_S_REGISTER _IOW('V', 79, struct v4l2_register) -#define VIDIOC_DBG_G_REGISTER _IOWR('V', 80, struct v4l2_register) - -#define VIDIOC_G_CHIP_IDENT 
_IOWR('V', 81, struct v4l2_chip_ident) +#if 1 +/* Experimental, meant for debugging, testing and internal use. + Only implemented if CONFIG_VIDEO_ADV_DEBUG is defined. + You must be root to use these ioctls. Never use these in applications! */ +#define VIDIOC_DBG_S_REGISTER _IOW('V', 79, struct v4l2_dbg_register) +#define VIDIOC_DBG_G_REGISTER _IOWR('V', 80, struct v4l2_dbg_register) + +/* Experimental, meant for debugging, testing and internal use. + Never use this ioctl in applications! */ +#define VIDIOC_DBG_G_CHIP_IDENT _IOWR('V', 81, struct v4l2_dbg_chip_ident) +/* This is deprecated and will go away in 2.6.30 */ +#define VIDIOC_G_CHIP_IDENT_OLD _IOWR('V', 81, struct v4l2_chip_ident_old) #endif + #define VIDIOC_S_HW_FREQ_SEEK _IOW('V', 82, struct v4l2_hw_freq_seek) /* Reminder: when adding new ioctls please add support for them to drivers/media/video/v4l2-compat-ioctl32.c as well! */ diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h index 425b6a98c95..7440d925066 100644 --- a/include/media/soc_camera.h +++ b/include/media/soc_camera.h @@ -164,12 +164,12 @@ struct soc_camera_ops { unsigned long (*query_bus_param)(struct soc_camera_device *); int (*set_bus_param)(struct soc_camera_device *, unsigned long); int (*get_chip_id)(struct soc_camera_device *, - struct v4l2_chip_ident *); + struct v4l2_dbg_chip_ident *); int (*set_std)(struct soc_camera_device *, v4l2_std_id *); int (*enum_input)(struct soc_camera_device *, struct v4l2_input *); #ifdef CONFIG_VIDEO_ADV_DEBUG - int (*get_register)(struct soc_camera_device *, struct v4l2_register *); - int (*set_register)(struct soc_camera_device *, struct v4l2_register *); + int (*get_register)(struct soc_camera_device *, struct v4l2_dbg_register *); + int (*set_register)(struct soc_camera_device *, struct v4l2_dbg_register *); #endif int (*get_control)(struct soc_camera_device *, struct v4l2_control *); int (*set_control)(struct soc_camera_device *, struct v4l2_control *); diff --git 
a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h index 43dbb659f1f..9aaf652b20e 100644 --- a/include/media/v4l2-chip-ident.h +++ b/include/media/v4l2-chip-ident.h @@ -2,7 +2,7 @@ v4l2 chip identifiers header This header provides a list of chip identifiers that can be returned - through the VIDIOC_G_CHIP_IDENT ioctl. + through the VIDIOC_DBG_G_CHIP_IDENT ioctl. Copyright (C) 2007 Hans Verkuil @@ -24,7 +24,7 @@ #ifndef V4L2_CHIP_IDENT_H_ #define V4L2_CHIP_IDENT_H_ -/* VIDIOC_G_CHIP_IDENT: identifies the actual chip installed on the board */ +/* VIDIOC_DBG_G_CHIP_IDENT: identifies the actual chip installed on the board */ enum { /* general idents: reserved range 0-49 */ V4L2_IDENT_NONE = 0, /* No chip matched */ diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index f99c866d8c3..95e74f1874e 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -114,10 +114,10 @@ u32 v4l2_ctrl_next(const u32 * const *ctrl_classes, u32 id); /* Register/chip ident helper function */ struct i2c_client; /* forward reference */ -int v4l2_chip_match_i2c_client(struct i2c_client *c, u32 id_type, u32 chip_id); -int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_chip_ident *chip, +int v4l2_chip_match_i2c_client(struct i2c_client *c, const struct v4l2_dbg_match *match); +int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_dbg_chip_ident *chip, u32 ident, u32 revision); -int v4l2_chip_match_host(u32 id_type, u32 chip_id); +int v4l2_chip_match_host(const struct v4l2_dbg_match *match); /* ------------------------------------------------------------------------- */ diff --git a/include/media/v4l2-int-device.h b/include/media/v4l2-int-device.h index ecda3c72583..fbf58556157 100644 --- a/include/media/v4l2-int-device.h +++ b/include/media/v4l2-int-device.h @@ -219,7 +219,7 @@ enum v4l2_int_ioctl_num { vidioc_int_reset_num, /* VIDIOC_INT_INIT */ vidioc_int_init_num, - /* VIDIOC_INT_G_CHIP_IDENT */ + /* 
VIDIOC_DBG_G_CHIP_IDENT */ vidioc_int_g_chip_ident_num, /* diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index bf0e723a99c..b01c044868d 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -225,12 +225,12 @@ struct v4l2_ioctl_ops { /* Debugging ioctls */ #ifdef CONFIG_VIDEO_ADV_DEBUG int (*vidioc_g_register) (struct file *file, void *fh, - struct v4l2_register *reg); + struct v4l2_dbg_register *reg); int (*vidioc_s_register) (struct file *file, void *fh, - struct v4l2_register *reg); + struct v4l2_dbg_register *reg); #endif int (*vidioc_g_chip_ident) (struct file *file, void *fh, - struct v4l2_chip_ident *chip); + struct v4l2_dbg_chip_ident *chip); int (*vidioc_enum_framesizes) (struct file *file, void *fh, struct v4l2_frmsizeenum *fsize); diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 2517344313b..37b09e56e94 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -69,7 +69,7 @@ struct tuner_setup; not yet implemented) since ops provide proper type-checking. 
*/ struct v4l2_subdev_core_ops { - int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip); + int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip); int (*log_status)(struct v4l2_subdev *sd); int (*init)(struct v4l2_subdev *sd, u32 val); int (*s_standby)(struct v4l2_subdev *sd, u32 standby); @@ -81,8 +81,8 @@ struct v4l2_subdev_core_ops { int (*querymenu)(struct v4l2_subdev *sd, struct v4l2_querymenu *qm); long (*ioctl)(struct v4l2_subdev *sd, unsigned int cmd, void *arg); #ifdef CONFIG_VIDEO_ADV_DEBUG - int (*g_register)(struct v4l2_subdev *sd, struct v4l2_register *reg); - int (*s_register)(struct v4l2_subdev *sd, struct v4l2_register *reg); + int (*g_register)(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg); + int (*s_register)(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg); #endif }; -- cgit v1.2.3-70-g09d2 From 6a2d802ca01bd83b860145e7497a7a049c354cd7 Mon Sep 17 00:00:00 2001 From: Pham Thanh Nam Date: Tue, 30 Dec 2008 23:26:09 -0300 Subject: V4L/DVB (10156): saa7134: Add support for Avermedia AVer TV GO 007 FM Plus This patch adds support for Avermedia AVer TV GO 007 FM Plus (M15C) on saa7134 driver (PCI ID 1461:f31d). 
Signed-off-by: Pham Thanh Nam Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.saa7134 | 1 + drivers/media/video/saa7134/saa7134-cards.c | 40 +++++++++++++++++++++++++++++ drivers/media/video/saa7134/saa7134-input.c | 1 + drivers/media/video/saa7134/saa7134.h | 1 + 4 files changed, 43 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134 index 335aef4dcae..b8d470596b0 100644 --- a/Documentation/video4linux/CARDLIST.saa7134 +++ b/Documentation/video4linux/CARDLIST.saa7134 @@ -152,3 +152,4 @@ 151 -> ADS Tech Instant HDTV [1421:0380] 152 -> Asus Tiger Rev:1.00 [1043:4857] 153 -> Kworld Plus TV Analog Lite PCI [17de:7128] +154 -> Avermedia AVerTV GO 007 FM Plus [1461:f31d] diff --git a/drivers/media/video/saa7134/saa7134-cards.c b/drivers/media/video/saa7134/saa7134-cards.c index e240b4baf0e..e9c471cb04b 100644 --- a/drivers/media/video/saa7134/saa7134-cards.c +++ b/drivers/media/video/saa7134/saa7134-cards.c @@ -4642,6 +4642,38 @@ struct saa7134_board saa7134_boards[] = { .amux = 2, }, }, + [SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS] = { + .name = "Avermedia AVerTV GO 007 FM Plus", + .audio_clock = 0x00187de7, + .tuner_type = TUNER_PHILIPS_TDA8290, + .radio_type = UNSET, + .tuner_addr = ADDR_UNSET, + .radio_addr = ADDR_UNSET, + .gpiomask = 0x00300003, + /* .gpiomask = 0x8c240003, */ + .inputs = { { + .name = name_tv, + .vmux = 1, + .amux = TV, + .tv = 1, + .gpio = 0x01, + }, { + .name = name_svideo, + .vmux = 6, + .amux = LINE1, + .gpio = 0x02, + } }, + .radio = { + .name = name_radio, + .amux = TV, + .gpio = 0x00300001, + }, + .mute = { + .name = name_mute, + .amux = TV, + .gpio = 0x01, + }, + }, }; const unsigned int saa7134_bcount = ARRAY_SIZE(saa7134_boards); @@ -5739,6 +5771,13 @@ struct pci_device_id saa7134_pci_tbl[] = { .subdevice = PCI_ANY_ID, .driver_data = SAA7134_BOARD_UNKNOWN, },{ + .vendor = PCI_VENDOR_ID_PHILIPS, + .device = 
PCI_DEVICE_ID_PHILIPS_SAA7133, + .subvendor = 0x1461, /* Avermedia Technologies Inc */ + .subdevice = 0xf31d, + .driver_data = SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS, + + }, { /* --- end of list --- */ } }; @@ -5929,6 +5968,7 @@ int saa7134_board_init1(struct saa7134_dev *dev) case SAA7134_BOARD_GENIUS_TVGO_A11MCE: case SAA7134_BOARD_REAL_ANGEL_220: case SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG: + case SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS: dev->has_remote = SAA7134_REMOTE_GPIO; break; case SAA7134_BOARD_FLYDVBS_LR300: diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c index d2124f64e4e..8a106d36e72 100644 --- a/drivers/media/video/saa7134/saa7134-input.c +++ b/drivers/media/video/saa7134/saa7134-input.c @@ -449,6 +449,7 @@ int saa7134_input_init1(struct saa7134_dev *dev) case SAA7134_BOARD_AVERMEDIA_STUDIO_507: case SAA7134_BOARD_AVERMEDIA_GO_007_FM: case SAA7134_BOARD_AVERMEDIA_M102: + case SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS: ir_codes = ir_codes_avermedia; mask_keycode = 0x0007C8; mask_keydown = 0x000010; diff --git a/drivers/media/video/saa7134/saa7134.h b/drivers/media/video/saa7134/saa7134.h index f6c1fcc7207..14ee265f337 100644 --- a/drivers/media/video/saa7134/saa7134.h +++ b/drivers/media/video/saa7134/saa7134.h @@ -276,6 +276,7 @@ struct saa7134_format { #define SAA7134_BOARD_ADS_INSTANT_HDTV_PCI 151 #define SAA7134_BOARD_ASUSTeK_TIGER 152 #define SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG 153 +#define SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS 154 #define SAA7134_MAXBOARDS 32 #define SAA7134_INPUT_MAX 8 -- cgit v1.2.3-70-g09d2 From 5e6de7d9a1a373414a41a7441100f90b71c6119f Mon Sep 17 00:00:00 2001 From: Mark Lord Date: Wed, 3 Dec 2008 15:26:15 -0300 Subject: V4L/DVB (10157): Add USB ID for the Sil4701 radio from DealExtreme Signed-off-by: Mark Lord Cc: Greg KH Signed-off-by: Andrew Morton [tobias.lorenz@gmx.net: Code beautifications and documentation added] Signed-off-by: Tobias Lorenz Signed-off-by: Mauro Carvalho 
Chehab --- Documentation/video4linux/si470x.txt | 1 + drivers/media/radio/radio-si470x.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/video4linux/si470x.txt b/Documentation/video4linux/si470x.txt index 11c5fd22a33..49679e6aaa7 100644 --- a/Documentation/video4linux/si470x.txt +++ b/Documentation/video4linux/si470x.txt @@ -41,6 +41,7 @@ chips are known to work: - 10c4:818a: Silicon Labs USB FM Radio Reference Design - 06e1:a155: ADS/Tech FM Radio Receiver (formerly Instant FM Music) (RDX-155-EF) - 1b80:d700: KWorld USB FM Radio SnapMusic Mobile 700 (FM700) +- 10c5:819a: DealExtreme USB Radio Software diff --git a/drivers/media/radio/radio-si470x.c b/drivers/media/radio/radio-si470x.c index 457445ec7b5..67cbce82cb9 100644 --- a/drivers/media/radio/radio-si470x.c +++ b/drivers/media/radio/radio-si470x.c @@ -96,6 +96,8 @@ * 2008-10-20 Alexey Klimov * - add support for KWorld USB FM Radio FM700 * - blacklisted KWorld radio in hid-core.c and hid-ids.h + * 2008-12-03 Mark Lord + * - add support for DealExtreme USB Radio * * ToDo: * - add firmware download/update support @@ -138,6 +140,8 @@ static struct usb_device_id si470x_usb_driver_id_table[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x06e1, 0xa155, USB_CLASS_HID, 0, 0) }, /* KWorld USB FM Radio SnapMusic Mobile 700 (FM700) */ { USB_DEVICE_AND_INTERFACE_INFO(0x1b80, 0xd700, USB_CLASS_HID, 0, 0) }, + /* DealExtreme USB Radio */ + { USB_DEVICE_AND_INTERFACE_INFO(0x10c5, 0x819a, USB_CLASS_HID, 0, 0) }, /* Terminating entry */ { } }; -- cgit v1.2.3-70-g09d2 From 2af238e455ef5fd31c2f7a06c2db3f13d843b9bf Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 29 Feb 2008 14:21:53 -0800 Subject: kbuild: make *config usage docs Create a kconfig user assistance guide, with a few tips and hints about using menuconfig, xconfig, and gconfig. Mostly contains user interface, environment variables, and search topics, along with mini.config/custom.config usage. 
Signed-off-by: Randy Dunlap Signed-off-by: Sam Ravnborg --- Documentation/kbuild/00-INDEX | 2 + Documentation/kbuild/kconfig.txt | 188 +++++++++++++++++++++++++++++++++++++++ README | 32 ++++--- 3 files changed, 210 insertions(+), 12 deletions(-) create mode 100644 Documentation/kbuild/kconfig.txt (limited to 'Documentation') diff --git a/Documentation/kbuild/00-INDEX b/Documentation/kbuild/00-INDEX index 11464428545..54a118a20f2 100644 --- a/Documentation/kbuild/00-INDEX +++ b/Documentation/kbuild/00-INDEX @@ -4,5 +4,7 @@ kconfig-language.txt - specification of Config Language, the language in Kconfig files makefiles.txt - developer information for linux kernel makefiles +kconfig.txt + - usage help for make *config modules.txt - how to build modules and to install them diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt new file mode 100644 index 00000000000..26a7c0a9319 --- /dev/null +++ b/Documentation/kbuild/kconfig.txt @@ -0,0 +1,188 @@ +This file contains some assistance for using "make *config". + +Use "make help" to list all of the possible configuration targets. + +The xconfig ('qconf') and menuconfig ('mconf') programs also +have embedded help text. Be sure to check it for navigation, +search, and other general help text. + +====================================================================== +General +-------------------------------------------------- + +New kernel releases often introduce new config symbols. Often more +important, new kernel releases may rename config symbols. When +this happens, using a previously working .config file and running +"make oldconfig" won't necessarily produce a working new kernel +for you, so you may find that you need to see what NEW kernel +symbols have been introduced. 
+ +To see a list of new config symbols when using "make oldconfig", use + + cp user/some/old.config .config + yes "" | make oldconfig >conf.new + +and the config program will list as (NEW) any new symbols that have +unknown values. Of course, the .config file is also updated with +new (default) values, so you can use: + + grep "(NEW)" conf.new + +to see the new config symbols or you can 'diff' the previous and +new .config files to see the differences: + + diff .config.old .config | less + +(Yes, we need something better here.) + + +====================================================================== +menuconfig +-------------------------------------------------- + +SEARCHING for CONFIG symbols + +Searching in menuconfig: + + The Search function searches for kernel configuration symbol + names, so you have to know something close to what you are + looking for. + + Example: + /hotplug + This lists all config symbols that contain "hotplug", + e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG. + + For search help, enter / followed by TAB-TAB-TAB (to highlight +<Help>) and Enter. This will tell you that you can also use + regular expressions (regexes) in the search string, so if you + are not interested in MEMORY_HOTPLUG, you could try + + /^hotplug + + +______________________________________________________________________ +Color Themes for 'menuconfig' + +It is possible to select different color themes using the variable +MENUCONFIG_COLOR. To select a theme use: + + make MENUCONFIG_COLOR=<theme> menuconfig + +Available themes are: + mono => selects colors suitable for monochrome displays + blackbg => selects a color scheme with black background + classic => theme with blue background. The classic look + bluetitle => an LCD friendly version of classic.
(default) + +______________________________________________________________________ +Environment variables in 'menuconfig' + +KCONFIG_ALLCONFIG +-------------------------------------------------- +(partially based on lkml email from/by Rob Landley, re: miniconfig) +-------------------------------------------------- +The allyesconfig/allmodconfig/allnoconfig/randconfig variants can +also use the environment variable KCONFIG_ALLCONFIG as a flag or a +filename that contains config symbols that the user requires to be +set to a specific value. If KCONFIG_ALLCONFIG is used without a +filename, "make *config" checks for a file named +"all{yes/mod/no/random}.config" (corresponding to the *config command +that was used) for symbol values that are to be forced. If this file +is not found, it checks for a file named "all.config" to contain forced +values. + +This enables you to create "miniature" config (miniconfig) or custom +config files containing just the config symbols that you are interested +in. Then the kernel config system generates the full .config file, +including dependencies of your miniconfig file, based on the miniconfig +file. + +This 'KCONFIG_ALLCONFIG' file is a config file which contains +(usually a subset of all) preset config symbols. These variable +settings are still subject to normal dependency checks. + +Examples: + KCONFIG_ALLCONFIG=custom-notebook.config make allnoconfig +or + KCONFIG_ALLCONFIG=mini.config make allnoconfig +or + make KCONFIG_ALLCONFIG=mini.config allnoconfig + +These examples will disable most options (allnoconfig) but enable or +disable the options that are explicitly listed in the specified +mini-config files. + +KCONFIG_NOSILENTUPDATE +-------------------------------------------------- +If this variable has a non-blank value, it prevents silent kernel +config updates (requires explicit updates).
+ +KCONFIG_CONFIG +-------------------------------------------------- +This environment variable can be used to specify a default kernel config +file name to override the default name of ".config". + +KCONFIG_OVERWRITECONFIG +-------------------------------------------------- +If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not +break symlinks when .config is a symlink to somewhere else. + +KCONFIG_NOTIMESTAMP +-------------------------------------------------- +If this environment variable exists and is non-null, the timestamp line +in generated .config files is omitted. + +KCONFIG_AUTOCONFIG +-------------------------------------------------- +This environment variable can be set to specify the path & name of the +"auto.conf" file. Its default value is "include/config/auto.conf". + +KCONFIG_AUTOHEADER +-------------------------------------------------- +This environment variable can be set to specify the path & name of the +"autoconf.h" (header) file. Its default value is "include/linux/autoconf.h". + +______________________________________________________________________ +menuconfig User Interface Options +---------------------------------------------------------------------- +MENUCONFIG_MODE +-------------------------------------------------- +This mode shows all sub-menus in one large tree. + +Example: + MENUCONFIG_MODE=single_menu make menuconfig + +====================================================================== +xconfig +-------------------------------------------------- + +Searching in xconfig: + + The Search function searches for kernel configuration symbol + names, so you have to know something close to what you are + looking for. + + Example: + Ctrl-F hotplug + or + Menu: File, Search, hotplug + + lists all config symbol entries that contain "hotplug" in + the symbol name. In this Search dialog, you may change the + config setting for any of the entries that are not grayed out. 
+ You can also enter a different search string without having + to return to the main menu. + + +====================================================================== +gconfig +-------------------------------------------------- + +Searching in gconfig: + + None (gconfig isn't maintained as well as xconfig or menuconfig); + however, gconfig does have a few more viewing choices than + xconfig does. + +### diff --git a/README b/README index 159912cf515..90a07658ede 100644 --- a/README +++ b/README @@ -52,11 +52,11 @@ DOCUMENTATION: - The Documentation/DocBook/ subdirectory contains several guides for kernel developers and users. These guides can be rendered in a - number of formats: PostScript (.ps), PDF, and HTML, among others. - After installation, "make psdocs", "make pdfdocs", or "make htmldocs" - will render the documentation in the requested format. + number of formats: PostScript (.ps), PDF, HTML, & man-pages, among others. + After installation, "make psdocs", "make pdfdocs", "make htmldocs", + or "make mandocs" will render the documentation in the requested format. -INSTALLING the kernel: +INSTALLING the kernel source: - If you install the full sources, put the kernel tarball in a directory where you have permissions (eg. your home directory) and @@ -187,14 +187,9 @@ CONFIGURING the kernel: "make randconfig" Create a ./.config file by setting symbol values to random values. - The allyesconfig/allmodconfig/allnoconfig/randconfig variants can - also use the environment variable KCONFIG_ALLCONFIG to specify a - filename that contains config options that the user requires to be - set to a specific value. If KCONFIG_ALLCONFIG=filename is not used, - "make *config" checks for a file named "all{yes/mod/no/random}.config" - for symbol values that are to be forced. If this file is not found, - it checks for a file named "all.config" to contain forced values. 
+ You can find more information on using the Linux kernel config tools + in Documentation/kbuild/kconfig.txt. + NOTES on "make config": - having unnecessary drivers will make the kernel bigger, and can under some circumstances lead to problems: probing for a @@ -231,6 +226,19 @@ COMPILING the kernel: - If you configured any of the parts of the kernel as `modules', you will also have to do "make modules_install". + - Verbose kernel compile/build output: + + Normally the kernel build system runs in a fairly quiet mode (but not + totally silent). However, sometimes you or other kernel developers need + to see compile, link, or other commands exactly as they are executed. + For this, use "verbose" build mode. This is done by inserting + "V=1" in the "make" command. E.g.: + + make V=1 all + + To have the build system also tell the reason for the rebuild of each + target, use "V=2". The default is "V=0". + - Keep a backup kernel handy in case something goes wrong. This is especially true for the development releases, since each new release contains new code which has not been debugged. Make sure you keep a -- cgit v1.2.3-70-g09d2 From acc08b516f25b79cfcff310e51d95048bfcf7b0d Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 29 Dec 2008 13:45:52 +0100 Subject: kbuild: document environment variables Add kbuild.txt to Documentation/kbuild More stuff can be added later - at least we have some of the various environment variables documented now.
Signed-off-by: Sam Ravnborg --- Documentation/kbuild/00-INDEX | 8 ++- Documentation/kbuild/kbuild.txt | 126 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 3 deletions(-) create mode 100644 Documentation/kbuild/kbuild.txt (limited to 'Documentation') diff --git a/Documentation/kbuild/00-INDEX b/Documentation/kbuild/00-INDEX index 54a118a20f2..e8d2b6d83a3 100644 --- a/Documentation/kbuild/00-INDEX +++ b/Documentation/kbuild/00-INDEX @@ -1,10 +1,12 @@ 00-INDEX - - this file: info on the kernel build process + - this file: info on the kernel build process +kbuild.txt + - developer information on kbuild +kconfig.txt + - usage help for make *config kconfig-language.txt - specification of Config Language, the language in Kconfig files makefiles.txt - developer information for linux kernel makefiles -kconfig.txt - - usage help for make *config modules.txt - how to build modules and to install them diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt new file mode 100644 index 00000000000..51771847e81 --- /dev/null +++ b/Documentation/kbuild/kbuild.txt @@ -0,0 +1,126 @@ +Environment variables + +KCPPFLAGS +-------------------------------------------------- +Additional options to pass when preprocessing. The preprocessing options +will be used in all cases where kbuild does preprocessing including +building C files and assembler files. + +KAFLAGS +-------------------------------------------------- +Additional options to the assembler. + +KCFLAGS +-------------------------------------------------- +Additional options to the C compiler. + +KBUILD_VERBOSE +-------------------------------------------------- +Set the kbuild verbosity. Can be assigned the same values as "V=...". +See make help for the full list. +Setting "V=..." takes precedence over KBUILD_VERBOSE. + +KBUILD_EXTMOD +-------------------------------------------------- +Set the directory to look for the kernel source when building external +modules.
+The directory can be specified in several ways: +1) Use "M=..." on the command line +2) Environment variable KBUILD_EXTMOD +3) Environment variable SUBDIRS +The possibilities are listed in the order they take precedence. +Using "M=..." will always override the others. + +KBUILD_OUTPUT +-------------------------------------------------- +Specify the output directory when building the kernel. +The output directory can also be specified using "O=...". +Setting "O=..." takes precedence over KBUILD_OUTPUT. + +ARCH +-------------------------------------------------- +Set ARCH to the architecture to be built. +In most cases the name of the architecture is the same as the +directory name found in the arch/ directory. +But some architectures such as x86 and sparc have aliases. +x86: i386 for 32 bit, x86_64 for 64 bit +sparc: sparc for 32 bit, sparc64 for 64 bit + +CROSS_COMPILE +-------------------------------------------------- +Specify an optional fixed part of the binutils filename. +CROSS_COMPILE can be a part of the filename or the full path. + +CROSS_COMPILE is also used for ccache in some setups. + +CF +-------------------------------------------------- +Additional options for sparse. +CF is often used on the command-line like this: + + make CF=-Wbitwise C=2 + +INSTALL_PATH +-------------------------------------------------- +INSTALL_PATH specifies where to place the updated kernel and system map +images. Default is /boot, but you can set it to other values. + + +MODLIB +-------------------------------------------------- +Specify where to install modules. +The default value is: + + $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) + +The value can be overridden in which case the default value is ignored. + +INSTALL_MOD_PATH +-------------------------------------------------- +INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory +relocations required by build roots. This is not defined in the +makefile but the argument can be passed to make if needed.
+ +INSTALL_MOD_STRIP +-------------------------------------------------- +INSTALL_MOD_STRIP, if defined, will cause modules to be +stripped after they are installed. If INSTALL_MOD_STRIP is '1', then +the default option --strip-debug will be used. Otherwise, +INSTALL_MOD_STRIP will be used as the options to the strip command. + +INSTALL_FW_PATH +-------------------------------------------------- +INSTALL_FW_PATH specifies where to install the firmware blobs. +The default value is: + + $(INSTALL_MOD_PATH)/lib/firmware + +The value can be overridden in which case the default value is ignored. + +INSTALL_HDR_PATH +-------------------------------------------------- +INSTALL_HDR_PATH specifies where to install user space headers when +executing "make headers_*". +The default value is: + + $(objtree)/usr + +$(objtree) is the directory where output files are saved. +The output directory is often set using "O=..." on the commandline. + +The value can be overridden in which case the default value is ignored. + +KBUILD_MODPOST_WARN +-------------------------------------------------- +KBUILD_MODPOST_WARN can be set to avoid erroring out in case of undefined +symbols in the final module linking stage. + +KBUILD_MODPOST_NOFINAL +-------------------------------------------------- +KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules. +This is solely useful to speed up test compiles. + +KBUILD_EXTRA_SYMBOLS +-------------------------------------------------- +For modules that use symbols from other modules. +See more details in modules.txt. -- cgit v1.2.3-70-g09d2 From c31910672376dfb8d020e32afa7249763bcd924a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Jan 2009 11:14:25 -0500 Subject: ext4: Remove code to create the journal inode This code has been obsolete for quite some time, since the supported method for adding a journal inode is to use tune2fs (or to create a new filesystem with a journal via mke2fs or mkfs.ext4).
Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 4 --- fs/ext4/super.c | 68 +++-------------------------------- fs/jbd2/journal.c | 72 -------------------------------------- include/linux/jbd2.h | 1 - 4 files changed, 4 insertions(+), 141 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index e3fcbea3ec8..9ec29d86ff8 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -149,10 +149,6 @@ journal_async_commit Commit block can be written to disk without waiting journal=update Update the ext4 file system's journal to the current format. -journal=inum When a journal already exists, this option is ignored. - Otherwise, it specifies the number of the inode which - will represent the ext4 file system's journal file. - journal_dev=devnum When the external journal device's major/minor numbers have changed, this option allows the user to specify the new journal location. 
The journal device is diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e5ab520724d..8036392b212 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -51,8 +51,6 @@ struct proc_dir_entry *ext4_proc_root; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); -static int ext4_create_journal(struct super_block *, struct ext4_super_block *, - unsigned int); static void ext4_commit_super(struct super_block *sb, struct ext4_super_block *es, int sync); static void ext4_mark_recovery_complete(struct super_block *sb, @@ -1006,7 +1004,7 @@ enum { Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, Opt_commit, Opt_min_batch_time, Opt_max_batch_time, - Opt_journal_update, Opt_journal_inum, Opt_journal_dev, + Opt_journal_update, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, @@ -1048,7 +1046,6 @@ static const match_table_t tokens = { {Opt_min_batch_time, "min_batch_time=%u"}, {Opt_max_batch_time, "max_batch_time=%u"}, {Opt_journal_update, "journal=update"}, - {Opt_journal_inum, "journal=%u"}, {Opt_journal_dev, "journal_dev=%u"}, {Opt_journal_checksum, "journal_checksum"}, {Opt_journal_async_commit, "journal_async_commit"}, @@ -1102,7 +1099,7 @@ static ext4_fsblk_t get_sb_block(void **data) } static int parse_options(char *options, struct super_block *sb, - unsigned int *inum, unsigned long *journal_devnum, + unsigned long *journal_devnum, ext4_fsblk_t *n_blocks_count, int is_remount) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1226,16 +1223,6 @@ static int parse_options(char *options, struct super_block *sb, } set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); break; - case Opt_journal_inum: - if (is_remount) { - printk(KERN_ERR "EXT4-fs: cannot specify " - "journal on remount\n"); - return 0; - } - if (match_int(&args[0], &option)) - 
return 0; - *inum = option; - break; case Opt_journal_dev: if (is_remount) { printk(KERN_ERR "EXT4-fs: cannot specify " @@ -2035,7 +2022,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; unsigned long offset = 0; - unsigned int journal_inum = 0; unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; @@ -2155,8 +2141,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, DELALLOC); - if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum, - NULL, 0)) + if (!parse_options((char *) data, sb, &journal_devnum, NULL, 0)) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -2460,9 +2445,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount4; } } - } else if (journal_inum) { - if (ext4_create_journal(sb, es, journal_inum)) - goto failed_mount3; } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { printk(KERN_ERR "EXT4-fs: required journal recovery " @@ -2926,48 +2908,6 @@ static int ext4_load_journal(struct super_block *sb, return 0; } -static int ext4_create_journal(struct super_block *sb, - struct ext4_super_block *es, - unsigned int journal_inum) -{ - journal_t *journal; - int err; - - if (sb->s_flags & MS_RDONLY) { - printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " - "create journal.\n"); - return -EROFS; - } - - journal = ext4_get_journal(sb, journal_inum); - if (!journal) - return -EINVAL; - - printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", - journal_inum); - - err = jbd2_journal_create(journal); - if (err) { - printk(KERN_ERR "EXT4-fs: error creating journal.\n"); - jbd2_journal_destroy(journal); - return -EIO; - } - - EXT4_SB(sb)->s_journal = journal; - - ext4_update_dynamic_rev(sb); - 
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL); - - es->s_journal_inum = cpu_to_le32(journal_inum); - sb->s_dirt = 1; - - /* Make sure we flush the recovery flag to disk. */ - ext4_commit_super(sb, es, 1); - - return 0; -} - static void ext4_commit_super(struct super_block *sb, struct ext4_super_block *es, int sync) { @@ -3209,7 +3149,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) /* * Allow the "check" option to be passed as a remount option. */ - if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { + if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) { err = -EINVAL; goto restore_opts; } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 34ef9805720..b10d7283ba5 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -66,7 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_update_format); EXPORT_SYMBOL(jbd2_journal_check_used_features); EXPORT_SYMBOL(jbd2_journal_check_available_features); EXPORT_SYMBOL(jbd2_journal_set_features); -EXPORT_SYMBOL(jbd2_journal_create); EXPORT_SYMBOL(jbd2_journal_load); EXPORT_SYMBOL(jbd2_journal_destroy); EXPORT_SYMBOL(jbd2_journal_abort); @@ -1162,77 +1161,6 @@ static int journal_reset(journal_t *journal) return jbd2_journal_start_thread(journal); } -/** - * int jbd2_journal_create() - Initialise the new journal file - * @journal: Journal to create. This structure must have been initialised - * - * Given a journal_t structure which tells us which disk blocks we can - * use, create a new journal superblock and initialise all of the - * journal fields from scratch. 
- **/ -int jbd2_journal_create(journal_t *journal) -{ - unsigned long long blocknr; - struct buffer_head *bh; - journal_superblock_t *sb; - int i, err; - - if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) { - printk (KERN_ERR "Journal length (%d blocks) too short.\n", - journal->j_maxlen); - journal_fail_superblock(journal); - return -EINVAL; - } - - if (journal->j_inode == NULL) { - /* - * We don't know what block to start at! - */ - printk(KERN_EMERG - "%s: creation of journal on external device!\n", - __func__); - BUG(); - } - - /* Zero out the entire journal on disk. We cannot afford to - have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */ - jbd_debug(1, "JBD: Zeroing out journal blocks...\n"); - for (i = 0; i < journal->j_maxlen; i++) { - err = jbd2_journal_bmap(journal, i, &blocknr); - if (err) - return err; - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - lock_buffer(bh); - memset (bh->b_data, 0, journal->j_blocksize); - BUFFER_TRACE(bh, "marking dirty"); - mark_buffer_dirty(bh); - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); - unlock_buffer(bh); - __brelse(bh); - } - - sync_blockdev(journal->j_dev); - jbd_debug(1, "JBD: journal cleared.\n"); - - /* OK, fill in the initial static fields in the new superblock */ - sb = journal->j_superblock; - - sb->s_header.h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); - - sb->s_blocksize = cpu_to_be32(journal->j_blocksize); - sb->s_maxlen = cpu_to_be32(journal->j_maxlen); - sb->s_first = cpu_to_be32(1); - - journal->j_transaction_sequence = 1; - - journal->j_flags &= ~JBD2_ABORT; - journal->j_format_version = 2; - - return journal_reset(journal); -} - /** * void jbd2_journal_update_superblock() - Update journal sb on disk. * @journal: The journal to update. 
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 9d82084a160..adef1c9940d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1104,7 +1104,6 @@ extern int jbd2_journal_set_features (journal_t *, unsigned long, unsigned long, unsigned long); extern void jbd2_journal_clear_features (journal_t *, unsigned long, unsigned long, unsigned long); -extern int jbd2_journal_create (journal_t *); extern int jbd2_journal_load (journal_t *journal); extern int jbd2_journal_destroy (journal_t *); extern int jbd2_journal_recover (journal_t *journal); -- cgit v1.2.3-70-g09d2 From 2f6de3a199893ae3dd68e23bd79b55e1478c8268 Mon Sep 17 00:00:00 2001 From: Baodong Chen Date: Sat, 3 Jan 2009 12:37:06 +0800 Subject: Documentation/x86/boot.txt: payload length was changed to payload_length Signed-off-by: Baodong Chen <[email]chenbdchenbd@gmail.com[email]> Acked-by: Jiri Kosina Signed-off-by: Ingo Molnar --- Documentation/x86/boot.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index fcdc62b3c3d..7b4596ac412 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -44,7 +44,7 @@ Protocol 2.07: (Kernel 2.6.24) Added paravirtualised boot protocol. and KEEP_SEGMENTS flag in load_flags. Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format - payload. Introduced payload_offset and payload length + payload. Introduced payload_offset and payload_length fields to aid in locating the payload. 
Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical -- cgit v1.2.3-70-g09d2 From 9eb425c046f4129f1dafce7c04e949652e69fb01 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 5 Jan 2009 08:46:29 +0000 Subject: Squashfs: documentation Signed-off-by: Phillip Lougher --- Documentation/filesystems/squashfs.txt | 225 +++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 Documentation/filesystems/squashfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt new file mode 100644 index 00000000000..3e79e4a7a39 --- /dev/null +++ b/Documentation/filesystems/squashfs.txt @@ -0,0 +1,225 @@ +SQUASHFS 4.0 FILESYSTEM +======================= + +Squashfs is a compressed read-only filesystem for Linux. +It uses zlib compression to compress files, inodes and directories. +Inodes in the system are very small and all blocks are packed to minimise +data overhead. Block sizes greater than 4K are supported up to a maximum +of 1Mbytes (default block size 128K). + +Squashfs is intended for general read-only filesystem use, for archival +use (i.e. in cases where a .tar.gz file may be used), and in constrained +block device/memory systems (e.g. embedded systems) where low overhead is +needed. + +Mailing list: squashfs-devel@lists.sourceforge.net +Web site: www.squashfs.org + +1. FILESYSTEM FEATURES +---------------------- + +Squashfs filesystem features versus Cramfs: + + Squashfs Cramfs + +Max filesystem size: 2^64 16 MiB +Max file size: ~ 2 TiB 16 MiB +Max files: unlimited unlimited +Max directories: unlimited unlimited +Max entries per directory: unlimited unlimited +Max block size: 1 MiB 4 KiB +Metadata compression: yes no +Directory indexes: yes no +Sparse file support: yes no +Tail-end packing (fragments): yes no +Exportable (NFS etc.): yes no +Hard link support: yes no +"." and ".." 
in readdir: yes no +Real inode numbers: yes no +32-bit uids/gids: yes no +File creation time: yes no +Xattr and ACL support: no no + +Squashfs compresses data, inodes and directories. In addition, inode and +directory data are highly compacted, and packed on byte boundaries. Each +compressed inode is on average 8 bytes in length (the exact length varies on +file type, i.e. regular file, directory, symbolic link, and block/char device +inodes have different sizes). + +2. USING SQUASHFS +----------------- + +As squashfs is a read-only filesystem, the mksquashfs program must be used to +create populated squashfs filesystems. This and other squashfs utilities +can be obtained from http://www.squashfs.org. Usage instructions can be +obtained from this site also. + + +3. SQUASHFS FILESYSTEM DESIGN +----------------------------- + +A squashfs filesystem consists of seven parts, packed together on a byte +alignment: + + --------------- + | superblock | + |---------------| + | datablocks | + | & fragments | + |---------------| + | inode table | + |---------------| + | directory | + | table | + |---------------| + | fragment | + | table | + |---------------| + | export | + | table | + |---------------| + | uid/gid | + | lookup table | + --------------- + +Compressed data blocks are written to the filesystem as files are read from +the source directory, and checked for duplicates. Once all file data has been +written the completed inode, directory, fragment, export and uid/gid lookup +tables are written. + +3.1 Inodes +---------- + +Metadata (inodes and directories) are compressed in 8Kbyte blocks. Each +compressed block is prefixed by a two byte length, the top bit is set if the +block is uncompressed. A block will be uncompressed if the -noI option is set, +or if the compressed block was larger than the uncompressed block. + +Inodes are packed into the metadata blocks, and are not aligned to block +boundaries, therefore inodes overlap compressed blocks. 
Inodes are identified +by a 48-bit number which encodes the location of the compressed metadata block +containing the inode, and the byte offset into that block where the inode is +placed (<block, offset>). + +To maximise compression there are different inodes for each file type +(regular file, directory, device, etc.), the inode contents and length +varying with the type. + +To further maximise compression, two types of regular file inode and +directory inode are defined: inodes optimised for frequently occurring +regular files and directories, and extended types where extra +information has to be stored. + +3.2 Directories +--------------- + +Like inodes, directories are packed into compressed metadata blocks, stored +in a directory table. Directories are accessed using the start address of +the metablock containing the directory and the offset into the +decompressed block (<block, offset>). + +Directories are organised in a slightly complex way, and are not simply +a list of file names. The organisation takes advantage of the +fact that (in most cases) the inodes of the files will be in the same +compressed metadata block, and therefore, can share the start block. +Directories are therefore organised in a two level list, a directory +header containing the shared start block value, and a sequence of directory +entries, each of which share the shared start block. A new directory header +is written once/if the inode start block changes. The directory +header/directory entry list is repeated as many times as necessary. + +Directories are sorted, and can contain a directory index to speed up +file lookup. Directory indexes store one entry per metablock, each entry +storing the index/filename mapping to the first directory header +in each metadata block. Directories are sorted in alphabetical order, +and at lookup the index is scanned linearly looking for the first filename +alphabetically larger than the filename being looked up.
At this point the +location of the metadata block the filename is in has been found. +The general idea of the index is to ensure only one metadata block needs to be +decompressed to do a lookup irrespective of the length of the directory. +This scheme has the advantage that it doesn't require extra memory overhead +and doesn't require much extra storage on disk. + +3.3 File data +------------- + +Regular files consist of a sequence of contiguous compressed blocks, and/or a +compressed fragment block (tail-end packed block). The compressed size +of each datablock is stored in a block list contained within the +file inode. + +To speed up access to datablocks when reading 'large' files (256 Mbytes or +larger), the code implements an index cache that caches the mapping from +block index to datablock location on disk. + +The index cache allows Squashfs to handle large files (up to 1.75 TiB) while +retaining a simple and space-efficient block list on disk. The cache +is split into slots, caching up to eight 224 GiB files (128 KiB blocks). +Larger files use multiple slots, with 1.75 TiB files using all 8 slots. +The index cache is designed to be memory efficient, and by default uses +16 KiB. + +3.4 Fragment lookup table +------------------------- + +Regular files can contain a fragment index which is mapped to a fragment +location on disk and compressed size using a fragment lookup table. This +fragment lookup table is itself stored compressed into metadata blocks. +A second index table is used to locate these. This second index table for +speed of access (and because it is small) is read at mount time and cached +in memory. + +3.5 Uid/gid lookup table +------------------------ + +For space efficiency regular files store uid and gid indexes, which are +converted to 32-bit uids/gids using an id look up table. This table is +stored compressed into metadata blocks. A second index table is used to +locate these.
This second index table for speed of access (and because it +is small) is read at mount time and cached in memory. + +3.6 Export table +---------------- + +To enable Squashfs filesystems to be exportable (via NFS etc.) filesystems +can optionally (disabled with the -no-exports Mksquashfs option) contain +an inode number to inode disk location lookup table. This is required to +enable Squashfs to map inode numbers passed in filehandles to the inode +location on disk, which is necessary when the export code reinstantiates +expired/flushed inodes. + +This table is stored compressed into metadata blocks. A second index table is +used to locate these. This second index table for speed of access (and because +it is small) is read at mount time and cached in memory. + + +4. TODOS AND OUTSTANDING ISSUES +------------------------------- + +4.1 Todo list +------------- + +Implement Xattr and ACL support. The Squashfs 4.0 filesystem layout has hooks +for these but the code has not been written. Once the code has been written +the existing layout should not require modification. + +4.2 Squashfs internal cache +--------------------------- + +Blocks in Squashfs are compressed. To avoid repeatedly decompressing +recently accessed data Squashfs uses two small metadata and fragment caches. + +The cache is not used for file datablocks, these are decompressed and cached in +the page-cache in the normal way. The cache is used to temporarily cache +fragment and metadata blocks which have been read as a result of a metadata +(i.e. inode or directory) or fragment access. Because metadata and fragments +are packed together into blocks (to gain greater compression) the read of a +particular piece of metadata or fragment will retrieve other metadata/fragments +which have been packed with it, these because of locality-of-reference may be +read in the near future. Temporarily caching them ensures they are available +for near future access without requiring an additional read and decompress. 
+ +In the future this internal cache may be replaced with an implementation which +uses the kernel page cache. Because the page cache operates on page sized +units this may introduce additional complexity in terms of locking and +associated race conditions. -- cgit v1.2.3-70-g09d2 From 2ec220e27f5040aec1e88901c1b6ea3d135787ad Mon Sep 17 00:00:00 2001 From: Ken Chen Date: Mon, 10 Nov 2008 11:26:08 +0300 Subject: proc: add /proc/*/stack /proc/*/stack adds the ability to query a task's stack trace. It is more useful than /proc/*/wchan as it provides full stack trace instead of single depth. Example output: $ cat /proc/self/stack [] save_stack_trace_tsk+0x17/0x35 [] proc_pid_stack+0x4a/0x76 [] proc_single_show+0x4a/0x5e [] seq_read+0xf3/0x29f [] vfs_read+0x6d/0x91 [] sys_read+0x3b/0x60 [] syscall_call+0x7/0xb [] 0xffffffff [add save_stack_trace_tsk() on mips, ACK Ralf --adobriyan] Signed-off-by: Ken Chen Signed-off-by: Ingo Molnar Signed-off-by: Alexey Dobriyan --- Documentation/filesystems/proc.txt | 1 + arch/mips/kernel/stacktrace.c | 24 ++++++++++++++++++------ fs/proc/base.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 71df353e367..334ef2f983f 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -140,6 +140,7 @@ Table 1-1: Process specific entries in /proc statm Process memory status information status Process status in human readable form wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan + stack Report full stack trace, enable via CONFIG_STACKTRACE smaps Extension based on maps, the rss size for each mapped file .............................................................................. 
diff --git a/arch/mips/kernel/stacktrace.c b/arch/mips/kernel/stacktrace.c index 0632e2a849c..58f5cd76c8c 100644 --- a/arch/mips/kernel/stacktrace.c +++ b/arch/mips/kernel/stacktrace.c @@ -32,7 +32,8 @@ static void save_raw_context_stack(struct stack_trace *trace, } } -static void save_context_stack(struct stack_trace *trace, struct pt_regs *regs) +static void save_context_stack(struct stack_trace *trace, + struct task_struct *tsk, struct pt_regs *regs) { unsigned long sp = regs->regs[29]; #ifdef CONFIG_KALLSYMS @@ -41,7 +42,7 @@ static void save_context_stack(struct stack_trace *trace, struct pt_regs *regs) if (raw_show_trace || !__kernel_text_address(pc)) { unsigned long stack_page = - (unsigned long)task_stack_page(current); + (unsigned long)task_stack_page(tsk); if (stack_page && sp >= stack_page && sp <= stack_page + THREAD_SIZE - 32) save_raw_context_stack(trace, sp); @@ -54,7 +55,7 @@ static void save_context_stack(struct stack_trace *trace, struct pt_regs *regs) trace->entries[trace->nr_entries++] = pc; if (trace->nr_entries >= trace->max_entries) break; - pc = unwind_stack(current, &sp, pc, &ra); + pc = unwind_stack(tsk, &sp, pc, &ra); } while (pc); #else save_raw_context_stack(trace, sp); @@ -65,13 +66,24 @@ static void save_context_stack(struct stack_trace *trace, struct pt_regs *regs) * Save stack-backtrace addresses into a stack_trace buffer. 
*/ void save_stack_trace(struct stack_trace *trace) +{ + save_stack_trace_tsk(current, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { struct pt_regs dummyregs; struct pt_regs *regs = &dummyregs; WARN_ON(trace->nr_entries || !trace->max_entries); - prepare_frametrace(regs); - save_context_stack(trace, regs); + if (tsk != current) { + regs->regs[29] = tsk->thread.reg29; + regs->regs[31] = 0; + regs->cp0_epc = tsk->thread.reg31; + } else + prepare_frametrace(regs); + save_context_stack(trace, tsk, regs); } -EXPORT_SYMBOL_GPL(save_stack_trace); +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/fs/proc/base.c b/fs/proc/base.c index ce7a6da1b6a..eb7b4654d6a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -337,6 +338,37 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) } #endif /* CONFIG_KALLSYMS */ +#ifdef CONFIG_STACKTRACE + +#define MAX_STACK_TRACE_DEPTH 64 + +static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + struct stack_trace trace; + unsigned long *entries; + int i; + + entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + trace.nr_entries = 0; + trace.max_entries = MAX_STACK_TRACE_DEPTH; + trace.entries = entries; + trace.skip = 0; + save_stack_trace_tsk(task, &trace); + + for (i = 0; i < trace.nr_entries; i++) { + seq_printf(m, "[<%p>] %pS\n", + (void *)entries[i], (void *)entries[i]); + } + kfree(entries); + + return 0; +} +#endif + #ifdef CONFIG_SCHEDSTATS /* * Provides /proc/PID/schedstat @@ -2500,6 +2532,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_KALLSYMS INF("wchan", S_IRUGO, proc_pid_wchan), #endif +#ifdef CONFIG_STACKTRACE + ONE("stack", S_IRUSR, proc_pid_stack), +#endif #ifdef CONFIG_SCHEDSTATS INF("schedstat", 
S_IRUGO, proc_pid_schedstat), #endif @@ -2835,6 +2870,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_KALLSYMS INF("wchan", S_IRUGO, proc_pid_wchan), #endif +#ifdef CONFIG_STACKTRACE + ONE("stack", S_IRUSR, proc_pid_stack), +#endif #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, proc_pid_schedstat), #endif -- cgit v1.2.3-70-g09d2 From a68979b857283daf4acc405e476dcc8812a3ff2b Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Fri, 14 Nov 2008 11:17:52 +0800 Subject: ocfs2: add mount option and Kconfig option for acl This patch adds the Kconfig option "CONFIG_OCFS2_FS_POSIX_ACL" and mount options "acl" to enable acls in Ocfs2. Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- Documentation/filesystems/ocfs2.txt | 3 ++- fs/Kconfig | 9 +++++++++ fs/ocfs2/super.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 67310fbbb7d..c2a0871280a 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -31,7 +31,6 @@ Features which OCFS2 does not support yet: - quotas - Directory change notification (F_NOTIFY) - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) - - POSIX ACLs Mount options ============= @@ -79,3 +78,5 @@ inode64 Indicates that Ocfs2 is allowed to create inodes at bits of significance. user_xattr (*) Enables Extended User Attributes. nouser_xattr Disables Extended User Attributes. +acl Enables POSIX Access Control Lists support. +noacl (*) Disables POSIX Access Control Lists support. diff --git a/fs/Kconfig b/fs/Kconfig index ff0e8198020..e8a47f74a83 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -268,6 +268,15 @@ config OCFS2_COMPAT_JBD is backwards compatible with JBD. It is safe to say N here. However, if you really want to use the original JBD, say Y here. 
+config OCFS2_FS_POSIX_ACL + bool "OCFS2 POSIX Access Control Lists" + depends on OCFS2_FS + select FS_POSIX_ACL + default n + help + Posix Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + endif # BLOCK source "fs/notify/Kconfig" diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 304b63ac78c..9e7accc68b4 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -158,6 +158,8 @@ enum { Opt_user_xattr, Opt_nouser_xattr, Opt_inode64, + Opt_acl, + Opt_noacl, Opt_err, }; @@ -180,6 +182,8 @@ static const match_table_t tokens = { {Opt_user_xattr, "user_xattr"}, {Opt_nouser_xattr, "nouser_xattr"}, {Opt_inode64, "inode64"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, {Opt_err, NULL} }; @@ -466,6 +470,8 @@ unlock_osb: if (!ret) { /* Only save off the new mount options in case of a successful * remount. */ + if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) + parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; @@ -651,6 +657,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) } brelse(bh); bh = NULL; + + if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) + parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; + osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; @@ -664,6 +674,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = OCFS2_SUPER_MAGIC; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); + /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, * heartbeat=none */ if (bdev_read_only(sb->s_bdev)) { @@ -945,6 +958,19 @@ static int ocfs2_parse_options(struct super_block *sb, case Opt_inode64: mopt->mount_opt |= OCFS2_MOUNT_INODE64; break; +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + case Opt_acl: + mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; + break; + case Opt_noacl: + mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; + break; +#else + case Opt_acl: + case Opt_noacl: + printk(KERN_INFO "ocfs2 (no)acl options not supported\n"); + break; +#endif default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -1017,6 +1043,13 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (opts & OCFS2_MOUNT_INODE64) seq_printf(s, ",inode64"); +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + if (opts & OCFS2_MOUNT_POSIX_ACL) + seq_printf(s, ",acl"); + else + seq_printf(s, ",noacl"); +#endif + return 0; } -- cgit v1.2.3-70-g09d2 From a808ad3b0d28411e2838117c5b2ae680ae42483c Mon Sep 17 00:00:00 2001 From: Sean MacLennan Date: Wed, 10 Dec 2008 13:16:34 +0000 Subject: [MTD] [NAND] ndfc driver The current ndfc driver only compiles under arch/ppc. This arch was removed from the kernel. I notice the event entry for the ndfc in Kconfig has been removed in 2.6.28. This patch converts the ndfc to a proper OF (OpenFirmware) driver. I can give a working example of the DTS if needed. The patch has been in production use on the PIKA Warp Appliance and is in use by others. The Warp basically boots from NAND, so the ndfc driver is very important to us. 
Signed-off-by: Sean MacLennan Acked-By: Josh Boyer Signed-off-by: David Woodhouse --- Documentation/powerpc/dts-bindings/4xx/ndfc.txt | 39 ++++ drivers/mtd/nand/Kconfig | 7 + drivers/mtd/nand/ndfc.c | 269 ++++++++++++------------ 3 files changed, 179 insertions(+), 136 deletions(-) create mode 100644 Documentation/powerpc/dts-bindings/4xx/ndfc.txt (limited to 'Documentation') diff --git a/Documentation/powerpc/dts-bindings/4xx/ndfc.txt b/Documentation/powerpc/dts-bindings/4xx/ndfc.txt new file mode 100644 index 00000000000..869f0b5f16e --- /dev/null +++ b/Documentation/powerpc/dts-bindings/4xx/ndfc.txt @@ -0,0 +1,39 @@ +AMCC NDFC (NanD Flash Controller) + +Required properties: +- compatible : "ibm,ndfc". +- reg : should specify chip select and size used for the chip (0x2000). + +Optional properties: +- ccr : NDFC config and control register value (default 0). +- bank-settings : NDFC bank configuration register value (default 0). + +Notes: +- partition(s) - follows the OF MTD standard for partitions + +Example: + +ndfc@1,0 { + compatible = "ibm,ndfc"; + reg = <0x00000001 0x00000000 0x00002000>; + ccr = <0x00001000>; + bank-settings = <0x80002222>; + #address-cells = <1>; + #size-cells = <1>; + + nand { + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "kernel"; + reg = <0x00000000 0x00200000>; + }; + partition@200000 { + label = "root"; + reg = <0x00200000 0x03E00000>; + }; + }; +}; + + diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index f8ae0400c49..8b12e6e109d 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -163,6 +163,13 @@ config MTD_NAND_S3C2410_HWECC incorrect ECC generation, and if using these, the default of software ECC is preferable. 
+config MTD_NAND_NDFC + tristate "NDFC NanD Flash Controller" + depends on 4xx + select MTD_NAND_ECC_SMC + help + NDFC Nand Flash Controllers are integrated in IBM/AMCC's 4xx SoCs + config MTD_NAND_S3C2410_CLKSTOP bool "S3C2410 NAND IDLE clock stop" depends on MTD_NAND_S3C2410 diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c index 955959eb02d..582cf80f555 100644 --- a/drivers/mtd/nand/ndfc.c +++ b/drivers/mtd/nand/ndfc.c @@ -2,12 +2,20 @@ * drivers/mtd/ndfc.c * * Overview: - * Platform independend driver for NDFC (NanD Flash Controller) + * Platform independent driver for NDFC (NanD Flash Controller) * integrated into EP440 cores * + * Ported to an OF platform driver by Sean MacLennan + * + * The NDFC supports multiple chips, but this driver only supports a + * single chip since I do not have access to any boards with + * multiple chips. + * * Author: Thomas Gleixner * * Copyright 2006 IBM + * Copyright 2008 PIKA Technologies + * Sean MacLennan * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -21,27 +29,20 @@ #include #include #include -#include - +#include #include -#ifdef CONFIG_40x -#include -#else -#include -#endif - -struct ndfc_nand_mtd { - struct mtd_info mtd; - struct nand_chip chip; - struct platform_nand_chip *pl_chip; -}; -static struct ndfc_nand_mtd ndfc_mtd[NDFC_MAX_BANKS]; struct ndfc_controller { - void __iomem *ndfcbase; - struct nand_hw_control ndfc_control; - atomic_t childs_active; + struct of_device *ofdev; + void __iomem *ndfcbase; + struct mtd_info mtd; + struct nand_chip chip; + int chip_select; + struct nand_hw_control ndfc_control; +#ifdef CONFIG_MTD_PARTITIONS + struct mtd_partition *parts; +#endif }; static struct ndfc_controller ndfc_ctrl; @@ -50,17 +51,14 @@ static void ndfc_select_chip(struct mtd_info *mtd, int chip) { uint32_t ccr; struct ndfc_controller *ndfc = &ndfc_ctrl; - struct nand_chip *nandchip = mtd->priv; - 
struct ndfc_nand_mtd *nandmtd = nandchip->priv; - struct platform_nand_chip *pchip = nandmtd->pl_chip; - ccr = __raw_readl(ndfc->ndfcbase + NDFC_CCR); + ccr = in_be32(ndfc->ndfcbase + NDFC_CCR); if (chip >= 0) { ccr &= ~NDFC_CCR_BS_MASK; - ccr |= NDFC_CCR_BS(chip + pchip->chip_offset); + ccr |= NDFC_CCR_BS(chip + ndfc->chip_select); } else ccr |= NDFC_CCR_RESET_CE; - __raw_writel(ccr, ndfc->ndfcbase + NDFC_CCR); + out_be32(ndfc->ndfcbase + NDFC_CCR, ccr); } static void ndfc_hwcontrol(struct mtd_info *mtd, int cmd, unsigned int ctrl) @@ -80,7 +78,7 @@ static int ndfc_ready(struct mtd_info *mtd) { struct ndfc_controller *ndfc = &ndfc_ctrl; - return __raw_readl(ndfc->ndfcbase + NDFC_STAT) & NDFC_STAT_IS_READY; + return in_be32(ndfc->ndfcbase + NDFC_STAT) & NDFC_STAT_IS_READY; } static void ndfc_enable_hwecc(struct mtd_info *mtd, int mode) @@ -88,9 +86,9 @@ static void ndfc_enable_hwecc(struct mtd_info *mtd, int mode) uint32_t ccr; struct ndfc_controller *ndfc = &ndfc_ctrl; - ccr = __raw_readl(ndfc->ndfcbase + NDFC_CCR); + ccr = in_be32(ndfc->ndfcbase + NDFC_CCR); ccr |= NDFC_CCR_RESET_ECC; - __raw_writel(ccr, ndfc->ndfcbase + NDFC_CCR); + out_be32(ndfc->ndfcbase + NDFC_CCR, ccr); wmb(); } @@ -102,9 +100,10 @@ static int ndfc_calculate_ecc(struct mtd_info *mtd, uint8_t *p = (uint8_t *)&ecc; wmb(); - ecc = __raw_readl(ndfc->ndfcbase + NDFC_ECC); - ecc_code[0] = p[1]; - ecc_code[1] = p[2]; + ecc = in_be32(ndfc->ndfcbase + NDFC_ECC); + /* The NDFC uses Smart Media (SMC) bytes order */ + ecc_code[0] = p[2]; + ecc_code[1] = p[1]; ecc_code[2] = p[3]; return 0; @@ -123,7 +122,7 @@ static void ndfc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len) uint32_t *p = (uint32_t *) buf; for(;len > 0; len -= 4) - *p++ = __raw_readl(ndfc->ndfcbase + NDFC_DATA); + *p++ = in_be32(ndfc->ndfcbase + NDFC_DATA); } static void ndfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len) @@ -132,7 +131,7 @@ static void ndfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len) 
uint32_t *p = (uint32_t *) buf; for(;len > 0; len -= 4) - __raw_writel(*p++, ndfc->ndfcbase + NDFC_DATA); + out_be32(ndfc->ndfcbase + NDFC_DATA, *p++); } static int ndfc_verify_buf(struct mtd_info *mtd, const uint8_t *buf, int len) @@ -141,7 +140,7 @@ static int ndfc_verify_buf(struct mtd_info *mtd, const uint8_t *buf, int len) uint32_t *p = (uint32_t *) buf; for(;len > 0; len -= 4) - if (*p++ != __raw_readl(ndfc->ndfcbase + NDFC_DATA)) + if (*p++ != in_be32(ndfc->ndfcbase + NDFC_DATA)) return -EFAULT; return 0; } @@ -149,10 +148,19 @@ static int ndfc_verify_buf(struct mtd_info *mtd, const uint8_t *buf, int len) /* * Initialize chip structure */ -static void ndfc_chip_init(struct ndfc_nand_mtd *mtd) +static int ndfc_chip_init(struct ndfc_controller *ndfc, + struct device_node *node) { - struct ndfc_controller *ndfc = &ndfc_ctrl; - struct nand_chip *chip = &mtd->chip; +#ifdef CONFIG_MTD_PARTITIONS +#ifdef CONFIG_MTD_CMDLINE_PARTS + static const char *part_types[] = { "cmdlinepart", NULL }; +#else + static const char *part_types[] = { NULL }; +#endif +#endif + struct device_node *flash_np; + struct nand_chip *chip = &ndfc->chip; + int ret; chip->IO_ADDR_R = ndfc->ndfcbase + NDFC_DATA; chip->IO_ADDR_W = ndfc->ndfcbase + NDFC_DATA; @@ -160,8 +168,6 @@ static void ndfc_chip_init(struct ndfc_nand_mtd *mtd) chip->dev_ready = ndfc_ready; chip->select_chip = ndfc_select_chip; chip->chip_delay = 50; - chip->priv = mtd; - chip->options = mtd->pl_chip->options; chip->controller = &ndfc->ndfc_control; chip->read_buf = ndfc_read_buf; chip->write_buf = ndfc_write_buf; @@ -172,143 +178,136 @@ static void ndfc_chip_init(struct ndfc_nand_mtd *mtd) chip->ecc.mode = NAND_ECC_HW; chip->ecc.size = 256; chip->ecc.bytes = 3; - chip->ecclayout = chip->ecc.layout = mtd->pl_chip->ecclayout; - mtd->mtd.priv = chip; - mtd->mtd.owner = THIS_MODULE; -} - -static int ndfc_chip_probe(struct platform_device *pdev) -{ - struct platform_nand_chip *nc = pdev->dev.platform_data; - struct 
ndfc_chip_settings *settings = nc->priv; - struct ndfc_controller *ndfc = &ndfc_ctrl; - struct ndfc_nand_mtd *nandmtd; - - if (nc->chip_offset >= NDFC_MAX_BANKS || nc->nr_chips > NDFC_MAX_BANKS) - return -EINVAL; - - /* Set the bank settings */ - __raw_writel(settings->bank_settings, - ndfc->ndfcbase + NDFC_BCFG0 + (nc->chip_offset << 2)); - nandmtd = &ndfc_mtd[pdev->id]; - if (nandmtd->pl_chip) - return -EBUSY; + ndfc->mtd.priv = chip; + ndfc->mtd.owner = THIS_MODULE; - nandmtd->pl_chip = nc; - ndfc_chip_init(nandmtd); - - /* Scan for chips */ - if (nand_scan(&nandmtd->mtd, nc->nr_chips)) { - nandmtd->pl_chip = NULL; + flash_np = of_get_next_child(node, NULL); + if (!flash_np) return -ENODEV; + + ndfc->mtd.name = kasprintf(GFP_KERNEL, "%s.%s", + ndfc->ofdev->dev.bus_id, flash_np->name); + if (!ndfc->mtd.name) { + ret = -ENOMEM; + goto err; } -#ifdef CONFIG_MTD_PARTITIONS - printk("Number of partitions %d\n", nc->nr_partitions); - if (nc->nr_partitions) { - /* Add the full device, so complete dumps can be made */ - add_mtd_device(&nandmtd->mtd); - add_mtd_partitions(&nandmtd->mtd, nc->partitions, - nc->nr_partitions); + ret = nand_scan(&ndfc->mtd, 1); + if (ret) + goto err; - } else -#else - add_mtd_device(&nandmtd->mtd); +#ifdef CONFIG_MTD_PARTITIONS + ret = parse_mtd_partitions(&ndfc->mtd, part_types, &ndfc->parts, 0); + if (ret < 0) + goto err; + +#ifdef CONFIG_MTD_OF_PARTS + if (ret == 0) { + ret = of_mtd_parse_partitions(&ndfc->ofdev->dev, flash_np, + &ndfc->parts); + if (ret < 0) + goto err; + } #endif - atomic_inc(&ndfc->childs_active); - return 0; -} + if (ret > 0) + ret = add_mtd_partitions(&ndfc->mtd, ndfc->parts, ret); + else +#endif + ret = add_mtd_device(&ndfc->mtd); -static int ndfc_chip_remove(struct platform_device *pdev) -{ - return 0; +err: + of_node_put(flash_np); + if (ret) + kfree(ndfc->mtd.name); + return ret; } -static int ndfc_nand_probe(struct platform_device *pdev) +static int __devinit ndfc_probe(struct of_device *ofdev, + const struct 
of_device_id *match) { - struct platform_nand_ctrl *nc = pdev->dev.platform_data; - struct ndfc_controller_settings *settings = nc->priv; - struct resource *res = pdev->resource; struct ndfc_controller *ndfc = &ndfc_ctrl; - unsigned long long phys = settings->ndfc_erpn | res->start; + const u32 *reg; + u32 ccr; + int err, len; -#ifndef CONFIG_PHYS_64BIT - ndfc->ndfcbase = ioremap((phys_addr_t)phys, res->end - res->start + 1); -#else - ndfc->ndfcbase = ioremap64(phys, res->end - res->start + 1); -#endif + spin_lock_init(&ndfc->ndfc_control.lock); + init_waitqueue_head(&ndfc->ndfc_control.wq); + ndfc->ofdev = ofdev; + dev_set_drvdata(&ofdev->dev, ndfc); + + /* Read the reg property to get the chip select */ + reg = of_get_property(ofdev->node, "reg", &len); + if (reg == NULL || len != 12) { + dev_err(&ofdev->dev, "unable read reg property (%d)\n", len); + return -ENOENT; + } + ndfc->chip_select = reg[0]; + + ndfc->ndfcbase = of_iomap(ofdev->node, 0); if (!ndfc->ndfcbase) { - printk(KERN_ERR "NDFC: ioremap failed\n"); + dev_err(&ofdev->dev, "failed to get memory\n"); return -EIO; } - __raw_writel(settings->ccr_settings, ndfc->ndfcbase + NDFC_CCR); + ccr = NDFC_CCR_BS(ndfc->chip_select); - spin_lock_init(&ndfc->ndfc_control.lock); - init_waitqueue_head(&ndfc->ndfc_control.wq); + /* It is ok if ccr does not exist - just default to 0 */ + reg = of_get_property(ofdev->node, "ccr", NULL); + if (reg) + ccr |= *reg; - platform_set_drvdata(pdev, ndfc); + out_be32(ndfc->ndfcbase + NDFC_CCR, ccr); - printk("NDFC NAND Driver initialized. 
Chip-Rev: 0x%08x\n", - __raw_readl(ndfc->ndfcbase + NDFC_REVID)); + /* Set the bank settings if given */ + reg = of_get_property(ofdev->node, "bank-settings", NULL); + if (reg) { + int offset = NDFC_BCFG0 + (ndfc->chip_select << 2); + out_be32(ndfc->ndfcbase + offset, *reg); + } + + err = ndfc_chip_init(ndfc, ofdev->node); + if (err) { + iounmap(ndfc->ndfcbase); + return err; + } return 0; } -static int ndfc_nand_remove(struct platform_device *pdev) +static int __devexit ndfc_remove(struct of_device *ofdev) { - struct ndfc_controller *ndfc = platform_get_drvdata(pdev); + struct ndfc_controller *ndfc = dev_get_drvdata(&ofdev->dev); - if (atomic_read(&ndfc->childs_active)) - return -EBUSY; + nand_release(&ndfc->mtd); - if (ndfc) { - platform_set_drvdata(pdev, NULL); - iounmap(ndfc_ctrl.ndfcbase); - ndfc_ctrl.ndfcbase = NULL; - } return 0; } -/* driver device registration */ - -static struct platform_driver ndfc_chip_driver = { - .probe = ndfc_chip_probe, - .remove = ndfc_chip_remove, - .driver = { - .name = "ndfc-chip", - .owner = THIS_MODULE, - }, +static const struct of_device_id ndfc_match[] = { + { .compatible = "ibm,ndfc", }, + {} }; +MODULE_DEVICE_TABLE(of, ndfc_match); -static struct platform_driver ndfc_nand_driver = { - .probe = ndfc_nand_probe, - .remove = ndfc_nand_remove, - .driver = { - .name = "ndfc-nand", - .owner = THIS_MODULE, +static struct of_platform_driver ndfc_driver = { + .driver = { + .name = "ndfc", }, + .match_table = ndfc_match, + .probe = ndfc_probe, + .remove = __devexit_p(ndfc_remove), }; static int __init ndfc_nand_init(void) { - int ret; - - spin_lock_init(&ndfc_ctrl.ndfc_control.lock); - init_waitqueue_head(&ndfc_ctrl.ndfc_control.wq); - - ret = platform_driver_register(&ndfc_nand_driver); - if (!ret) - ret = platform_driver_register(&ndfc_chip_driver); - return ret; + return of_register_platform_driver(&ndfc_driver); } static void __exit ndfc_nand_exit(void) { - platform_driver_unregister(&ndfc_chip_driver); - 
platform_driver_unregister(&ndfc_nand_driver); + of_unregister_platform_driver(&ndfc_driver); } module_init(ndfc_nand_init); @@ -316,6 +315,4 @@ module_exit(ndfc_nand_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Thomas Gleixner "); -MODULE_DESCRIPTION("Platform driver for NDFC"); -MODULE_ALIAS("platform:ndfc-chip"); -MODULE_ALIAS("platform:ndfc-nand"); +MODULE_DESCRIPTION("OF Platform driver for NDFC"); -- cgit v1.2.3-70-g09d2 From 28405d8d9ce05f5bd869ef8b48da5086f9527d73 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Jan 2009 17:14:31 -0700 Subject: async_tx, dmaengine: document channel allocation and api rework "Wouldn't it be better if the dmaengine layer made sure it didn't pass the same channel several times to a client? I mean, you seem concerned that the memcpy() API should be transparent and easy to use, but the whole registration interface is just ridiculously complicated..." - Haavard The dmaengine and async_tx registration/allocation interface is indeed needlessly complicated. This redesign has the following goals: 1/ Simplify reference counting: dma channels are not something one would expect to be hotplugged, it should be an exceptional event handled by drivers not something clients should be mandated to handle in a callback. The common case channel removal event is 'rmmod ', which for simplicity should be disallowed if the channel is in use. 2/ Add an interface for requesting exclusive access to a channel suitable to device-to-memory users. 3/ Convert all memory-to-memory users over to a common allocator, the goal here is to not have competing channel allocation schemes. The only competition should be between device-to-memory exclusive allocations and the memory-to-memory usage case where channels are shared between multiple "clients". 
Cc: Haavard Skinnemoen Cc: Neil Brown Cc: Jeff Garzik Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 96 ++++++++++++++++------------------- Documentation/dmaengine.txt | 1 + 2 files changed, 45 insertions(+), 52 deletions(-) create mode 100644 Documentation/dmaengine.txt (limited to 'Documentation') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index c1e9545c59b..9f59fcbf5d8 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -13,9 +13,9 @@ 3.6 Constraints 3.7 Example -4 DRIVER DEVELOPER NOTES +4 DMAENGINE DRIVER DEVELOPER NOTES 4.1 Conformance points -4.2 "My application needs finer control of hardware channels" +4.2 "My application needs exclusive control of hardware channels" 5 SOURCE @@ -150,6 +150,7 @@ ops_run_* and ops_complete_* routines in drivers/md/raid5.c for more implementation examples. 4 DRIVER DEVELOPMENT NOTES + 4.1 Conformance points: There are a few conformance points required in dmaengine drivers to accommodate assumptions made by applications using the async_tx API: @@ -158,58 +159,49 @@ accommodate assumptions made by applications using the async_tx API: 3/ Use async_tx_run_dependencies() in the descriptor clean up path to handle submission of dependent operations -4.2 "My application needs finer control of hardware channels" -This requirement seems to arise from cases where a DMA engine driver is -trying to support device-to-memory DMA. The dmaengine and async_tx -implementations were designed for offloading memory-to-memory -operations; however, there are some capabilities of the dmaengine layer -that can be used for platform-specific channel management. -Platform-specific constraints can be handled by registering the -application as a 'dma_client' and implementing a 'dma_event_callback' to -apply a filter to the available channels in the system. 
Before showing -how to implement a custom dma_event callback some background of -dmaengine's client support is required. - -The following routines in dmaengine support multiple clients requesting -use of a channel: -- dma_async_client_register(struct dma_client *client) -- dma_async_client_chan_request(struct dma_client *client) - -dma_async_client_register takes a pointer to an initialized dma_client -structure. It expects that the 'event_callback' and 'cap_mask' fields -are already initialized. - -dma_async_client_chan_request triggers dmaengine to notify the client of -all channels that satisfy the capability mask. It is up to the client's -event_callback routine to track how many channels the client needs and -how many it is currently using. The dma_event_callback routine returns a -dma_state_client code to let dmaengine know the status of the -allocation. - -Below is the example of how to extend this functionality for -platform-specific filtering of the available channels beyond the -standard capability mask: - -static enum dma_state_client -my_dma_client_callback(struct dma_client *client, - struct dma_chan *chan, enum dma_state state) -{ - struct dma_device *dma_dev; - struct my_platform_specific_dma *plat_dma_dev; - - dma_dev = chan->device; - plat_dma_dev = container_of(dma_dev, - struct my_platform_specific_dma, - dma_dev); - - if (!plat_dma_dev->platform_specific_capability) - return DMA_DUP; - - . . . -} +4.2 "My application needs exclusive control of hardware channels" +Primarily this requirement arises from cases where a DMA engine driver +is being used to support device-to-memory operations. A channel that is +performing these operations cannot, for many platform specific reasons, +be shared. For these cases the dma_request_channel() interface is +provided. 
+ +The interface is: +struct dma_chan *dma_request_channel(dma_cap_mask_t mask, + dma_filter_fn filter_fn, + void *filter_param); + +Where dma_filter_fn is defined as: +typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + +When the optional 'filter_fn' parameter is set to NULL +dma_request_channel simply returns the first channel that satisfies the +capability mask. Otherwise, when the mask parameter is insufficient for +specifying the necessary channel, the filter_fn routine can be used to +disposition the available channels in the system. The filter_fn routine +is called once for each free channel in the system. Upon seeing a +suitable channel filter_fn returns DMA_ACK which flags that channel to +be the return value from dma_request_channel. A channel allocated via +this interface is exclusive to the caller, until dma_release_channel() +is called. + +The DMA_PRIVATE capability flag is used to tag dma devices that should +not be used by the general-purpose allocator. It can be set at +initialization time if it is known that a channel will always be +private. Alternatively, it is set when dma_request_channel() finds an +unused "public" channel. + +A couple caveats to note when implementing a driver and consumer: +1/ Once a channel has been privately allocated it will no longer be + considered by the general-purpose allocator even after a call to + dma_release_channel(). +2/ Since capabilities are specified at the device level a dma_device + with multiple channels will either have all channels public, or all + channels private. 
5 SOURCE -include/linux/dmaengine.h: core header file for DMA drivers and clients + +include/linux/dmaengine.h: core header file for DMA drivers and api users drivers/dma/dmaengine.c: offload engine channel management routines drivers/dma/: location for offload engine drivers include/linux/async_tx.h: core header file for the async_tx api diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt new file mode 100644 index 00000000000..0c1c2f63c0a --- /dev/null +++ b/Documentation/dmaengine.txt @@ -0,0 +1 @@ +See Documentation/crypto/async-tx-api.txt -- cgit v1.2.3-70-g09d2 From b3881f74b31b7d47d0f1c4d89ac3e7f0b9c05e3e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 5 Jan 2009 22:46:26 -0500 Subject: ext4: Add mount option to set kjournald's I/O priority Signed-off-by: "Theodore Ts'o" Cc: Jens Axboe --- Documentation/filesystems/ext4.txt | 7 +++++++ fs/ext4/super.c | 29 +++++++++++++++++++++++++---- fs/ioprio.c | 3 ++- include/linux/ioprio.h | 2 ++ 4 files changed, 36 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 9ec29d86ff8..8938949b201 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -308,6 +308,13 @@ min_batch_time=usec This parameter sets the commit time (as multi-threaded, synchronous workloads on very fast disks, at the cost of increasing latency. +journal_ioprio=prio The I/O priority (from 0 to 7, where 0 is the + highest priority) which should be used for I/O + operations submitted by kjournald2 during a + commit operation. This defaults to 3, which is + a slightly higher priority than the default I/O + priority. 
+ Data Mode ========= There are 3 different data modes: diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8036392b212..8ff8709828f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1013,7 +1013,7 @@ enum { Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, - Opt_inode_readahead_blks + Opt_inode_readahead_blks, Opt_journal_ioprio }; static const match_table_t tokens = { @@ -1074,6 +1074,7 @@ static const match_table_t tokens = { {Opt_delalloc, "delalloc"}, {Opt_nodelalloc, "nodelalloc"}, {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, + {Opt_journal_ioprio, "journal_ioprio=%u"}, {Opt_err, NULL}, }; @@ -1098,8 +1099,11 @@ static ext4_fsblk_t get_sb_block(void **data) return sb_block; } +#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) + static int parse_options(char *options, struct super_block *sb, unsigned long *journal_devnum, + unsigned int *journal_ioprio, ext4_fsblk_t *n_blocks_count, int is_remount) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1492,6 +1496,14 @@ set_qf_format: return 0; sbi->s_inode_readahead_blks = option; break; + case Opt_journal_ioprio: + if (match_int(&args[0], &option)) + return 0; + if (option < 0 || option > 7) + break; + *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, + option); + break; default: printk(KERN_ERR "EXT4-fs: Unrecognized mount option \"%s\" " @@ -2035,6 +2047,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) int features; __u64 blocks_count; int err; + unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) @@ -2141,7 +2154,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, DELALLOC); - if (!parse_options((char *) data, sb, &journal_devnum, NULL, 0)) + if (!parse_options((char *) data, sb, &journal_devnum, + &journal_ioprio, NULL, 0)) goto 
failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -2506,6 +2520,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) default: break; } + set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); no_journal: @@ -3127,6 +3142,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) unsigned long old_sb_flags; struct ext4_mount_options old_opts; ext4_group_t g; + unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; int err; #ifdef CONFIG_QUOTA int i; @@ -3145,11 +3161,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) for (i = 0; i < MAXQUOTAS; i++) old_opts.s_qf_names[i] = sbi->s_qf_names[i]; #endif + if (sbi->s_journal && sbi->s_journal->j_task->io_context) + journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; /* * Allow the "check" option to be passed as a remount option. */ - if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) { + if (!parse_options(data, sb, NULL, &journal_ioprio, + &n_blocks_count, 1)) { err = -EINVAL; goto restore_opts; } @@ -3162,8 +3181,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) es = sbi->s_es; - if (sbi->s_journal) + if (sbi->s_journal) { ext4_init_journal_params(sb, sbi->s_journal); + set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); + } if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || n_blocks_count > ext4_blocks_count(es)) { diff --git a/fs/ioprio.c b/fs/ioprio.c index 3569e0ad86a..1a39ac37094 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -27,7 +27,7 @@ #include #include -static int set_task_ioprio(struct task_struct *task, int ioprio) +int set_task_ioprio(struct task_struct *task, int ioprio) { int err; struct io_context *ioc; @@ -70,6 +70,7 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) task_unlock(task); return err; } +EXPORT_SYMBOL_GPL(set_task_ioprio); asmlinkage long sys_ioprio_set(int which, int who, int ioprio) { diff --git a/include/linux/ioprio.h 
b/include/linux/ioprio.h index f98a656b17e..76dad480884 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -86,4 +86,6 @@ static inline int task_nice_ioclass(struct task_struct *task) */ extern int ioprio_best(unsigned short aprio, unsigned short bprio); +extern int set_task_ioprio(struct task_struct *task, int ioprio); + #endif -- cgit v1.2.3-70-g09d2 From 025dfdafe77f20b3890981a394774baab7b9c827 Mon Sep 17 00:00:00 2001 From: Frederik Schwarzer Date: Thu, 16 Oct 2008 19:02:37 +0200 Subject: trivial: fix then -> than typos in comments and documentation - (better, more, bigger ...) then -> (...) than Signed-off-by: Frederik Schwarzer Signed-off-by: Jiri Kosina --- Documentation/hwmon/abituguru-datasheet | 6 +++--- Documentation/networking/rxrpc.txt | 2 +- Documentation/scsi/ChangeLog.lpfc | 2 +- arch/blackfin/kernel/kgdb.c | 2 +- arch/ia64/kernel/kprobes.c | 2 +- arch/m68k/Kconfig | 2 +- arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c | 2 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/oprofile/cell/spu_profiler.c | 2 +- arch/s390/Kconfig | 2 +- arch/s390/kernel/kprobes.c | 2 +- arch/sparc/kernel/kprobes.c | 2 +- arch/x86/kernel/kprobes.c | 2 +- arch/x86/kernel/mfgpt_32.c | 2 +- drivers/hwmon/fschmd.c | 2 +- drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/message/i2o/i2o_scsi.c | 2 +- drivers/mtd/devices/pmc551.c | 2 +- drivers/mtd/ubi/eba.c | 2 +- drivers/mtd/ubi/io.c | 2 +- drivers/mtd/ubi/scan.c | 2 +- drivers/mtd/ubi/ubi-media.h | 4 ++-- drivers/mtd/ubi/vtbl.c | 2 +- drivers/mtd/ubi/wl.c | 4 ++-- drivers/net/bnx2x_link.c | 2 +- drivers/net/e1000/e1000_hw.c | 4 ++-- drivers/net/slip.h | 2 +- drivers/net/tehuti.c | 4 ++-- drivers/net/tokenring/smctr.c | 2 +- drivers/net/wireless/ipw2x00/ipw2100.c | 2 +- drivers/net/wireless/rt2x00/rt2x00crypto.c | 4 ++-- drivers/net/wireless/strip.c | 2 +- drivers/s390/block/dasd_eer.c | 4 ++-- drivers/s390/char/vmlogrdr.c | 4 ++-- drivers/scsi/lpfc/lpfc_hbadisc.c | 4 ++-- drivers/scsi/lpfc/lpfc_sli.c | 10 
+++++----- drivers/serial/crisv10.c | 4 ++-- drivers/video/console/vgacon.c | 2 +- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/proc/task_nommu.c | 2 +- fs/ubifs/Kconfig | 2 +- fs/ubifs/budget.c | 4 ++-- fs/ubifs/gc.c | 2 +- fs/ubifs/journal.c | 2 +- fs/ubifs/shrinker.c | 2 +- fs/xfs/linux-2.6/xfs_super.c | 2 +- include/linux/mtd/mtd.h | 2 +- include/linux/spi/spi.h | 4 ++-- include/mtd/ubi-user.h | 2 +- kernel/pid.c | 2 +- kernel/time/jiffies.c | 2 +- net/sctp/auth.c | 4 ++-- net/sctp/sm_statefuns.c | 6 +++--- net/sctp/socket.c | 2 +- net/sctp/tsnmap.c | 2 +- sound/usb/usx2y/usbusx2y.c | 2 +- 56 files changed, 76 insertions(+), 76 deletions(-) (limited to 'Documentation') diff --git a/Documentation/hwmon/abituguru-datasheet b/Documentation/hwmon/abituguru-datasheet index aef5a9b3684..4d184f2db0e 100644 --- a/Documentation/hwmon/abituguru-datasheet +++ b/Documentation/hwmon/abituguru-datasheet @@ -74,7 +74,7 @@ a sensor. Notice that some banks have both a read and a write address this is how the uGuru determines if a read from or a write to the bank is taking place, thus when reading you should always use the read address and when writing the -write address. The write address is always one (1) more then the read address. +write address. The write address is always one (1) more than the read address. uGuru ready @@ -224,7 +224,7 @@ Bit 3: Beep if alarm (RW) Bit 4: 1 if alarm cause measured temp is over the warning threshold (R) Bit 5: 1 if alarm cause measured volt is over the max threshold (R) Bit 6: 1 if alarm cause measured volt is under the min threshold (R) -Bit 7: Volt sensor: Shutdown if alarm persist for more then 4 seconds (RW) +Bit 7: Volt sensor: Shutdown if alarm persist for more than 4 seconds (RW) Temp sensor: Shutdown if temp is over the shutdown threshold (RW) * This bit is only honored/used by the uGuru if a temp sensor is connected @@ -293,7 +293,7 @@ Byte 0: Alarm behaviour for the selected sensor. A 1 enables the described behaviour. 
Bit 0: Give an alarm if measured rpm is under the min threshold (RW) Bit 3: Beep if alarm (RW) -Bit 7: Shutdown if alarm persist for more then 4 seconds (RW) +Bit 7: Shutdown if alarm persist for more than 4 seconds (RW) Byte 1: min threshold (scale as bank 0x26) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index c3669a3fb4a..60d05eb77c6 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -540,7 +540,7 @@ A client would issue an operation by: MSG_MORE should be set in msghdr::msg_flags on all but the last part of the request. Multiple requests may be made simultaneously. - If a call is intended to go to a destination other then the default + If a call is intended to go to a destination other than the default specified through connect(), then msghdr::msg_name should be set on the first request message of that call. diff --git a/Documentation/scsi/ChangeLog.lpfc b/Documentation/scsi/ChangeLog.lpfc index ae3f962a7cf..ff19a52fe00 100644 --- a/Documentation/scsi/ChangeLog.lpfc +++ b/Documentation/scsi/ChangeLog.lpfc @@ -733,7 +733,7 @@ Changes from 20040920 to 20041018 I/O completion path a little more, especially taking care of fast-pathing the non-error case. Also removes tons of dead members and defines from lpfc_scsi.h - e.g. lpfc_target is down - to nothing more then the lpfc_nodelist pointer. + to nothing more than the lpfc_nodelist pointer. * Added binary sysfs file to issue mbox commands * Replaced #if __BIG_ENDIAN with #if __BIG_ENDIAN_BITFIELD for compatibility with the user space applications. diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c index b795a207742..1c5afaeb950 100644 --- a/arch/blackfin/kernel/kgdb.c +++ b/arch/blackfin/kernel/kgdb.c @@ -105,7 +105,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) * Extracts ebp, esp and eip values understandable by gdb from the values * saved by switch_to. * thread.esp points to ebp. 
flags and ebp are pushed in switch_to hence esp - * prior to entering switch_to is 8 greater then the value that is saved. + * prior to entering switch_to is 8 greater than the value that is saved. * If switch_to changes, change following code appropriately. */ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index f07688da947..0017b9de2dd 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -434,7 +434,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) /* * It is possible to have multiple instances associated with a given * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more then one return + * have a return probe installed on them, and/or more than one return * return probe was registered for a target function. * * We can handle this because: diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index c825bde17cb..fb87c08c6b5 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -303,7 +303,7 @@ config M68KFPU_EMU_EXTRAPREC correct rounding, the emulator can (often) do the same but this extra calculation can cost quite some time, so you can disable it here. The emulator will then "only" calculate with a 64 bit - mantissa and round slightly incorrect, what is more then enough + mantissa and round slightly incorrect, what is more than enough for normal usage. 
config M68KFPU_EMU_ONLY diff --git a/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c b/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c index 97862f45496..caf5e9a0acc 100644 --- a/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c +++ b/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c @@ -148,7 +148,7 @@ int read_eeprom(char *buffer, int eeprom_size, int size) send_byte(W_HEADER); recv_ack(); - /* EEPROM with size of more then 2K need two byte addressing */ + /* EEPROM with size of more than 2K need two byte addressing */ if (eeprom_size > 2048) { send_byte(0x00); recv_ack(); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index de79915452c..b29005a5a8f 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -316,7 +316,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, /* * It is possible to have multiple instances associated with a given * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more then one return + * have a return probe installed on them, and/or more than one return * return probe was registered for a target function. * * We can handle this because: diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index dd499c3e9da..83faa958b9d 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -49,7 +49,7 @@ void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_rese * of precision. This is close enough for the purpose at hand. * * The value of the timeout should be small enough that the hw - * trace buffer will not get more then about 1/3 full for the + * trace buffer will not get more than about 1/3 full for the * maximum user specified (the LFSR value) hw sampling frequency. * This is to ensure the trace buffer will never fill even if the * kernel thread scheduling varies under a heavy system load. 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 19577aeffd7..a94a3c3ae93 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -299,7 +299,7 @@ config WARN_STACK This option enables the compiler options -mwarn-framesize and -mwarn-dynamicstack. If the compiler supports these options it will generate warnings for function which either use alloca or - create a stack frame bigger then CONFIG_WARN_STACK_SIZE. + create a stack frame bigger than CONFIG_WARN_STACK_SIZE. Say N if you are unsure. diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 569079ec4ff..267f6698680 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -381,7 +381,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, /* * It is possible to have multiple instances associated with a given * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more then one return + * have a return probe installed on them, and/or more than one return * return probe was registered for a target function. * * We can handle this because: diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c index 201a6e547e4..3bc6527c95a 100644 --- a/arch/sparc/kernel/kprobes.c +++ b/arch/sparc/kernel/kprobes.c @@ -517,7 +517,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) /* * It is possible to have multiple instances associated with a given * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more then one return + * have a return probe installed on them, and/or more than one return * return probe was registered for a target function. 
* * We can handle this because: diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 6c27679ec6a..a116e6d5726 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -694,7 +694,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) /* * It is possible to have multiple instances associated with a given * task either because multiple functions in the call path have - * return probes installed on them, and/or more then one + * return probes installed on them, and/or more than one * return probe was registered for a target function. * * We can handle this because: diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index c12314c9e86..8815f3c7fec 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -252,7 +252,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); /* * The MFPGT timers on the CS5536 provide us with suitable timers to use * as clock event sources - not as good as a HPET or APIC, but certainly - * better then the PIT. This isn't a general purpose MFGPT driver, but + * better than the PIT. This isn't a general purpose MFGPT driver, but * a simplified one designed specifically to act as a clock event source. * For full details about the MFGPT, please consult the CS5536 data sheet. */ diff --git a/drivers/hwmon/fschmd.c b/drivers/hwmon/fschmd.c index 96717036893..8b2d756595d 100644 --- a/drivers/hwmon/fschmd.c +++ b/drivers/hwmon/fschmd.c @@ -75,7 +75,7 @@ static const u8 FSCHMD_REG_VOLT[3] = { 0x45, 0x42, 0x48 }; /* minimum pwm at which the fan is driven (pwm can by increased depending on the temp. Notice that for the scy some fans share there minimum speed. - Also notice that with the scy the sensor order is different then with the + Also notice that with the scy the sensor order is different than with the other chips, this order was in the 2.4 driver and kept for consistency. 
*/ static const u8 FSCHMD_REG_FAN_MIN[5][6] = { { 0x55, 0x65 }, /* pos */ diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a3c5af1d7ec..de5263beab4 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -367,7 +367,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) if (err) goto out; } else { - /* Can't be smaller then the number of outstanding CQEs */ + /* Can't be smaller than the number of outstanding CQEs */ outst_cqe = mlx4_ib_get_outstanding_cqes(cq); if (entries < outst_cqe + 1) { err = 0; diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c index 1bcdbbb9e7d..3d45817e6dc 100644 --- a/drivers/message/i2o/i2o_scsi.c +++ b/drivers/message/i2o/i2o_scsi.c @@ -390,7 +390,7 @@ static int i2o_scsi_reply(struct i2o_controller *c, u32 m, * @i2o_dev: the I2O device which was added * * If a I2O device is added we catch the notification, because I2O classes - * other then SCSI peripheral will not be received through + * other than SCSI peripheral will not be received through * i2o_scsi_probe(). */ static void i2o_scsi_notify_device_add(struct i2o_device *i2o_dev) diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c index d38bca64bb1..d2fd550f7e0 100644 --- a/drivers/mtd/devices/pmc551.c +++ b/drivers/mtd/devices/pmc551.c @@ -34,7 +34,7 @@ * aperture size, not the dram size, and the V370PDC supplies no * other method for memory size discovery. This problem is * mostly only relevant when compiled as a module, as the - * unloading of the module with an aperture size smaller then + * unloading of the module with an aperture size smaller than * the ram will cause the driver to detect the onboard memory * size to be equal to the aperture size when the module is * reloaded. 
Soooo, to help, the module supports an msize diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 048a606cebd..25def348e5b 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -717,7 +717,7 @@ write_error: * to the real data size, although the @buf buffer has to contain the * alignment. In all other cases, @len has to be aligned. * - * It is prohibited to write more then once to logical eraseblocks of static + * It is prohibited to write more than once to logical eraseblocks of static * volumes. This function returns zero in case of success and a negative error * code in case of failure. */ diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index a74118c0574..fe81039f2a7 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -465,7 +465,7 @@ out: * This function synchronously erases physical eraseblock @pnum. If @torture * flag is not zero, the physical eraseblock is checked by means of writing * different patterns to it and reading them back. If the torturing is enabled, - * the physical eraseblock is erased more then once. + * the physical eraseblock is erased more than once. * * This function returns the number of erasures made in case of success, %-EIO * if the erasure failed or the torturing test failed, and other negative error diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 41d47e1cf15..ecde202a5a1 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -478,7 +478,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, return 0; } else { /* - * This logical eraseblock is older then the one found + * This logical eraseblock is older than the one found * previously. 
*/ if (cmp_res & 4) diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h index 2ad94040905..8419fdccc79 100644 --- a/drivers/mtd/ubi/ubi-media.h +++ b/drivers/mtd/ubi/ubi-media.h @@ -135,7 +135,7 @@ enum { * The erase counter header takes 64 bytes and has a plenty of unused space for * future usage. The unused fields are zeroed. The @version field is used to * indicate the version of UBI implementation which is supposed to be able to - * work with this UBI image. If @version is greater then the current UBI + * work with this UBI image. If @version is greater than the current UBI * version, the image is rejected. This may be useful in future if something * is changed radically. This field is duplicated in the volume identifier * header. @@ -187,7 +187,7 @@ struct ubi_ec_hdr { * (sequence number) is used to distinguish between older and newer versions of * logical eraseblocks. * - * There are 2 situations when there may be more then one physical eraseblock + * There are 2 situations when there may be more than one physical eraseblock * corresponding to the same logical eraseblock, i.e., having the same @vol_id * and @lnum values in the volume identifier header. Suppose we have a logical * eraseblock L and it is mapped to the physical eraseblock P. 
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 333c8941552..1afc61e7455 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -577,7 +577,7 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) { /* Auto re-size flag may be set only for one volume */ if (ubi->autoresize_vol_id != -1) { - ubi_err("more then one auto-resize volume (%d " + ubi_err("more than one auto-resize volume (%d " "and %d)", ubi->autoresize_vol_id, i); kfree(vol); return -EINVAL; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 14901cb82c1..891534f8210 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -128,7 +128,7 @@ * situation when the picked physical eraseblock is constantly erased after the * data is written to it. So, we have a constant which limits the highest erase * counter of the free physical eraseblock to pick. Namely, the WL sub-system - * does not pick eraseblocks with erase counter greater then the lowest erase + * does not pick eraseblocks with erase counter greater than the lowest erase * counter plus %WL_FREE_MAX_DIFF. */ #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) @@ -917,7 +917,7 @@ static int ensure_wear_leveling(struct ubi_device *ubi) /* * We schedule wear-leveling only if the difference between the * lowest erase counter of used physical eraseblocks and a high - * erase counter of free physical eraseblocks is greater then + * erase counter of free physical eraseblocks is greater than * %UBI_WL_THRESHOLD. 
*/ e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); diff --git a/drivers/net/bnx2x_link.c b/drivers/net/bnx2x_link.c index 67de94f1f30..fefa6ab1306 100644 --- a/drivers/net/bnx2x_link.c +++ b/drivers/net/bnx2x_link.c @@ -3359,7 +3359,7 @@ static u8 bnx2x_format_ver(u32 num, u8 *str, u16 len) u8 shift = 8*4; u8 digit; if (len < 10) { - /* Need more then 10chars for this format */ + /* Need more than 10chars for this format */ *str_ptr = '\0'; return -EINVAL; } diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c index d04eef53571..e1a3fc1303e 100644 --- a/drivers/net/e1000/e1000_hw.c +++ b/drivers/net/e1000/e1000_hw.c @@ -6758,7 +6758,7 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length, * returns: - E1000_ERR_XXX * E1000_SUCCESS * - * For phy's older then IGP, this function simply reads the polarity bit in the + * For phy's older than IGP, this function simply reads the polarity bit in the * Phy Status register. For IGP phy's, this bit is valid only if link speed is * 10 Mbps. If the link speed is 100 Mbps there is no polarity so this bit will * return 0. If the link speed is 1000 Mbps the polarity status is in the @@ -6834,7 +6834,7 @@ static s32 e1000_check_polarity(struct e1000_hw *hw, * returns: - E1000_ERR_XXX * E1000_SUCCESS * - * For phy's older then IGP, this function reads the Downshift bit in the Phy + * For phy's older than IGP, this function reads the Downshift bit in the Phy * Specific Status register. For IGP phy's, it reads the Downgrade bit in the * Link Health register. In IGP this bit is latched high, so the driver must * read it immediately after link is established. 
diff --git a/drivers/net/slip.h b/drivers/net/slip.h index 853e0f6ec71..9ea5c11287d 100644 --- a/drivers/net/slip.h +++ b/drivers/net/slip.h @@ -75,7 +75,7 @@ struct slip { unsigned long tx_errors; /* Planned stuff */ unsigned long rx_dropped; /* No memory for skb */ unsigned long tx_dropped; /* When MTU change */ - unsigned long rx_over_errors; /* Frame bigger then SLIP buf. */ + unsigned long rx_over_errors; /* Frame bigger than SLIP buf. */ #ifdef SL_INCLUDE_CSLIP unsigned long tx_compressed; unsigned long rx_compressed; diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index a10a83a11d9..a7a4dc4d631 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1004,7 +1004,7 @@ static inline void bdx_rxdb_free_elem(struct rxdb *db, int n) * skb for rx. It assumes that Rx is desabled in HW * funcs are grouped for better cache usage * - * RxD fifo is smaller then RxF fifo by design. Upon high load, RxD will be + * RxD fifo is smaller than RxF fifo by design. Upon high load, RxD will be * filled and packets will be dropped by nic without getting into host or * cousing interrupt. Anyway, in that condition, host has no chance to proccess * all packets, but dropping in nic is cheaper, since it takes 0 cpu cycles @@ -1826,7 +1826,7 @@ static void bdx_tx_free(struct bdx_priv *priv) * * Pushes desc to TxD fifo and overlaps it if needed. * NOTE: this func does not check for available space. this is responsibility - * of the caller. Neither does it check that data size is smaller then + * of the caller. Neither does it check that data size is smaller than * fifo size. */ static void bdx_tx_push_desc(struct bdx_priv *priv, void *data, int size) diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c index a011666342f..50eb29ce3c8 100644 --- a/drivers/net/tokenring/smctr.c +++ b/drivers/net/tokenring/smctr.c @@ -3064,7 +3064,7 @@ static int smctr_load_node_addr(struct net_device *dev) * will consequently cause a timeout. 
* * NOTE 1: If the monitor_state is MS_BEACON_TEST_STATE, all transmit - * queues other then the one used for the lobe_media_test should be + * queues other than the one used for the lobe_media_test should be * disabled.!? * * NOTE 2: If the monitor_state is MS_BEACON_TEST_STATE and the receive_mask diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index 1667065b86a..753de1a9c4b 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -1332,7 +1332,7 @@ static int ipw2100_power_cycle_adapter(struct ipw2100_priv *priv) IPW_AUX_HOST_RESET_REG_STOP_MASTER); /* Step 2. Wait for stop Master Assert - * (not more then 50us, otherwise ret error */ + * (not more than 50us, otherwise ret error */ i = 5; do { udelay(IPW_WAIT_RESET_MASTER_ASSERT_COMPLETE_DELAY); diff --git a/drivers/net/wireless/rt2x00/rt2x00crypto.c b/drivers/net/wireless/rt2x00/rt2x00crypto.c index 37ad0d2fb64..aee9cba13eb 100644 --- a/drivers/net/wireless/rt2x00/rt2x00crypto.c +++ b/drivers/net/wireless/rt2x00/rt2x00crypto.c @@ -184,8 +184,8 @@ void rt2x00crypto_rx_insert_iv(struct sk_buff *skb, unsigned int align, * Make room for new data, note that we increase both * headsize and tailsize when required. The tailsize is * only needed when ICV data needs to be inserted and - * the padding is smaller then the ICV data. - * When alignment requirements is greater then the + * the padding is smaller than the ICV data. + * When alignment requirements is greater than the * ICV data we must trim the skb to the correct size * because we need to remove the extra bytes. 
*/ diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index dd0de3a9ed4..7015f248055 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -236,7 +236,7 @@ struct strip { unsigned long tx_errors; /* Planned stuff */ unsigned long rx_dropped; /* No memory for skb */ unsigned long tx_dropped; /* When MTU change */ - unsigned long rx_over_errors; /* Frame bigger then STRIP buf. */ + unsigned long rx_over_errors; /* Frame bigger than STRIP buf. */ unsigned long pps_timer; /* Timer to determine pps */ unsigned long rx_pps_count; /* Counter to determine pps */ diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index 892e2878d61..f8e05ce9862 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -535,8 +535,8 @@ static int dasd_eer_open(struct inode *inp, struct file *filp) eerb->buffer_page_count > INT_MAX / PAGE_SIZE) { kfree(eerb); MESSAGE(KERN_WARNING, "can't open device since module " - "parameter eer_pages is smaller then 1 or" - " bigger then %d", (int)(INT_MAX / PAGE_SIZE)); + "parameter eer_pages is smaller than 1 or" + " bigger than %d", (int)(INT_MAX / PAGE_SIZE)); unlock_kernel(); return -EINVAL; } diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index aabbeb909cc..d8a2289fcb6 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -427,7 +427,7 @@ static int vmlogrdr_receive_data(struct vmlogrdr_priv_t *priv) buffer = priv->buffer + sizeof(int); } /* - * If the record is bigger then our buffer, we receive only + * If the record is bigger than our buffer, we receive only * a part of it. We can get the rest later. 
*/ if (iucv_data_count > NET_BUFFER_SIZE) @@ -437,7 +437,7 @@ static int vmlogrdr_receive_data(struct vmlogrdr_priv_t *priv) 0, buffer, iucv_data_count, &priv->residual_length); spin_unlock_bh(&priv->priv_lock); - /* An rc of 5 indicates that the record was bigger then + /* An rc of 5 indicates that the record was bigger than * the buffer, which is OK for us. A 9 indicates that the * record was purged befor we could receive it. */ diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 8c64494444b..311ed6dea72 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1964,10 +1964,10 @@ lpfc_set_disctmo(struct lpfc_vport *vport) uint32_t tmo; if (vport->port_state == LPFC_LOCAL_CFG_LINK) { - /* For FAN, timeout should be greater then edtov */ + /* For FAN, timeout should be greater than edtov */ tmo = (((phba->fc_edtov + 999) / 1000) + 1); } else { - /* Normal discovery timeout should be > then ELS/CT timeout + /* Normal discovery timeout should be > than ELS/CT timeout * FC spec states we need 3 * ratov for CT requests */ tmo = ((phba->fc_ratov * 3) + 3); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 01dfdc8696f..a36a120561e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -420,7 +420,7 @@ lpfc_sli_next_iocb_slot (struct lpfc_hba *phba, struct lpfc_sli_ring *pring) if (unlikely(pring->local_getidx >= max_cmd_idx)) { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0315 Ring %d issue: portCmdGet %d " - "is bigger then cmd ring %d\n", + "is bigger than cmd ring %d\n", pring->ringno, pring->local_getidx, max_cmd_idx); @@ -1628,12 +1628,12 @@ lpfc_sli_rsp_pointers_error(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { struct lpfc_pgp *pgp = &phba->port_gp[pring->ringno]; /* - * Ring handler: portRspPut is bigger then + * Ring handler: portRspPut is bigger than * rsp ring */ lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0312 Ring %d handler: 
portRspPut %d " - "is bigger then rsp ring %d\n", + "is bigger than rsp ring %d\n", pring->ringno, le32_to_cpu(pgp->rspPutInx), pring->numRiocb); @@ -2083,12 +2083,12 @@ lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba, portRspPut = le32_to_cpu(pgp->rspPutInx); if (portRspPut >= portRspMax) { /* - * Ring handler: portRspPut is bigger then + * Ring handler: portRspPut is bigger than * rsp ring */ lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0303 Ring %d handler: portRspPut %d " - "is bigger then rsp ring %d\n", + "is bigger than rsp ring %d\n", pring->ringno, portRspPut, portRspMax); phba->link_state = LPFC_HBA_ERROR; diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c index 8b2c619a09f..e642c22c80e 100644 --- a/drivers/serial/crisv10.c +++ b/drivers/serial/crisv10.c @@ -1203,7 +1203,7 @@ static void e100_disable_txdma_channel(struct e100_serial *info) unsigned long flags; /* Disable output DMA channel for the serial port in question - * ( set to something other then serialX) + * ( set to something other than serialX) */ local_irq_save(flags); DFLOW(DEBUG_LOG(info->line, "disable_txdma_channel %i\n", info->line)); @@ -1266,7 +1266,7 @@ static void e100_disable_rxdma_channel(struct e100_serial *info) unsigned long flags; /* Disable input DMA channel for the serial port in question - * ( set to something other then serialX) + * ( set to something other than serialX) */ local_irq_save(flags); if (info->line == 0) { diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index e6210725b9a..d012edda6d1 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1332,7 +1332,7 @@ static void vgacon_save_screen(struct vc_data *c) c->vc_y = screen_info.orig_y; } - /* We can't copy in more then the size of the video buffer, + /* We can't copy in more than the size of the video buffer, * or we'll be copying in VGA BIOS */ if (!vga_is_gfx) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 
6ebaa58e2c0..04697ba7f73 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -854,7 +854,7 @@ static int o2hb_thread(void *data) while (!kthread_should_stop() && !reg->hr_unclean_stop) { /* We track the time spent inside - * o2hb_do_disk_heartbeat so that we avoid more then + * o2hb_do_disk_heartbeat so that we avoid more than * hr_timeout_ms between disk writes. On busy systems * this should result in a heartbeat which is less * likely to time itself out. */ diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 219bd79ea89..d4a8be32b90 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -9,7 +9,7 @@ /* * Logic: we've got two memory sums for each process, "shared", and - * "non-shared". Shared memory may get counted more then once, for + * "non-shared". Shared memory may get counted more than once, for * each process that owns it. Non-shared memory is counted * accurately. */ diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index 91ceeda7e5b..e35b54d5059 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -40,7 +40,7 @@ config UBIFS_FS_ZLIB depends on UBIFS_FS default y help - Zlib copresses better then LZO but it is slower. Say 'Y' if unsure. + Zlib compresses better than LZO but it is slower. Say 'Y' if unsure. # Debugging-related stuff config UBIFS_FS_DEBUG diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 0e5e54d8292..175f9c590b7 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -142,7 +142,7 @@ static long long get_liability(struct ubifs_info *c) * * This function is called when an operation cannot be budgeted because there * is supposedly no free space. 
But in most cases there is some free space: - * o budgeting is pessimistic, so it always budgets more then it is actually + * o budgeting is pessimistic, so it always budgets more than it is actually * needed, so shrinking the liability is one way to make free space - the * cached data will take less space then it was budgeted for; * o GC may turn some dark space into free space (budgeting treats dark space @@ -606,7 +606,7 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) * @c: UBIFS file-system description object * * This function converts budget which was allocated for a new page of data to - * the budget of changing an existing page of data. The latter is smaller then + * the budget of changing an existing page of data. The latter is smaller than * the former, so this function only does simple re-calculation and does not * involve any write-back. */ diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 0bef6501d58..9832f9abe28 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -45,7 +45,7 @@ #define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ /* - * GC may need to move more then one LEB to make progress. The below constants + * GC may need to move more than one LEB to make progress. The below constants * define "soft" and "hard" limits on the number of LEBs the garbage collector * may move. */ diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 10ae25b7d1d..9b7c54e0cd2 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -191,7 +191,7 @@ again: if (wbuf->lnum != -1 && avail >= len) { /* * Someone else has switched the journal head and we have - * enough space now. This happens when more then one process is + * enough space now. This happens when more than one process is * trying to write to the same journal head at the same time. 
*/ dbg_jnl("return LEB %d back, already have LEB %d:%d", diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index f248533841a..e7bab52a141 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -151,7 +151,7 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) * @contention: if any contention, this is set to %1 * * This function walks the list of mounted UBIFS file-systems and frees clean - * znodes which are older then @age, until at least @nr znodes are freed. + * znodes which are older than @age, until at least @nr znodes are freed. * Returns the number of freed znodes. */ static int shrink_tnc_trees(int nr, int age, int *contention) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 36f6cc703ef..be846d606ae 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1348,7 +1348,7 @@ xfs_finish_flags( { int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); - /* Fail a mount where the logbuf is smaller then the log stripe */ + /* Fail a mount where the logbuf is smaller than the log stripe */ if (xfs_sb_version_haslogv2(&mp->m_sb)) { if (mp->m_logbsize <= 0 && mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index eae26bb6430..64433eb411d 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -83,7 +83,7 @@ typedef enum { * @datbuf: data buffer - if NULL only oob data are read/written * @oobbuf: oob data buffer * - * Note, it is allowed to read more then one OOB area at one go, but not write. + * Note, it is allowed to read more than one OOB area at one go, but not write. * The interface assumes that the OOB write requests program only one page's * OOB area. 
*/ diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 82229317753..68bb1c501d0 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -327,9 +327,9 @@ extern struct spi_master *spi_busnum_to_master(u16 busnum); * @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped * @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped * @len: size of rx and tx buffers (in bytes) - * @speed_hz: Select a speed other then the device default for this + * @speed_hz: Select a speed other than the device default for this * transfer. If 0 the default (from @spi_device) is used. - * @bits_per_word: select a bits_per_word other then the device default + * @bits_per_word: select a bits_per_word other than the device default * for this transfer. If 0 the default (from @spi_device) is used. * @cs_change: affects chipselect after this transfer completes * @delay_usecs: microseconds to delay after this transfer before diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index ccdc562e444..2dc2eb2b8e2 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -253,7 +253,7 @@ struct ubi_mkvol_req { * * Re-sizing is possible for both dynamic and static volumes. But while dynamic * volumes may be re-sized arbitrarily, static volumes cannot be made to be - * smaller then the number of bytes they bear. To arbitrarily shrink a static + * smaller than the number of bytes they bear. To arbitrarily shrink a static * volume, it must be wiped out first (by means of volume update operation with * zero number of bytes). */ diff --git a/kernel/pid.c b/kernel/pid.c index 064e76afa50..af9224cdd6c 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -475,7 +475,7 @@ pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) EXPORT_SYMBOL(task_session_nr_ns); /* - * Used by proc to find the first pid that is greater then or equal to nr. + * Used by proc to find the first pid that is greater than or equal to nr. 
* * If there is a pid at nr this function is exactly the same as find_pid_ns. */ diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 1ca99557e92..06f197560f3 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -45,7 +45,7 @@ * * The value 8 is somewhat carefully chosen, as anything * larger can result in overflows. NSEC_PER_JIFFY grows as - * HZ shrinks, so values greater then 8 overflow 32bits when + * HZ shrinks, so values greater than 8 overflow 32bits when * HZ=100. */ #define JIFFIES_SHIFT 8 diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 52db5f60daa..20c576f530f 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -141,8 +141,8 @@ void sctp_auth_destroy_keys(struct list_head *keys) /* Compare two byte vectors as numbers. Return values * are: * 0 - vectors are equal - * < 0 - vector 1 is smaller then vector2 - * > 0 - vector 1 is greater then vector2 + * < 0 - vector 1 is smaller than vector2 + * > 0 - vector 1 is greater than vector2 * * Algorithm is: * This is performed by selecting the numerically smaller key vector... diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 1c4e5d6c29c..3a0cd075914 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4268,9 +4268,9 @@ nomem: /* * Handle a protocol violation when the chunk length is invalid. - * "Invalid" length is identified as smaller then the minimal length a + * "Invalid" length is identified as smaller than the minimal length a * given chunk can be. For example, a SACK chunk has invalid length - * if it's length is set to be smaller then the size of sctp_sack_chunk_t. + * if its length is set to be smaller than the size of sctp_sack_chunk_t. * * We inform the other end by sending an ABORT with a Protocol Violation * error code. @@ -4300,7 +4300,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( /* * Handle a protocol violation when the parameter length is invalid. 
- * "Invalid" length is identified as smaller then the minimal length a + * "Invalid" length is identified as smaller than the minimal length a * given parameter can be. */ static sctp_disposition_t sctp_sf_violation_paramlen( diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b14a8f33e42..ff0a8f88de0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2717,7 +2717,7 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, int o paths++; } - /* Only validate asocmaxrxt if we have more then + /* Only validate asocmaxrxt if we have more than * one path/transport. We do this because path * retransmissions are only counted when we have more * then one path. diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 35c73e82553..9bd64565021 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -227,7 +227,7 @@ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn) */ bitmap_zero(map->tsn_map, map->len); } else { - /* If the gap is smaller then the map size, + /* If the gap is smaller than the map size, * shift the map by 'gap' bits and update further. */ bitmap_shift_right(map->tsn_map, map->tsn_map, gap, map->len); diff --git a/sound/usb/usx2y/usbusx2y.c b/sound/usb/usx2y/usbusx2y.c index ca26c532e77..11639bd72a5 100644 --- a/sound/usb/usx2y/usbusx2y.c +++ b/sound/usb/usx2y/usbusx2y.c @@ -238,7 +238,7 @@ static void i_usX2Y_In04Int(struct urb *urb) send = 0; for (j = 0; j < URBS_AsyncSeq && !err; ++j) if (0 == usX2Y->AS04.urb[j]->status) { - struct us428_p4out *p4out = us428ctls->p4out + send; // FIXME if more then 1 p4out is new, 1 gets lost. + struct us428_p4out *p4out = us428ctls->p4out + send; // FIXME if more than 1 p4out is new, 1 gets lost. usb_fill_bulk_urb(usX2Y->AS04.urb[j], usX2Y->chip.dev, usb_sndbulkpipe(usX2Y->chip.dev, 0x04), &p4out->val.vol, p4out->type == eLT_Light ? 
sizeof(struct us428_lights) : 5, -- cgit v1.2.3-70-g09d2 From 0211a9c8508b2183e0e539509aad60414f1c3813 Mon Sep 17 00:00:00 2001 From: Frederik Schwarzer Date: Mon, 29 Dec 2008 22:14:56 +0100 Subject: trivial: fix an -> a typos in documentation and comments It is always "an" if there is a vowel _spoken_ (not written). So it is: "an hour" (spoken vowel) but "a uniform" (spoken 'j') Signed-off-by: Frederik Schwarzer Signed-off-by: Jiri Kosina --- Documentation/dell_rbu.txt | 4 ++-- Documentation/laptops/thinkpad-acpi.txt | 2 +- Documentation/networking/tuntap.txt | 2 +- arch/m68k/kernel/traps.c | 2 +- drivers/acpi/executer/exprep.c | 2 +- drivers/acpi/executer/exresolv.c | 2 +- drivers/acpi/executer/exstore.c | 2 +- drivers/acpi/resources/rscreate.c | 2 +- drivers/acpi/utilities/utobject.c | 4 ++-- drivers/char/epca.c | 2 +- drivers/cpufreq/Kconfig | 4 ++-- drivers/input/keyboard/atkbd.c | 2 +- drivers/macintosh/Kconfig | 2 +- drivers/misc/phantom.c | 2 +- fs/ncpfs/ioctl.c | 2 +- include/acpi/acmacros.h | 4 ++-- include/acpi/actypes.h | 2 +- include/linux/ncp_fs.h | 2 +- mm/slub.c | 2 +- sound/oss/aedsp16.c | 2 +- 20 files changed, 24 insertions(+), 24 deletions(-) (limited to 'Documentation') diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt index 2c0d631de0c..c11b931f8f9 100644 --- a/Documentation/dell_rbu.txt +++ b/Documentation/dell_rbu.txt @@ -81,8 +81,8 @@ Until this step is completed the driver cannot be unloaded. Also echoing either mono ,packet or init in to image_type will free up the memory allocated by the driver. -If an user by accident executes steps 1 and 3 above without executing step 2; -it will make the /sys/class/firmware/dell_rbu/ entries to disappear. +If a user by accident executes steps 1 and 3 above without executing step 2; +it will make the /sys/class/firmware/dell_rbu/ entries disappear. 
The entries can be recreated by doing the following echo init > /sys/devices/platform/dell_rbu/image_type NOTE: echoing init in image_type does not change it original value. diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index 71f0fe1fc1b..898b4987bb8 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -1475,7 +1475,7 @@ Sysfs interface changelog: 0x020100: Marker for thinkpad-acpi with hot key NVRAM polling support. If you must, use it to know you should not - start an userspace NVRAM poller (allows to detect when + start a userspace NVRAM poller (allows to detect when NVRAM is compiled out by the user because it is unneeded/undesired in the first place). 0x020101: Marker for thinkpad-acpi with hot key NVRAM polling diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt index 839cbb71388..c0aab985bad 100644 --- a/Documentation/networking/tuntap.txt +++ b/Documentation/networking/tuntap.txt @@ -118,7 +118,7 @@ As mentioned above, main purpose of TUN/TAP driver is tunneling. It is used by VTun (http://vtun.sourceforge.net). Another interesting application using TUN/TAP is pipsecd -(http://perso.enst.fr/~beyssac/pipsec/), an userspace IPSec +(http://perso.enst.fr/~beyssac/pipsec/), a userspace IPSec implementation that can use complete kernel routing (unlike FreeS/WAN). 3. How does Virtual network device actually work ? 
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 6d813de2baf..184acc90808 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -401,7 +401,7 @@ static inline void do_040writebacks(struct frame *fp) * called from sigreturn(), must ensure userspace code didn't * manipulate exception frame to circumvent protection, then complete * pending writebacks - * we just clear TM2 to turn it into an userspace access + * we just clear TM2 to turn it into a userspace access */ asmlinkage void berr_040cleanup(struct frame *fp) { diff --git a/drivers/acpi/executer/exprep.c b/drivers/acpi/executer/exprep.c index 5d438c32989..a7dc87ecee3 100644 --- a/drivers/acpi/executer/exprep.c +++ b/drivers/acpi/executer/exprep.c @@ -404,7 +404,7 @@ acpi_ex_prep_common_field_object(union acpi_operand_object *obj_desc, * * RETURN: Status * - * DESCRIPTION: Construct an union acpi_operand_object of type def_field and + * DESCRIPTION: Construct a union acpi_operand_object of type def_field and * connect it to the parent Node. * ******************************************************************************/ diff --git a/drivers/acpi/executer/exresolv.c b/drivers/acpi/executer/exresolv.c index 89571b92a52..60e8c47128e 100644 --- a/drivers/acpi/executer/exresolv.c +++ b/drivers/acpi/executer/exresolv.c @@ -146,7 +146,7 @@ acpi_ex_resolve_object_to_value(union acpi_operand_object **stack_ptr, stack_desc = *stack_ptr; - /* This is an union acpi_operand_object */ + /* This is a union acpi_operand_object */ switch (ACPI_GET_OBJECT_TYPE(stack_desc)) { case ACPI_TYPE_LOCAL_REFERENCE: diff --git a/drivers/acpi/executer/exstore.c b/drivers/acpi/executer/exstore.c index 3318df4cbd9..1c118ba78ad 100644 --- a/drivers/acpi/executer/exstore.c +++ b/drivers/acpi/executer/exstore.c @@ -274,7 +274,7 @@ acpi_ex_do_debug_object(union acpi_operand_object *source_desc, * * PARAMETERS: *source_desc - Value to be stored * *dest_desc - Where to store it. 
Must be an NS node - * or an union acpi_operand_object of type + * or a union acpi_operand_object of type * Reference; * walk_state - Current walk state * diff --git a/drivers/acpi/resources/rscreate.c b/drivers/acpi/resources/rscreate.c index c0bbfa2c419..08b8d73e6ee 100644 --- a/drivers/acpi/resources/rscreate.c +++ b/drivers/acpi/resources/rscreate.c @@ -124,7 +124,7 @@ acpi_rs_create_resource_list(union acpi_operand_object *aml_buffer, * * FUNCTION: acpi_rs_create_pci_routing_table * - * PARAMETERS: package_object - Pointer to an union acpi_operand_object + * PARAMETERS: package_object - Pointer to a union acpi_operand_object * package * output_buffer - Pointer to the user's buffer * diff --git a/drivers/acpi/utilities/utobject.c b/drivers/acpi/utilities/utobject.c index c354e7a42bc..4bef3cfbacc 100644 --- a/drivers/acpi/utilities/utobject.c +++ b/drivers/acpi/utilities/utobject.c @@ -297,7 +297,7 @@ union acpi_operand_object *acpi_ut_create_string_object(acpi_size string_size) * * RETURN: TRUE if object is valid, FALSE otherwise * - * DESCRIPTION: Validate a pointer to be an union acpi_operand_object + * DESCRIPTION: Validate a pointer to be a union acpi_operand_object * ******************************************************************************/ @@ -389,7 +389,7 @@ void acpi_ut_delete_object_desc(union acpi_operand_object *object) { ACPI_FUNCTION_TRACE_PTR(ut_delete_object_desc, object); - /* Object must be an union acpi_operand_object */ + /* Object must be a union acpi_operand_object */ if (ACPI_GET_DESCRIPTOR_TYPE(object) != ACPI_DESC_TYPE_OPERAND) { ACPI_ERROR((AE_INFO, diff --git a/drivers/char/epca.c b/drivers/char/epca.c index 39ad820b235..af7c13ca949 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -769,7 +769,7 @@ static int pc_open(struct tty_struct *tty, struct file *filp) /* Check status of board configured in system. */ /* - * I check to see if the epca_setup routine detected an user error. 
It + * I check to see if the epca_setup routine detected a user error. It * might be better to put this in pc_init, but for the moment it goes * here. */ diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 5f076aef74f..a8c8d9c19d7 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -83,7 +83,7 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE select CPU_FREQ_GOV_USERSPACE help Use the CPUFreq governor 'userspace' as default. This allows - you to set the CPU frequency manually or when an userspace + you to set the CPU frequency manually or when a userspace program shall be able to set the CPU dynamically without having to enable the userspace governor manually. @@ -138,7 +138,7 @@ config CPU_FREQ_GOV_USERSPACE tristate "'userspace' governor for userspace frequency scaling" help Enable this cpufreq governor when you either want to set the - CPU frequency manually or when an userspace program shall + CPU frequency manually or when a userspace program shall be able to set the CPU dynamically, like on LART . diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 379b7ff354e..b9e6bef594a 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -65,7 +65,7 @@ MODULE_PARM_DESC(extra, "Enable extra LEDs and keys on IBM RapidAcces, EzKey and /* * Scancode to keycode tables. These are just the default setting, and - * are loadable via an userland utility. + * are loadable via a userland utility. */ static const unsigned short atkbd_set2_keycode[512] = { diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index b52659620d5..173cf55c64d 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -138,7 +138,7 @@ config PMAC_BACKLIGHT Say Y here to enable Macintosh specific extensions of the generic backlight code. With this enabled, the brightness keys on older PowerBooks will be enabled so you can change the screen brightness. 
- Newer models should use an userspace daemon like pbbuttonsd. + Newer models should use a userspace daemon like pbbuttonsd. config PMAC_BACKLIGHT_LEGACY bool "Provide legacy ioctl's on /dev/pmu for the backlight" diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c index abdebe34738..fa57b67593a 100644 --- a/drivers/misc/phantom.c +++ b/drivers/misc/phantom.c @@ -6,7 +6,7 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * - * You need an userspace library to cooperate with this driver. It (and other + * You need a userspace library to cooperate with this driver. It (and other * info) may be obtained here: * http://www.fi.muni.cz/~xslaby/phantom.html * or alternatively, you might use OpenHaptics provided by Sensable. diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 6d04e050c74..f54360f50a9 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -98,7 +98,7 @@ struct compat_ncp_objectname_ioctl { s32 auth_type; u32 object_name_len; - compat_caddr_t object_name; /* an userspace data, in most cases user name */ + compat_caddr_t object_name; /* a userspace data, in most cases user name */ }; struct compat_ncp_fs_info_v2 { diff --git a/include/acpi/acmacros.h b/include/acpi/acmacros.h index a597207e283..1954c9d1d01 100644 --- a/include/acpi/acmacros.h +++ b/include/acpi/acmacros.h @@ -333,8 +333,8 @@ struct acpi_integer_overlay { #define ACPI_INSERT_BITS(target, mask, source) target = ((target & (~(mask))) | (source & mask)) /* - * An struct acpi_namespace_node can appear in some contexts - * where a pointer to an union acpi_operand_object can also + * A struct acpi_namespace_node can appear in some contexts + * where a pointer to a union acpi_operand_object can also * appear. This macro is used to distinguish them. * * The "Descriptor" field is the first field in both structures. 
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 7220361790b..8222e8de0d1 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -467,7 +467,7 @@ typedef u32 acpi_object_type; /* * These are special object types that never appear in - * a Namespace node, only in an union acpi_operand_object + * a Namespace node, only in a union acpi_operand_object */ #define ACPI_TYPE_LOCAL_EXTRA 0x1C #define ACPI_TYPE_LOCAL_DATA 0x1D diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index 9f2d76347f1..f69e66d151c 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -87,7 +87,7 @@ struct ncp_objectname_ioctl #define NCP_AUTH_NDS 0x32 int auth_type; size_t object_name_len; - void __user * object_name; /* an userspace data, in most cases user name */ + void __user * object_name; /* a userspace data, in most cases user name */ }; struct ncp_privatedata_ioctl diff --git a/mm/slub.c b/mm/slub.c index f0e2892fe40..6392ae5cc6b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2254,7 +2254,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * Add some empty padding so that we can catch * overwrites from earlier objects rather than let * tracking information or the free pointer be - * corrupted if an user writes before the start + * corrupted if a user writes before the start * of the object. */ size += sizeof(void *); diff --git a/sound/oss/aedsp16.c b/sound/oss/aedsp16.c index a0274f3dac0..3ee9900ffd7 100644 --- a/sound/oss/aedsp16.c +++ b/sound/oss/aedsp16.c @@ -157,7 +157,7 @@ Started Fri Mar 17 16:13:18 MET 1995 - v0.1 (ALPHA, was an user-level program called AudioExcelDSP16.c) + v0.1 (ALPHA, was a user-level program called AudioExcelDSP16.c) - Initial code. v0.2 (ALPHA) - Cleanups. 
-- cgit v1.2.3-70-g09d2 From 8b5b8f4cea18cb30f748baa913234c62cdc64541 Mon Sep 17 00:00:00 2001 From: Nick Andrew Date: Sat, 3 Jan 2009 19:00:37 +1100 Subject: trivial: Fix misspelling of "firmware" in docs for ncr53c8xx/sym53c8xx Fix misspelling of "firmware" in docs for ncr53c8xx/sym53c8xx It's spelled "firmware". Signed-off-by: Nick Andrew Signed-off-by: Jiri Kosina --- Documentation/scsi/ChangeLog.ncr53c8xx | 2 +- Documentation/scsi/ChangeLog.sym53c8xx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/scsi/ChangeLog.ncr53c8xx b/Documentation/scsi/ChangeLog.ncr53c8xx index a9f721aeb11..8b278c10edf 100644 --- a/Documentation/scsi/ChangeLog.ncr53c8xx +++ b/Documentation/scsi/ChangeLog.ncr53c8xx @@ -19,7 +19,7 @@ Sun Sep 24 21:30 2000 Gerard Roudier (groudier@club-internet.fr) Wed Jul 26 23:30 2000 Gerard Roudier (groudier@club-internet.fr) * version ncr53c8xx-3.4.1 - - Provide OpenFirmare path through the proc FS on PPC. + - Provide OpenFirmware path through the proc FS on PPC. - Remove trailing argument #2 from a couple of #undefs. Sun Jul 09 16:30 2000 Gerard Roudier (groudier@club-internet.fr) diff --git a/Documentation/scsi/ChangeLog.sym53c8xx b/Documentation/scsi/ChangeLog.sym53c8xx index ef985ec348e..02ffbc1e8a8 100644 --- a/Documentation/scsi/ChangeLog.sym53c8xx +++ b/Documentation/scsi/ChangeLog.sym53c8xx @@ -81,7 +81,7 @@ Sun Sep 24 21:30 2000 Gerard Roudier (groudier@club-internet.fr) Wed Jul 26 23:30 2000 Gerard Roudier (groudier@club-internet.fr) * version sym53c8xx-1.7.1 - - Provide OpenFirmare path through the proc FS on PPC. + - Provide OpenFirmware path through the proc FS on PPC. - Download of on-chip SRAM using memcpy_toio() doesn't work on PPC. Restore previous method (MEMORY MOVE from SCRIPTS). - Remove trailing argument #2 from a couple of #undefs. 
-- cgit v1.2.3-70-g09d2 From 02c84bde234eee4f2089ac3587d138607aaec777 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:21:00 +0100 Subject: ide: update warm-plug HOWTO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Bruno Prémont Signed-off-by: Bartlomiej Zolnierkiewicz --- Documentation/ide/warm-plug-howto.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/ide/warm-plug-howto.txt b/Documentation/ide/warm-plug-howto.txt index d5885468b07..98152bcd515 100644 --- a/Documentation/ide/warm-plug-howto.txt +++ b/Documentation/ide/warm-plug-howto.txt @@ -11,3 +11,8 @@ unplug old device(s) and plug new device(s) # echo -n "1" > /sys/class/ide_port/idex/scan done + +NOTE: please make sure that partitions are unmounted and that there are +no other active references to devices before doing "delete_devices" step, +also do not attempt "scan" step on devices currently in use -- otherwise +results may be unpredictable and lead to data loss if you're unlucky -- cgit v1.2.3-70-g09d2 From 0732b49c8c40c56033cff8986dbad55aa54a5d40 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 9 Dec 2008 08:32:14 +1030 Subject: kobject: Make Documentation/kobject.txt a little more coherent. While reading Documentation/kobject.txt: Note kobject_rename does perform any locking or have a solid notion of what names are valid so the provide must provide their own sanity checking and serialization. I expect better: You never see me hard with time word making sentence coherent stuff. Ever. Signed-off-by: Rusty Russell Acked-by: Eric W. 
Biederman Signed-off-by: Greg Kroah-Hartman --- Documentation/kobject.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt index f5d2aad65a6..b2e374586bd 100644 --- a/Documentation/kobject.txt +++ b/Documentation/kobject.txt @@ -118,8 +118,8 @@ the name of the kobject, call kobject_rename(): int kobject_rename(struct kobject *kobj, const char *new_name); -Note kobject_rename does perform any locking or have a solid notion of -what names are valid so the provide must provide their own sanity checking +kobject_rename does not perform any locking or have a solid notion of +what names are valid so the caller must provide their own sanity checking and serialization. There is a function called kobject_set_name() but that is legacy cruft and -- cgit v1.2.3-70-g09d2 From a2ab3d30005cdce45c2c7e31ad6743ad7975609a Mon Sep 17 00:00:00 2001 From: "Hans J. Koch" Date: Sat, 6 Dec 2008 02:25:13 +0100 Subject: UIO: Documentation for UIO ioport info handling This patch updates UIO documentation with the changes introduced by previous UIO patch. Signed-off-by: Hans J. Koch Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/uio-howto.tmpl | 97 ++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) (limited to 'Documentation') diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl index df87d1b9360..6116b93608d 100644 --- a/Documentation/DocBook/uio-howto.tmpl +++ b/Documentation/DocBook/uio-howto.tmpl @@ -41,6 +41,12 @@ GPL version 2. + + 0.6 + 2008-12-05 + hjk + Added description of portio sysfs attributes. + 0.5 2008-05-22 @@ -318,6 +324,54 @@ interested in translating it, please email me offset = N * getpagesize(); + + Sometimes there is hardware with memory-like regions that can not be + mapped with the technique described here, but there are still ways to + access them from userspace. 
The most common example are x86 ioports. + On x86 systems, userspace can access these ioports using + ioperm(), iopl(), + inb(), outb(), and similar + functions. + + + Since these ioport regions can not be mapped, they will not appear under + /sys/class/uio/uioX/maps/ like the normal memory + described above. Without information about the port regions a hardware + has to offer, it becomes difficult for the userspace part of the + driver to find out which ports belong to which UIO device. + + + To address this situation, the new directory + /sys/class/uio/uioX/portio/ was added. It only + exists if the driver wants to pass information about one or more port + regions to userspace. If that is the case, subdirectories named + port0, port1, and so on, + will appear underneath + /sys/class/uio/uioX/portio/. + + + Each portX/ directory contains three read-only + files that show start, size, and type of the port region: + + + + + start: The first port of this region. + + + + + size: The number of ports in this region. + + + + + porttype: A string describing the type of port. + + + + + @@ -355,6 +409,13 @@ mapping you need to fill one of the uio_mem structures. See the description below for details. + +struct uio_port port[ MAX_UIO_PORTS_REGIONS ]: Required +if you want to pass information about ioports to userspace. For each port +region you need to fill one of the uio_port structures. +See the description below for details. + + long irq: Required. If your hardware generates an interrupt, it's your modules task to determine the irq number during @@ -448,6 +509,42 @@ Please do not touch the kobj element of struct uio_mem! It is used by the UIO framework to set up sysfs files for this mapping. Simply leave it alone. + + +Sometimes, your device can have one or more port regions which can not be +mapped to userspace. But if there are other possibilities for userspace to +access these ports, it makes sense to make information about the ports +available in sysfs. 
For each region, you have to set up a +struct uio_port in the port[] array. +Here's a description of the fields of struct uio_port: + + + + +char *porttype: Required. Set this to one of the predefined +constants. Use UIO_PORT_X86 for the ioports found in x86 +architectures. + + + +unsigned long start: Required if the port region is used. +Fill in the number of the first port of this region. + + + +unsigned long size: Fill in the number of ports in this +region. If size is zero, the region is considered unused. +Note that you must initialize size +with zero for all unused regions. + + + + +Please do not touch the portio element of +struct uio_port! It is used internally by the UIO +framework to set up sysfs files for this region. Simply leave it alone. + + -- cgit v1.2.3-70-g09d2 From b8ac9fc0e8cda9f9776019c5b0464b0c6d2d4c90 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 12 Dec 2008 11:44:21 +0100 Subject: uio: make uio_info's name and version const These are only ever assigned constant strings and never modified. This was noticed because Wolfram Sang needed to cast the result of of_get_property() in order to assign it to the name field of a struct uio_info. Signed-off-by: Stephen Rothwell Signed-off-by: Hans J. Koch Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/uio-howto.tmpl | 4 ++-- include/linux/uio_driver.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl index 6116b93608d..b787e4721c9 100644 --- a/Documentation/DocBook/uio-howto.tmpl +++ b/Documentation/DocBook/uio-howto.tmpl @@ -393,12 +393,12 @@ offset = N * getpagesize(); -char *name: Required. The name of your driver as +const char *name: Required. The name of your driver as it will appear in sysfs. I recommend using the name of your module for this. -char *version: Required. This string appears in +const char *version: Required. 
This string appears in /sys/class/uio/uioX/version. diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 20be327bfbb..a0bb6bd2e5c 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -76,8 +76,8 @@ struct uio_device; */ struct uio_info { struct uio_device *uio_dev; - char *name; - char *version; + const char *name; + const char *version; struct uio_mem mem[MAX_UIO_MAPS]; struct uio_port port[MAX_UIO_PORT_REGIONS]; long irq; -- cgit v1.2.3-70-g09d2 From 83982b6f47201c4c7767210d24d7d8c99567a0b3 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Jan 2009 14:53:16 -0500 Subject: ext4: Remove "extents" mount option This mount option is largely superfluous, and in fact the way it was implemented was buggy; if a filesystem which did not have the extents feature flag was mounted -o extents, the filesystem would attempt to create and use extents-based files even though the extents feature flag was not enabled. The simplest thing to do is to nuke the mount option entirely. It's not all that useful to force the non-creation of new extent-based files if the filesystem can support it. Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 5 ---- fs/ext4/ext4.h | 1 - fs/ext4/ext4_jbd2.h | 4 ++-- fs/ext4/extents.c | 4 ++-- fs/ext4/ialloc.c | 2 +- fs/ext4/migrate.c | 14 +++++------ fs/ext4/super.c | 48 ++------------------------------------ 7 files changed, 14 insertions(+), 64 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 8938949b201..cec829bc729 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -131,11 +131,6 @@ ro Mount filesystem read only. Note that ext4 will mount options "ro,noload" can be used to prevent writes to the filesystem. -extents (*) ext4 will use extents to address file data. The - file system will no longer be mountable by ext3. 
- -noextents ext4 will not use extents for newly created files - journal_checksum Enable checksumming of the journal transactions. This will allow the recovery code in e2fsck and the kernel to detect corruption in the kernel. It is a diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 695b45cc34e..db1718833f5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -536,7 +536,6 @@ do { \ #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ -#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 663197adae5..be2f426f680 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -32,8 +32,8 @@ * 5 levels of tree + root which are stored in the inode. */ #define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ - (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ - || test_opt(sb, EXTENTS) ? 27U : 8U) + (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ + ? 
27U : 8U) /* Extended attribute operations touch at most two data buffers, * two bitmap buffers, and two group summaries, in addition to the inode diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c64080e4949..240cf0daad4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2247,7 +2247,7 @@ void ext4_ext_init(struct super_block *sb) * possible initialization would be here */ - if (test_opt(sb, EXTENTS)) { + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { printk(KERN_INFO "EXT4-fs: file extents enabled"); #ifdef AGGRESSIVE_TEST printk(", aggressive tests"); @@ -2272,7 +2272,7 @@ void ext4_ext_init(struct super_block *sb) */ void ext4_ext_release(struct super_block *sb) { - if (!test_opt(sb, EXTENTS)) + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) return; #ifdef EXTENTS_STATS diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 369c34c6429..4fb86a0061d 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -917,7 +917,7 @@ got: if (err) goto fail_free_drop; - if (test_opt(sb, EXTENTS)) { + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { /* set extent flag only for directory, file and normal symlink*/ if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index e7cd488da4b..734abca25e3 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -459,13 +459,13 @@ int ext4_ext_migrate(struct inode *inode) struct list_blocks_struct lb; unsigned long max_entries; - if (!test_opt(inode->i_sb, EXTENTS)) - /* - * if mounted with noextents we don't allow the migrate - */ - return -EINVAL; - - if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + /* + * If the filesystem does not support extents, or the inode + * already is extent-based, error out. 
+ */ + if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_INCOMPAT_EXTENTS) || + (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) return -EINVAL; if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b69d0920386..acb69c00fd4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -829,8 +829,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",journal_async_commit"); if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); - if (!test_opt(sb, EXTENTS)) - seq_puts(seq, ",noextents"); if (test_opt(sb, I_VERSION)) seq_puts(seq, ",i_version"); if (!test_opt(sb, DELALLOC)) @@ -1011,7 +1009,7 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, + Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_inode_readahead_blks, Opt_journal_ioprio }; @@ -1066,8 +1064,6 @@ static const match_table_t tokens = { {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, - {Opt_extents, "extents"}, - {Opt_noextents, "noextents"}, {Opt_i_version, "i_version"}, {Opt_stripe, "stripe=%u"}, {Opt_resize, "resize"}, @@ -1115,7 +1111,6 @@ static int parse_options(char *options, struct super_block *sb, int qtype, qfmt; char *qname; #endif - ext4_fsblk_t last_block; if (!options) return 1; @@ -1445,33 +1440,6 @@ set_qf_format: case Opt_bh: clear_opt(sbi->s_mount_opt, NOBH); break; - case Opt_extents: - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_EXTENTS)) { - ext4_warning(sb, __func__, - "extents feature not enabled " - "on this filesystem, use tune2fs"); - return 0; - } - set_opt(sbi->s_mount_opt, EXTENTS); - break; - case Opt_noextents: - /* - * When e2fsprogs support resizing an already existing - * ext3 file system to greater than 2**32 we need to - * 
add support to block allocator to handle growing - * already existing block mapped inode so that blocks - * allocated for them fall within 2**32 - */ - last_block = ext4_blocks_count(sbi->s_es) - 1; - if (last_block > 0xffffffffULL) { - printk(KERN_ERR "EXT4-fs: Filesystem too " - "large to mount with " - "-o noextents options\n"); - return 0; - } - clear_opt(sbi->s_mount_opt, EXTENTS); - break; case Opt_i_version: set_opt(sbi->s_mount_opt, I_VERSION); sb->s_flags |= MS_I_VERSION; @@ -2135,18 +2103,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, RESERVATION); set_opt(sbi->s_mount_opt, BARRIER); - /* - * turn on extents feature by default in ext4 filesystem - * only if feature flag already set by mkfs or tune2fs. - * Use -o noextents to turn it off - */ - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) - set_opt(sbi->s_mount_opt, EXTENTS); - else - ext4_warning(sb, __func__, - "extents feature not enabled on this filesystem, " - "use tune2fs."); - /* * enable delayed allocation by default * Use -o nodelalloc to turn it off @@ -3825,7 +3781,7 @@ static void __exit exit_ext4_fs(void) } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); -MODULE_DESCRIPTION("Fourth Extended Filesystem with extents"); +MODULE_DESCRIPTION("Fourth Extended Filesystem"); MODULE_LICENSE("GPL"); module_init(init_ext4_fs) module_exit(exit_ext4_fs) -- cgit v1.2.3-70-g09d2 From c04fc586c1a480ba198f03ae7b6cbd7b57380b91 Mon Sep 17 00:00:00 2001 From: Gary Hade Date: Tue, 6 Jan 2009 14:39:14 -0800 Subject: mm: show node to memory section relationship with symlinks in sysfs Show node to memory section relationship with symlinks in sysfs Add /sys/devices/system/node/nodeX/memoryY symlinks for all the memory sections located on nodeX. For example: /sys/devices/system/node/node1/memory135 -> ../../memory/memory135 indicates that memory section 135 resides on node1. 
Also revises documentation to cover this change as well as updating Documentation/ABI/testing/sysfs-devices-memory to include descriptions of memory hotremove files 'phys_device', 'phys_index', and 'state' that were previously not described there. In addition to it always being a good policy to provide users with the maximum possible amount of physical location information for resources that can be hot-added and/or hot-removed, the following are some (but likely not all) of the user benefits provided by this change. Immediate: - Provides information needed to determine the specific node on which a defective DIMM is located. This will reduce system downtime when the node or defective DIMM is swapped out. - Prevents unintended onlining of a memory section that was previously offlined due to a defective DIMM. This could happen during node hot-add when the user or node hot-add assist script onlines _all_ offlined sections due to user or script inability to identify the specific memory sections located on the hot-added node. The consequences of reintroducing the defective memory could be ugly. - Provides information needed to vary the amount and distribution of memory on specific nodes for testing or debugging purposes. Future: - Will provide information needed to identify the memory sections that need to be offlined prior to physical removal of a specific node. Symlink creation during boot was tested on 2-node x86_64, 2-node ppc64, and 2-node ia64 systems. Symlink creation during physical memory hot-add tested on a 2-node x86_64 system. 
Signed-off-by: Gary Hade Signed-off-by: Badari Pulavarty Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-devices-memory | 51 +++++++++++- Documentation/memory-hotplug.txt | 16 +++- arch/ia64/mm/init.c | 2 +- arch/powerpc/mm/mem.c | 2 +- arch/s390/mm/init.c | 2 +- arch/sh/mm/init.c | 3 +- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/init_64.c | 2 +- drivers/base/memory.c | 19 +++-- drivers/base/node.c | 103 +++++++++++++++++++++++++ include/linux/memory.h | 6 +- include/linux/memory_hotplug.h | 2 +- include/linux/node.h | 13 ++++ mm/memory_hotplug.c | 11 +-- 14 files changed, 209 insertions(+), 25 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory index 7a16fe1e227..9fe91c02ee4 100644 --- a/Documentation/ABI/testing/sysfs-devices-memory +++ b/Documentation/ABI/testing/sysfs-devices-memory @@ -6,7 +6,6 @@ Description: internal state of the kernel memory blocks. Files could be added or removed dynamically to represent hot-add/remove operations. - Users: hotplug memory add/remove tools https://w3.opensource.ibm.com/projects/powerpc-utils/ @@ -19,6 +18,56 @@ Description: This is useful for a user-level agent to determine identify removable sections of the memory before attempting potentially expensive hot-remove memory operation +Users: hotplug memory remove tools + https://w3.opensource.ibm.com/projects/powerpc-utils/ + +What: /sys/devices/system/memory/memoryX/phys_device +Date: September 2008 +Contact: Badari Pulavarty +Description: + The file /sys/devices/system/memory/memoryX/phys_device + is read-only and is designed to show the name of physical + memory device. Implementation is currently incomplete. 
+What: /sys/devices/system/memory/memoryX/phys_index +Date: September 2008 +Contact: Badari Pulavarty +Description: + The file /sys/devices/system/memory/memoryX/phys_index + is read-only and contains the section ID in hexadecimal + which is equivalent to decimal X contained in the + memory section directory name. + +What: /sys/devices/system/memory/memoryX/state +Date: September 2008 +Contact: Badari Pulavarty +Description: + The file /sys/devices/system/memory/memoryX/state + is read-write. When read, its contents show the + online/offline state of the memory section. When written, + root can toggle the online/offline state of a removable + memory section (see removable file description above) + using the following commands. + # echo online > /sys/devices/system/memory/memoryX/state + # echo offline > /sys/devices/system/memory/memoryX/state + + For example, if /sys/devices/system/memory/memory22/removable + contains a value of 1 and + /sys/devices/system/memory/memory22/state contains the + string "online" the following command can be executed by + root to offline that section. + # echo offline > /sys/devices/system/memory/memory22/state Users: hotplug memory remove tools https://w3.opensource.ibm.com/projects/powerpc-utils/ + +What: /sys/devices/system/node/nodeX/memoryY +Date: September 2008 +Contact: Gary Hade +Description: + When CONFIG_NUMA is enabled + /sys/devices/system/node/nodeX/memoryY is a symbolic link that + points to the corresponding /sys/devices/system/memory/memoryY + memory section directory. For example, the following symbolic + link is created for memory section 9 on node0. + /sys/devices/system/node/node0/memory9 -> ../../memory/memory9 + diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 168117bd6ee..4c2ecf537a4 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -124,7 +124,7 @@ config options. This option can be kernel module too. 
-------------------------------- -3 sysfs files for memory hotplug +4 sysfs files for memory hotplug -------------------------------- All sections have their device information under /sys/devices/system/memory as @@ -138,11 +138,12 @@ For example, assume 1GiB section size. A device for a memory starting at (0x100000000 / 1Gib = 4) This device covers address range [0x100000000 ... 0x140000000) -Under each section, you can see 3 files. +Under each section, you can see 4 files. /sys/devices/system/memory/memoryXXX/phys_index /sys/devices/system/memory/memoryXXX/phys_device /sys/devices/system/memory/memoryXXX/state +/sys/devices/system/memory/memoryXXX/removable 'phys_index' : read-only and contains section id, same as XXX. 'state' : read-write @@ -150,10 +151,20 @@ Under each section, you can see 3 files. at write: user can specify "online", "offline" command 'phys_device': read-only: designed to show the name of physical memory device. This is not well implemented now. +'removable' : read-only: contains an integer value indicating + whether the memory section is removable or not + removable. A value of 1 indicates that the memory + section is removable and a value of 0 indicates that + it is not removable. NOTE: These directories/files appear after physical memory hotplug phase. +If CONFIG_NUMA is enabled the +/sys/devices/system/memory/memoryXXX memory section +directories can also be accessed via symbolic links located in +the /sys/devices/system/node/node* directories. For example: +/sys/devices/system/node/node0/memory9 -> ../../memory/memory9 -------------------------------- 4. Physical memory hot-add phase @@ -365,7 +376,6 @@ node if necessary. - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like sysctl or new control file. - showing memory section and physical device relationship. 
- - showing memory section and node relationship (maybe good for NUMA) - showing memory section is under ZONE_MOVABLE or not - test and make it better memory offlining. - support HugeTLB page migration and offlining. diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 054bcd9439a..56e12903973 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -692,7 +692,7 @@ int arch_add_memory(int nid, u64 start, u64 size) pgdat = NODE_DATA(nid); zone = pgdat->node_zones + ZONE_NORMAL; - ret = __add_pages(zone, start_pfn, nr_pages); + ret = __add_pages(nid, zone, start_pfn, nr_pages); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 53b06ebb3f2..f00f09a77f1 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -132,7 +132,7 @@ int arch_add_memory(int nid, u64 start, u64 size) /* this should work for most non-highmem platforms */ zone = pgdata->node_zones; - return __add_pages(zone, start_pfn, nr_pages); + return __add_pages(nid, zone, start_pfn, nr_pages); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 158b0d6d704..f0258ca3b17 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -183,7 +183,7 @@ int arch_add_memory(int nid, u64 start, u64 size) rc = vmem_add_mapping(start, size); if (rc) return rc; - rc = __add_pages(zone, PFN_DOWN(start), PFN_DOWN(size)); + rc = __add_pages(nid, zone, PFN_DOWN(start), PFN_DOWN(size)); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 6cbef8caeb5..3edf297c829 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -311,7 +311,8 @@ int arch_add_memory(int nid, u64 start, u64 size) pgdat = NODE_DATA(nid); /* We only have ZONE_NORMAL, so this is easy.. 
*/ - ret = __add_pages(pgdat->node_zones + ZONE_NORMAL, start_pfn, nr_pages); + ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL, + start_pfn, nr_pages); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f99a6c6c432..544d724caee 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1079,7 +1079,7 @@ int arch_add_memory(int nid, u64 start, u64 size) unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(zone, start_pfn, nr_pages); + return __add_pages(nid, zone, start_pfn, nr_pages); } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9f7a0d24d42..54c437e9654 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -857,7 +857,7 @@ int arch_add_memory(int nid, u64 start, u64 size) if (last_mapped_pfn > max_pfn_mapped) max_pfn_mapped = last_mapped_pfn; - ret = __add_pages(zone, start_pfn, nr_pages); + ret = __add_pages(nid, zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); return ret; diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 5260e9e0df4..989429cfed8 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -347,8 +347,9 @@ static inline int memory_probe_init(void) * section belongs to... 
*/ -static int add_memory_block(unsigned long node_id, struct mem_section *section, - unsigned long state, int phys_device) +static int add_memory_block(int nid, struct mem_section *section, + unsigned long state, int phys_device, + enum mem_add_context context) { struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); int ret = 0; @@ -370,6 +371,10 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section, ret = mem_create_simple_file(mem, phys_device); if (!ret) ret = mem_create_simple_file(mem, removable); + if (!ret) { + if (context == HOTPLUG) + ret = register_mem_sect_under_node(mem, nid); + } return ret; } @@ -382,7 +387,7 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section, * * This could be made generic for all sysdev classes. */ -static struct memory_block *find_memory_block(struct mem_section *section) +struct memory_block *find_memory_block(struct mem_section *section) { struct kobject *kobj; struct sys_device *sysdev; @@ -411,6 +416,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, struct memory_block *mem; mem = find_memory_block(section); + unregister_mem_sect_under_nodes(mem); mem_remove_simple_file(mem, phys_index); mem_remove_simple_file(mem, state); mem_remove_simple_file(mem, phys_device); @@ -424,9 +430,9 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, * need an interface for the VM to add new memory regions, * but without onlining it. 
*/ -int register_new_memory(struct mem_section *section) +int register_new_memory(int nid, struct mem_section *section) { - return add_memory_block(0, section, MEM_OFFLINE, 0); + return add_memory_block(nid, section, MEM_OFFLINE, 0, HOTPLUG); } int unregister_memory_section(struct mem_section *section) @@ -458,7 +464,8 @@ int __init memory_dev_init(void) for (i = 0; i < NR_MEM_SECTIONS; i++) { if (!present_section_nr(i)) continue; - err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); + err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, + 0, BOOT); if (!ret) ret = err; } diff --git a/drivers/base/node.c b/drivers/base/node.c index 91636cd8b6c..43fa90b837e 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -248,6 +249,105 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) return 0; } +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#define page_initialized(page) (page->lru.next) + +static int get_nid_for_pfn(unsigned long pfn) +{ + struct page *page; + + if (!pfn_valid_within(pfn)) + return -1; + page = pfn_to_page(pfn); + if (!page_initialized(page)) + return -1; + return pfn_to_nid(pfn); +} + +/* register memory section under specified node if it spans that node */ +int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) +{ + unsigned long pfn, sect_start_pfn, sect_end_pfn; + + if (!mem_blk) + return -EFAULT; + if (!node_online(nid)) + return 0; + sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); + sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { + int page_nid; + + page_nid = get_nid_for_pfn(pfn); + if (page_nid < 0) + continue; + if (page_nid != nid) + continue; + return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj, + &mem_blk->sysdev.kobj, + kobject_name(&mem_blk->sysdev.kobj)); + } + /* mem section does not span the specified node */ + return 0; +} + 
+/* unregister memory section under all nodes that it spans */ +int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) +{ + nodemask_t unlinked_nodes; + unsigned long pfn, sect_start_pfn, sect_end_pfn; + + if (!mem_blk) + return -EFAULT; + nodes_clear(unlinked_nodes); + sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); + sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { + unsigned int nid; + + nid = get_nid_for_pfn(pfn); + if (nid < 0) + continue; + if (!node_online(nid)) + continue; + if (node_test_and_set(nid, unlinked_nodes)) + continue; + sysfs_remove_link(&node_devices[nid].sysdev.kobj, + kobject_name(&mem_blk->sysdev.kobj)); + } + return 0; +} + +static int link_mem_sections(int nid) +{ + unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn; + unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages; + unsigned long pfn; + int err = 0; + + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + unsigned long section_nr = pfn_to_section_nr(pfn); + struct mem_section *mem_sect; + struct memory_block *mem_blk; + int ret; + + if (!present_section_nr(section_nr)) + continue; + mem_sect = __nr_to_section(section_nr); + mem_blk = find_memory_block(mem_sect); + ret = register_mem_sect_under_node(mem_blk, nid); + if (!err) + err = ret; + + /* discard ref obtained in find_memory_block() */ + kobject_put(&mem_blk->sysdev.kobj); + } + return err; +} +#else +static int link_mem_sections(int nid) { return 0; } +#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ + int register_one_node(int nid) { int error = 0; @@ -267,6 +367,9 @@ int register_one_node(int nid) if (cpu_to_node(cpu) == nid) register_cpu_under_node(cpu, nid); } + + /* link memory sections under this node */ + error = link_mem_sections(nid); } return error; diff --git a/include/linux/memory.h b/include/linux/memory.h index 36c82c9e6ea..3fdc10806d3 100644 --- a/include/linux/memory.h +++ 
b/include/linux/memory.h @@ -79,14 +79,14 @@ static inline int memory_notify(unsigned long val, void *v) #else extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); -extern int register_new_memory(struct mem_section *); +extern int register_new_memory(int, struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); +extern struct memory_block *find_memory_block(struct mem_section *); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION< Date: Tue, 6 Jan 2009 14:39:31 -0800 Subject: mm: add dirty_background_bytes and dirty_bytes sysctls This change introduces two new sysctls to /proc/sys/vm: dirty_background_bytes and dirty_bytes. dirty_background_bytes is the counterpart to dirty_background_ratio and dirty_bytes is the counterpart to dirty_ratio. With growing memory capacities of individual machines, it's no longer sufficient to specify dirty thresholds as a percentage of the amount of dirtyable memory over the entire system. dirty_background_bytes and dirty_bytes specify quantities of memory, in bytes, that represent the dirty limits for the entire system. If either of these values is set, its value represents the amount of dirty memory that is needed to commence either background or direct writeback. When a `bytes' or `ratio' file is written, its counterpart becomes a function of the written value. For example, if dirty_bytes is written to be 8096, 8K of memory is required to commence direct writeback. 
dirty_ratio is then functionally equivalent to 8K / the amount of dirtyable memory: dirtyable_memory = free pages + mapped pages + file cache dirty_background_bytes = dirty_background_ratio * dirtyable_memory -or- dirty_background_ratio = dirty_background_bytes / dirtyable_memory AND dirty_bytes = dirty_ratio * dirtyable_memory -or- dirty_ratio = dirty_bytes / dirtyable_memory Only one of dirty_background_bytes and dirty_background_ratio may be specified at a time, and only one of dirty_bytes and dirty_ratio may be specified. When one sysctl is written, the other appears as 0 when read. The `bytes' files operate on a page size granularity since dirty limits are compared with ZVC values, which are in page units. Prior to this change, the minimum dirty_ratio was 5 as implemented by get_dirty_limits() although /proc/sys/vm/dirty_ratio would show any user written value between 0 and 100. This restriction is maintained, but dirty_bytes has a lower limit of only one page. Also prior to this change, the dirty_background_ratio could not equal or exceed dirty_ratio. This restriction is maintained in addition to restricting dirty_background_bytes. If either background threshold equals or exceeds that of the dirty threshold, it is implicitly set to half the dirty threshold. 
Acked-by: Peter Zijlstra Cc: Dave Chinner Cc: Christoph Lameter Signed-off-by: David Rientjes Cc: Andrea Righi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 26 +++++++++- Documentation/sysctl/vm.txt | 3 +- include/linux/writeback.h | 11 ++++ kernel/sysctl.c | 27 ++++++++-- mm/page-writeback.c | 102 +++++++++++++++++++++++++++++++------ 5 files changed, 146 insertions(+), 23 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 71df353e367..32e94635484 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1385,6 +1385,15 @@ swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100 causes the kernel to prefer to reclaim dentries and inodes. +dirty_background_bytes +---------------------- + +Contains the amount of dirty memory at which the pdflush background writeback +daemon will start writeback. + +If dirty_background_bytes is written, dirty_background_ratio becomes a function +of its value (dirty_background_bytes / the amount of dirtyable system memory). + dirty_background_ratio ---------------------- @@ -1393,14 +1402,29 @@ pages + file cache, not including locked pages and HugePages), the number of pages at which the pdflush background writeback daemon will start writing out dirty data. +If dirty_background_ratio is written, dirty_background_bytes becomes a function +of its value (dirty_background_ratio * the amount of dirtyable system memory). + +dirty_bytes +----------- + +Contains the amount of dirty memory at which a process generating disk writes +will itself start writeback. + +If dirty_bytes is written, dirty_ratio becomes a function of its value +(dirty_bytes / the amount of dirtyable system memory). 
+ dirty_ratio ------------------ +----------- Contains, as a percentage of the dirtyable system memory (free pages + mapped pages + file cache, not including locked pages and HugePages), the number of pages at which a process which is generating disk writes will itself start writing out dirty data. +If dirty_ratio is written, dirty_bytes becomes a function of its value +(dirty_ratio * the amount of dirtyable system memory). + dirty_writeback_centisecs ------------------------- diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index d79eeda7a69..cd05994a49e 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -41,7 +41,8 @@ Currently, these files are in /proc/sys/vm: ============================================================== -dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, +dirty_bytes, dirty_ratio, dirty_background_bytes, +dirty_background_ratio, dirty_expire_centisecs, dirty_writeback_centisecs, highmem_is_dirtyable, vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout, drop-caches, hugepages_treat_as_movable: diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 259e9ea58ca..bb28c975c1d 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -107,7 +107,9 @@ void throttle_vm_writeout(gfp_t gfp_mask); /* These are exported to sysctl. 
*/ extern int dirty_background_ratio; +extern unsigned long dirty_background_bytes; extern int vm_dirty_ratio; +extern unsigned long vm_dirty_bytes; extern int dirty_writeback_interval; extern int dirty_expire_interval; extern int vm_highmem_is_dirtyable; @@ -116,9 +118,18 @@ extern int laptop_mode; extern unsigned long determine_dirtyable_memory(void); +extern int dirty_background_ratio_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); +extern int dirty_background_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int dirty_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); struct ctl_table; struct file; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ff6d45c7626..92f6e5bc3c2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -87,10 +87,6 @@ extern int rcutorture_runnable; #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ /* Constants used for minimum and maximum */ -#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) -static int one = 1; -#endif - #ifdef CONFIG_DETECT_SOFTLOCKUP static int sixty = 60; static int neg_one = -1; @@ -101,6 +97,7 @@ static int two = 2; #endif static int zero; +static int one = 1; static int one_hundred = 100; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ @@ -952,11 +949,21 @@ static struct ctl_table vm_table[] = { .data = &dirty_background_ratio, .maxlen = sizeof(dirty_background_ratio), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = &dirty_background_ratio_handler, .strategy = &sysctl_intvec, .extra1 = &zero, .extra2 = &one_hundred, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "dirty_background_bytes", + 
.data = &dirty_background_bytes, + .maxlen = sizeof(dirty_background_bytes), + .mode = 0644, + .proc_handler = &dirty_background_bytes_handler, + .strategy = &sysctl_intvec, + .extra1 = &one, + }, { .ctl_name = VM_DIRTY_RATIO, .procname = "dirty_ratio", @@ -968,6 +975,16 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "dirty_bytes", + .data = &vm_dirty_bytes, + .maxlen = sizeof(vm_dirty_bytes), + .mode = 0644, + .proc_handler = &dirty_bytes_handler, + .strategy = &sysctl_intvec, + .extra1 = &one, + }, { .procname = "dirty_writeback_centisecs", .data = &dirty_writeback_interval, diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4d4074cff30..b493db7841d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -68,6 +68,12 @@ static inline long sync_writeback_pages(void) */ int dirty_background_ratio = 5; +/* + * dirty_background_bytes starts at 0 (disabled) so that it is a function of + * dirty_background_ratio * the amount of dirtyable memory + */ +unsigned long dirty_background_bytes; + /* * free highmem will not be subtracted from the total free memory * for calculating free ratios if vm_highmem_is_dirtyable is true @@ -79,6 +85,12 @@ int vm_highmem_is_dirtyable; */ int vm_dirty_ratio = 10; +/* + * vm_dirty_bytes starts at 0 (disabled) so that it is a function of + * vm_dirty_ratio * the amount of dirtyable memory + */ +unsigned long vm_dirty_bytes; + /* * The interval between `kupdate'-style writebacks, in jiffies */ @@ -135,23 +147,75 @@ static int calc_period_shift(void) { unsigned long dirty_total; - dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100; + if (vm_dirty_bytes) + dirty_total = vm_dirty_bytes / PAGE_SIZE; + else + dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / + 100; return 2 + ilog2(dirty_total - 1); } /* - * update the period when the dirty ratio changes. + * update the period when the dirty threshold changes. 
*/ +static void update_completion_period(void) +{ + int shift = calc_period_shift(); + prop_change_shift(&vm_completions, shift); + prop_change_shift(&vm_dirties, shift); +} + +int dirty_background_ratio_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + if (ret == 0 && write) + dirty_background_bytes = 0; + return ret; +} + +int dirty_background_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + if (ret == 0 && write) + dirty_background_ratio = 0; + return ret; +} + int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret; + + ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_ratio != old_ratio) { - int shift = calc_period_shift(); - prop_change_shift(&vm_completions, shift); - prop_change_shift(&vm_dirties, shift); + update_completion_period(); + vm_dirty_bytes = 0; + } + return ret; +} + + +int dirty_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int old_bytes = vm_dirty_bytes; + int ret; + + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + if (ret == 0 && write && vm_dirty_bytes != old_bytes) { + update_completion_period(); + vm_dirty_ratio = 0; } return ret; } @@ -365,23 +429,29 @@ void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, unsigned long *pbdi_dirty, struct backing_dev_info *bdi) { - int background_ratio; /* Percentages */ - int dirty_ratio; unsigned long background; unsigned long 
dirty; unsigned long available_memory = determine_dirtyable_memory(); struct task_struct *tsk; - dirty_ratio = vm_dirty_ratio; - if (dirty_ratio < 5) - dirty_ratio = 5; + if (vm_dirty_bytes) + dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE); + else { + int dirty_ratio; - background_ratio = dirty_background_ratio; - if (background_ratio >= dirty_ratio) - background_ratio = dirty_ratio / 2; + dirty_ratio = vm_dirty_ratio; + if (dirty_ratio < 5) + dirty_ratio = 5; + dirty = (dirty_ratio * available_memory) / 100; + } + + if (dirty_background_bytes) + background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE); + else + background = (dirty_background_ratio * available_memory) / 100; - background = (background_ratio * available_memory) / 100; - dirty = (dirty_ratio * available_memory) / 100; + if (background >= dirty) + background = dirty / 2; tsk = current; if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { background += background / 4; -- cgit v1.2.3-70-g09d2 From 63d6c5ad7fc27455ce5cb4706884671fb7e0df08 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:38 -0800 Subject: mm: remove try_to_munlock from vmscan An unfortunate feature of the Unevictable LRU work was that reclaiming an anonymous page involved an extra scan through the anon_vma: to check that the page is evictable before allocating swap, because the swap could not be freed reliably soon afterwards. Now try_to_free_swap() has replaced remove_exclusive_swap_page(), that's not an issue any more: remove try_to_munlock() call from shrink_page_list(), leaving it to try_to_unmap() to discover if the page is one to be culled to the unevictable list - in which case then try_to_free_swap(). Update unevictable-lru.txt to remove comments on the try_to_munlock() in shrink_page_list(), and shorten some lines over 80 columns. 
Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Acked-by: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/unevictable-lru.txt | 63 +++++++++++------------------------- mm/vmscan.c | 11 ++----- 2 files changed, 20 insertions(+), 54 deletions(-) (limited to 'Documentation') diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt index 125eed560e5..0706a7282a8 100644 --- a/Documentation/vm/unevictable-lru.txt +++ b/Documentation/vm/unevictable-lru.txt @@ -137,13 +137,6 @@ shrink_page_list() where they will be detected when vmscan walks the reverse map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list() will cull the page at that point. -Note that for anonymous pages, shrink_page_list() attempts to add the page to -the swap cache before it tries to unmap the page. To avoid this unnecessary -consumption of swap space, shrink_page_list() calls try_to_munlock() to check -whether any VM_LOCKED vmas map the page without attempting to unmap the page. -If try_to_munlock() returns SWAP_MLOCK, shrink_page_list() will cull the page -without consuming swap space. try_to_munlock() will be described below. - To "cull" an unevictable page, vmscan simply puts the page back on the lru list using putback_lru_page()--the inverse operation to isolate_lru_page()-- after dropping the page lock. Because the condition which makes the page @@ -190,8 +183,8 @@ several places: in the VM_LOCKED flag being set for the vma. 3) in the fault path, if mlocked pages are "culled" in the fault path, and when a VM_LOCKED stack segment is expanded. -4) as mentioned above, in vmscan:shrink_page_list() with attempting to - reclaim a page in a VM_LOCKED vma--via try_to_unmap() or try_to_munlock(). +4) as mentioned above, in vmscan:shrink_page_list() when attempting to + reclaim a page in a VM_LOCKED vma via try_to_unmap(). 
Mlocked pages become unlocked and rescued from the unevictable list when: @@ -260,9 +253,9 @@ mlock_fixup() filters several classes of "special" vmas: 2) vmas mapping hugetlbfs page are already effectively pinned into memory. We don't need nor want to mlock() these pages. However, to preserve the - prior behavior of mlock()--before the unevictable/mlock changes--mlock_fixup() - will call make_pages_present() in the hugetlbfs vma range to allocate the - huge pages and populate the ptes. + prior behavior of mlock()--before the unevictable/mlock changes-- + mlock_fixup() will call make_pages_present() in the hugetlbfs vma range + to allocate the huge pages and populate the ptes. 3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of kernel pages, such as the vdso page, relay channel pages, etc. These pages @@ -322,7 +315,7 @@ __mlock_vma_pages_range()--the same function used to mlock a vma range-- passing a flag to indicate that munlock() is being performed. Because the vma access protections could have been changed to PROT_NONE after -faulting in and mlocking some pages, get_user_pages() was unreliable for visiting +faulting in and mlocking pages, get_user_pages() was unreliable for visiting these pages for munlocking. Because we don't want to leave pages mlocked(), get_user_pages() was enhanced to accept a flag to ignore the permissions when fetching the pages--all of which should be resident as a result of previous @@ -416,8 +409,8 @@ Mlocked Pages: munmap()/exit()/exec() System Call Handling When unmapping an mlocked region of memory, whether by an explicit call to munmap() or via an internal unmap from exit() or exec() processing, we must munlock the pages if we're removing the last VM_LOCKED vma that maps the pages. -Before the unevictable/mlock changes, mlocking did not mark the pages in any way, -so unmapping them required no processing. 
+Before the unevictable/mlock changes, mlocking did not mark the pages in any +way, so unmapping them required no processing. To munlock a range of memory under the unevictable/mlock infrastructure, the munmap() handler and task address space tear down function call @@ -517,12 +510,10 @@ couldn't be mlocked. Mlocked pages: try_to_munlock() Reverse Map Scan TODO/FIXME: a better name might be page_mlocked()--analogous to the -page_referenced() reverse map walker--especially if we continue to call this -from shrink_page_list(). See related TODO/FIXME below. +page_referenced() reverse map walker. -When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall() System -Call Handling" above--tries to munlock a page, or when shrink_page_list() -encounters an anonymous page that is not yet in the swap cache, they need to +When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall() +System Call Handling" above--tries to munlock a page, it needs to determine whether or not the page is mapped by any VM_LOCKED vma, without actually attempting to unmap all ptes from the page. For this purpose, the unevictable/mlock infrastructure introduced a variant of try_to_unmap() called @@ -535,10 +526,7 @@ for VM_LOCKED vmas. When such a vma is found for anonymous pages and file pages mapped in linear VMAs, as in the try_to_unmap() case, the functions attempt to acquire the associated mmap semaphore, mlock the page via mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the -pre-clearing of the page's PG_mlocked done by munlock_vma_page() and informs -shrink_page_list() that the anonymous page should be culled rather than added -to the swap cache in preparation for a try_to_unmap() that will almost -certainly fail. +pre-clearing of the page's PG_mlocked done by munlock_vma_page. If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap semaphore, it will return SWAP_AGAIN. 
This will allow shrink_page_list() @@ -557,10 +545,7 @@ However, the scan can terminate when it encounters a VM_LOCKED vma and can successfully acquire the vma's mmap semphore for read and mlock the page. Although try_to_munlock() can be called many [very many!] times when munlock()ing a large region or tearing down a large address space that has been -mlocked via mlockall(), overall this is a fairly rare event. In addition, -although shrink_page_list() calls try_to_munlock() for every anonymous page that -it handles that is not yet in the swap cache, on average anonymous pages will -have very short reverse map lists. +mlocked via mlockall(), overall this is a fairly rare event. Mlocked Page: Page Reclaim in shrink_*_list() @@ -588,8 +573,8 @@ Some examples of these unevictable pages on the LRU lists are: munlock_vma_page() was forced to let the page back on to the normal LRU list for vmscan to handle. -shrink_inactive_list() also culls any unevictable pages that it finds -on the inactive lists, again diverting them to the appropriate zone's unevictable +shrink_inactive_list() also culls any unevictable pages that it finds on +the inactive lists, again diverting them to the appropriate zone's unevictable lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from @@ -597,19 +582,7 @@ the lru to recheck via try_to_munlock(). shrink_inactive_list() won't notice the latter, but will pass on to shrink_page_list(). shrink_page_list() again culls obviously unevictable pages that it could -encounter for similar reason to shrink_inactive_list(). As already discussed, -shrink_page_list() proactively looks for anonymous pages that should have -PG_mlocked set but don't--these would not be detected by page_evictable()--to -avoid adding them to the swap cache unnecessarily. 
File pages mapped into +encounter for similar reason to shrink_inactive_list(). Pages mapped into VM_LOCKED vmas but without PG_mlocked set will make it all the way to -try_to_unmap(). shrink_page_list() will divert them to the unevictable list when -try_to_unmap() returns SWAP_MLOCK, as discussed above. - -TODO/FIXME: If we can enhance the swap cache to reliably remove entries -with page_count(page) > 2, as long as all ptes are mapped to the page and -not the swap entry, we can probably remove the call to try_to_munlock() in -shrink_page_list() and just remove the page from the swap cache when -try_to_unmap() returns SWAP_MLOCK. Currently, remove_exclusive_swap_page() -doesn't seem to allow that. - - +try_to_unmap(). shrink_page_list() will divert them to the unevictable list +when try_to_unmap() returns SWAP_MLOCK, as discussed above. diff --git a/mm/vmscan.c b/mm/vmscan.c index c8601dd3660..74f875733e2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -625,15 +625,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageAnon(page) && !PageSwapCache(page)) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; - switch (try_to_munlock(page)) { - case SWAP_FAIL: /* shouldn't happen */ - case SWAP_AGAIN: - goto keep_locked; - case SWAP_MLOCK: - goto cull_mlocked; - case SWAP_SUCCESS: - ; /* fall thru'; add to swap cache */ - } if (!add_to_swap(page, GFP_ATOMIC)) goto activate_locked; may_enter_fs = 1; @@ -752,6 +743,8 @@ free_it: continue; cull_mlocked: + if (PageSwapCache(page)) + try_to_free_swap(page); unlock_page(page); putback_lru_page(page); continue; -- cgit v1.2.3-70-g09d2 From 5f820f648c92a5ecc771a96b3c29aa6e90013bba Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 6 Jan 2009 14:40:59 -0800 Subject: poll: allow f_op->poll to sleep f_op->poll is the only vfs operation which is not allowed to sleep. 
It's because the poll and select implementations used task state to synchronize against wake ups, which doesn't have to be the case anymore as the wait/wake interface can now use custom wake up functions. The non-sleep restriction can be a bit tricky because ->poll is not called from an atomic context and the result of accidentally sleeping in ->poll only shows up as temporary busy looping when the timing is right or rather wrong. This patch converts poll/select to use a custom wake up function and a separate triggered variable to synchronize against wake up events. The only added overhead is an extra function call during wake up, which is negligible. This patch removes the one non-sleep exception from vfs locking rules and is beneficial to userland filesystem implementations like FUSE, 9p or peculiar fs like spufs as it's very difficult for those to implement a non-sleeping poll method. While at it, make the following cosmetic changes to make poll.h and select.c checkpatch friendly. * s/type * symbol/type *symbol/ : three places in poll.h * remove blank line before EXPORT_SYMBOL() : two places in select.c Oleg: spotted missing barrier in poll_schedule_timeout() Davide: spotted missing write barrier in pollwake() Signed-off-by: Tejun Heo Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Ingo Molnar Cc: Christoph Hellwig Signed-off-by: Miklos Szeredi Cc: Davide Libenzi Cc: Brad Boyer Cc: Al Viro Cc: Roland McGrath Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Cc: Davide Libenzi Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 2 +- drivers/media/video/v4l1-compat.c | 4 +-- fs/select.c | 76 +++++++++++++++++++++++++++++++-------- include/linux/poll.h | 15 ++++++-- 4 files changed, 76 insertions(+), 21 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index ccec5539438..cfbfa15a46b 100644 --- a/Documentation/filesystems/Locking +++
b/Documentation/filesystems/Locking @@ -397,7 +397,7 @@ prototypes: }; locking rules: - All except ->poll() may block. + All may block. BKL llseek: no (see below) read: no diff --git a/drivers/media/video/v4l1-compat.c b/drivers/media/video/v4l1-compat.c index d450cab20be..b617bf05e2d 100644 --- a/drivers/media/video/v4l1-compat.c +++ b/drivers/media/video/v4l1-compat.c @@ -203,7 +203,6 @@ static int poll_one(struct file *file, struct poll_wqueues *pwq) table = &pwq->pt; for (;;) { int mask; - set_current_state(TASK_INTERRUPTIBLE); mask = file->f_op->poll(file, table); if (mask & POLLIN) break; @@ -212,9 +211,8 @@ static int poll_one(struct file *file, struct poll_wqueues *pwq) retval = -ERESTARTSYS; break; } - schedule(); + poll_schedule(pwq, TASK_INTERRUPTIBLE); } - set_current_state(TASK_RUNNING); poll_freewait(pwq); return retval; } diff --git a/fs/select.c b/fs/select.c index 87df51eadcf..08b91beed80 100644 --- a/fs/select.c +++ b/fs/select.c @@ -109,11 +109,11 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, void poll_initwait(struct poll_wqueues *pwq) { init_poll_funcptr(&pwq->pt, __pollwait); + pwq->polling_task = current; pwq->error = 0; pwq->table = NULL; pwq->inline_index = 0; } - EXPORT_SYMBOL(poll_initwait); static void free_poll_entry(struct poll_table_entry *entry) @@ -142,12 +142,10 @@ void poll_freewait(struct poll_wqueues *pwq) free_page((unsigned long) old); } } - EXPORT_SYMBOL(poll_freewait); -static struct poll_table_entry *poll_get_entry(poll_table *_p) +static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p) { - struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt); struct poll_table_page *table = p->table; if (p->inline_index < N_INLINE_POLL_ENTRIES) @@ -159,7 +157,6 @@ static struct poll_table_entry *poll_get_entry(poll_table *_p) new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); if (!new_table) { p->error = -ENOMEM; - __set_current_state(TASK_RUNNING); return NULL; 
} new_table->entry = new_table->entries; @@ -171,20 +168,75 @@ static struct poll_table_entry *poll_get_entry(poll_table *_p) return table->entry++; } +static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct poll_wqueues *pwq = wait->private; + DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task); + + /* + * Although this function is called under waitqueue lock, LOCK + * doesn't imply write barrier and the users expect write + * barrier semantics on wakeup functions. The following + * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() + * and is paired with set_mb() in poll_schedule_timeout. + */ + smp_wmb(); + pwq->triggered = 1; + + /* + * Perform the default wake up operation using a dummy + * waitqueue. + * + * TODO: This is hacky but there currently is no interface to + * pass in @sync. @sync is scheduled to be removed and once + * that happens, wake_up_process() can be used directly. + */ + return default_wake_function(&dummy_wait, mode, sync, key); +} + /* Add a new entry */ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { - struct poll_table_entry *entry = poll_get_entry(p); + struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt); + struct poll_table_entry *entry = poll_get_entry(pwq); if (!entry) return; get_file(filp); entry->filp = filp; entry->wait_address = wait_address; - init_waitqueue_entry(&entry->wait, current); + init_waitqueue_func_entry(&entry->wait, pollwake); + entry->wait.private = pwq; add_wait_queue(wait_address, &entry->wait); } +int poll_schedule_timeout(struct poll_wqueues *pwq, int state, + ktime_t *expires, unsigned long slack) +{ + int rc = -EINTR; + + set_current_state(state); + if (!pwq->triggered) + rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); + __set_current_state(TASK_RUNNING); + + /* + * Prepare for the next iteration. + * + * The following set_mb() serves two purposes. 
First, it's + * the counterpart rmb of the wmb in pollwake() such that data + * written before wake up is always visible after wake up. + * Second, the full barrier guarantees that triggered clearing + * doesn't pass event check of the next iteration. Note that + * this problem doesn't exist for the first iteration as + * add_wait_queue() has full barrier semantics. + */ + set_mb(pwq->triggered, 0); + + return rc; +} +EXPORT_SYMBOL(poll_schedule_timeout); + /** * poll_select_set_timeout - helper function to setup the timeout value * @to: pointer to timespec variable for the final timeout @@ -340,8 +392,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) for (;;) { unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; - set_current_state(TASK_INTERRUPTIBLE); - inp = fds->in; outp = fds->out; exp = fds->ex; rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; @@ -411,10 +461,10 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) to = &expire; } - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE, + to, slack)) timed_out = 1; } - __set_current_state(TASK_RUNNING); poll_freewait(&table); @@ -666,7 +716,6 @@ static int do_poll(unsigned int nfds, struct poll_list *list, for (;;) { struct poll_list *walk; - set_current_state(TASK_INTERRUPTIBLE); for (walk = list; walk != NULL; walk = walk->next) { struct pollfd * pfd, * pfd_end; @@ -709,10 +758,9 @@ static int do_poll(unsigned int nfds, struct poll_list *list, to = &expire; } - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack)) timed_out = 1; } - __set_current_state(TASK_RUNNING); return count; } diff --git a/include/linux/poll.h b/include/linux/poll.h index badd98ab06f..8c24ef8d997 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -46,9 +46,9 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) } 
struct poll_table_entry { - struct file * filp; + struct file *filp; wait_queue_t wait; - wait_queue_head_t * wait_address; + wait_queue_head_t *wait_address; }; /* @@ -56,7 +56,9 @@ struct poll_table_entry { */ struct poll_wqueues { poll_table pt; - struct poll_table_page * table; + struct poll_table_page *table; + struct task_struct *polling_task; + int triggered; int error; int inline_index; struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES]; @@ -64,6 +66,13 @@ struct poll_wqueues { extern void poll_initwait(struct poll_wqueues *pwq); extern void poll_freewait(struct poll_wqueues *pwq); +extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, + ktime_t *expires, unsigned long slack); + +static inline int poll_schedule(struct poll_wqueues *pwq, int state) +{ + return poll_schedule_timeout(pwq, state, NULL, 0); +} /* * Scaleable version of the fd_set. -- cgit v1.2.3-70-g09d2 From 89fac11cb3e7c5860c425dba14845c09ccede39d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 6 Jan 2009 14:41:34 -0800 Subject: adt7470: make automatic fan control really work It turns out that the adt7470's automatic fan control algorithm only works when the temperature sensors get updated. This in turn happens only when someone tells the chip to read its temperature sensors. Regrettably, this means that we have to drive the chip periodically. Signed-off-by: Darrick J. Wong Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/hwmon/adt7470 | 19 +++--- drivers/hwmon/adt7470.c | 156 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 142 insertions(+), 33 deletions(-) (limited to 'Documentation') diff --git a/Documentation/hwmon/adt7470 b/Documentation/hwmon/adt7470 index 75d13ca147c..8ce4aa0a0f5 100644 --- a/Documentation/hwmon/adt7470 +++ b/Documentation/hwmon/adt7470 @@ -31,15 +31,11 @@ Each of the measured inputs (temperature, fan speed) has corresponding high/low limit values. 
The ADT7470 will signal an ALARM if any measured value exceeds either limit. -The ADT7470 DOES NOT sample all inputs continuously. A single pin on the -ADT7470 is connected to a multitude of thermal diodes, but the chip must be -instructed explicitly to read the multitude of diodes. If you want to use -automatic fan control mode, you must manually read any of the temperature -sensors or the fan control algorithm will not run. The chip WILL NOT DO THIS -AUTOMATICALLY; this must be done from userspace. This may be a bug in the chip -design, given that many other AD chips take care of this. The driver will not -read the registers more often than once every 5 seconds. Further, -configuration data is only read once per minute. +The ADT7470 samples all inputs continuously. A kernel thread is started up for +the purpose of periodically querying the temperature sensors, thus allowing the +automatic fan pwm control to set the fan speed. The driver will not read the +registers more often than once every 5 seconds. Further, configuration data is +only read once per minute. Special Features ---------------- @@ -72,5 +68,6 @@ pwm#_auto_point2_temp. Notes ----- -As stated above, the temperature inputs must be read periodically from -userspace in order for the automatic pwm algorithm to run. +The temperature inputs no longer need to be read periodically from userspace in +order for the automatic pwm algorithm to run. This was the case for earlier +versions of the driver. 
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index ab8d5ebc9f7..633e1a1e9d7 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -28,6 +28,7 @@ #include #include #include +#include /* Addresses to scan */ static const unsigned short normal_i2c[] = { 0x2C, 0x2E, 0x2F, I2C_CLIENT_END }; @@ -132,6 +133,9 @@ I2C_CLIENT_INSMOD_1(adt7470); /* Wait at least 200ms per sensor for 10 sensors */ #define TEMP_COLLECTION_TIME 2000 +/* auto update thing won't fire more than every 2s */ +#define AUTO_UPDATE_INTERVAL 2000 + /* datasheet says to divide this number by the fan reading to get fan rpm */ #define FAN_PERIOD_TO_RPM(x) ((90000 * 60) / (x)) #define FAN_RPM_TO_PERIOD FAN_PERIOD_TO_RPM @@ -148,6 +152,7 @@ struct adt7470_data { unsigned long limits_last_updated; /* In jiffies */ int num_temp_sensors; /* -1 = probe */ + int temperatures_probed; s8 temp[ADT7470_TEMP_COUNT]; s8 temp_min[ADT7470_TEMP_COUNT]; @@ -164,6 +169,10 @@ struct adt7470_data { u8 pwm_min[ADT7470_PWM_COUNT]; s8 pwm_tmin[ADT7470_PWM_COUNT]; u8 pwm_auto_temp[ADT7470_PWM_COUNT]; + + struct task_struct *auto_update; + struct completion auto_update_stop; + unsigned int auto_update_interval; }; static int adt7470_probe(struct i2c_client *client, @@ -221,19 +230,13 @@ static void adt7470_init_client(struct i2c_client *client) } } -static struct adt7470_data *adt7470_update_device(struct device *dev) +/* Probe for temperature sensors. 
Assumes lock is held */ +static int adt7470_read_temperatures(struct i2c_client *client, + struct adt7470_data *data) { - struct i2c_client *client = to_i2c_client(dev); - struct adt7470_data *data = i2c_get_clientdata(client); - unsigned long local_jiffies = jiffies; - u8 cfg, pwm[4], pwm_cfg[2]; + unsigned long res; int i; - - mutex_lock(&data->lock); - if (time_before(local_jiffies, data->sensors_last_updated + - SENSOR_REFRESH_INTERVAL) - && data->sensors_valid) - goto no_sensor_update; + u8 cfg, pwm[4], pwm_cfg[2]; /* save pwm[1-4] config register */ pwm_cfg[0] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM_CFG(0)); @@ -259,9 +262,9 @@ static struct adt7470_data *adt7470_update_device(struct device *dev) i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg); /* Delay is 200ms * number of temp sensors. */ - msleep((data->num_temp_sensors >= 0 ? - data->num_temp_sensors * 200 : - TEMP_COLLECTION_TIME)); + res = msleep_interruptible((data->num_temp_sensors >= 0 ? + data->num_temp_sensors * 200 : + TEMP_COLLECTION_TIME)); /* done reading temperature sensors */ cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG); @@ -272,15 +275,81 @@ static struct adt7470_data *adt7470_update_device(struct device *dev) i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(0), pwm_cfg[0]); i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(2), pwm_cfg[1]); - for (i = 0; i < ADT7470_TEMP_COUNT; i++) + if (res) { + printk(KERN_ERR "ha ha, interrupted"); + return -EAGAIN; + } + + /* Only count fans if we have to */ + if (data->num_temp_sensors >= 0) + return 0; + + for (i = 0; i < ADT7470_TEMP_COUNT; i++) { data->temp[i] = i2c_smbus_read_byte_data(client, ADT7470_TEMP_REG(i)); + if (data->temp[i]) + data->num_temp_sensors = i + 1; + } + data->temperatures_probed = 1; + return 0; +} - /* Figure out the number of temp sensors */ - if (data->num_temp_sensors < 0) +static int adt7470_update_thread(void *p) +{ + struct i2c_client *client = p; + struct adt7470_data *data = 
i2c_get_clientdata(client); + + while (!kthread_should_stop()) { + mutex_lock(&data->lock); + adt7470_read_temperatures(client, data); + mutex_unlock(&data->lock); + if (kthread_should_stop()) + break; + msleep_interruptible(data->auto_update_interval); + } + + complete_all(&data->auto_update_stop); + return 0; +} + +static struct adt7470_data *adt7470_update_device(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct adt7470_data *data = i2c_get_clientdata(client); + unsigned long local_jiffies = jiffies; + u8 cfg; + int i; + int need_sensors = 1; + int need_limits = 1; + + /* + * Figure out if we need to update the shadow registers. + * Lockless means that we may occasionally report out of + * date data. + */ + if (time_before(local_jiffies, data->sensors_last_updated + + SENSOR_REFRESH_INTERVAL) && + data->sensors_valid) + need_sensors = 0; + + if (time_before(local_jiffies, data->limits_last_updated + + LIMIT_REFRESH_INTERVAL) && + data->limits_valid) + need_limits = 0; + + if (!need_sensors && !need_limits) + return data; + + mutex_lock(&data->lock); + if (!need_sensors) + goto no_sensor_update; + + if (!data->temperatures_probed) + adt7470_read_temperatures(client, data); + else for (i = 0; i < ADT7470_TEMP_COUNT; i++) - if (data->temp[i]) - data->num_temp_sensors = i + 1; + data->temp[i] = i2c_smbus_read_byte_data(client, + ADT7470_TEMP_REG(i)); for (i = 0; i < ADT7470_FAN_COUNT; i++) data->fan[i] = adt7470_read_word_data(client, @@ -329,9 +398,7 @@ static struct adt7470_data *adt7470_update_device(struct device *dev) data->sensors_valid = 1; no_sensor_update: - if (time_before(local_jiffies, data->limits_last_updated + - LIMIT_REFRESH_INTERVAL) - && data->limits_valid) + if (!need_limits) goto out; for (i = 0; i < ADT7470_TEMP_COUNT; i++) { @@ -365,6 +432,35 @@ out: return data; } +static ssize_t show_auto_update_interval(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct adt7470_data *data = 
adt7470_update_device(dev); + return sprintf(buf, "%d\n", data->auto_update_interval); +} + +static ssize_t set_auto_update_interval(struct device *dev, + struct device_attribute *devattr, + const char *buf, + size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct adt7470_data *data = i2c_get_clientdata(client); + long temp; + + if (strict_strtol(buf, 10, &temp)) + return -EINVAL; + + temp = SENSORS_LIMIT(temp, 0, 60000); + + mutex_lock(&data->lock); + data->auto_update_interval = temp; + mutex_unlock(&data->lock); + + return count; +} + static ssize_t show_num_temp_sensors(struct device *dev, struct device_attribute *devattr, char *buf) @@ -389,6 +485,8 @@ static ssize_t set_num_temp_sensors(struct device *dev, mutex_lock(&data->lock); data->num_temp_sensors = temp; + if (temp < 0) + data->temperatures_probed = 0; mutex_unlock(&data->lock); return count; @@ -862,6 +960,8 @@ static ssize_t show_alarm(struct device *dev, static DEVICE_ATTR(alarm_mask, S_IRUGO, show_alarm_mask, NULL); static DEVICE_ATTR(num_temp_sensors, S_IWUSR | S_IRUGO, show_num_temp_sensors, set_num_temp_sensors); +static DEVICE_ATTR(auto_update_interval, S_IWUSR | S_IRUGO, + show_auto_update_interval, set_auto_update_interval); static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, show_temp_max, set_temp_max, 0); @@ -1035,6 +1135,7 @@ static struct attribute *adt7470_attr[] = { &dev_attr_alarm_mask.attr, &dev_attr_num_temp_sensors.attr, + &dev_attr_auto_update_interval.attr, &sensor_dev_attr_temp1_max.dev_attr.attr, &sensor_dev_attr_temp2_max.dev_attr.attr, &sensor_dev_attr_temp3_max.dev_attr.attr, @@ -1168,6 +1269,7 @@ static int adt7470_probe(struct i2c_client *client, } data->num_temp_sensors = -1; + data->auto_update_interval = AUTO_UPDATE_INTERVAL; i2c_set_clientdata(client, data); mutex_init(&data->lock); @@ -1188,8 +1290,16 @@ static int adt7470_probe(struct i2c_client *client, goto exit_remove; } + init_completion(&data->auto_update_stop); + data->auto_update = 
kthread_run(adt7470_update_thread, client, + dev_name(data->hwmon_dev)); + if (IS_ERR(data->auto_update)) + goto exit_unregister; + return 0; +exit_unregister: + hwmon_device_unregister(data->hwmon_dev); exit_remove: sysfs_remove_group(&client->dev.kobj, &data->attrs); exit_free: @@ -1202,6 +1312,8 @@ static int adt7470_remove(struct i2c_client *client) { struct adt7470_data *data = i2c_get_clientdata(client); + kthread_stop(data->auto_update); + wait_for_completion(&data->auto_update_stop); hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &data->attrs); kfree(data); -- cgit v1.2.3-70-g09d2 From e8386a0cb22f4a2d439384212c494ad0bda848fe Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 14:41:52 -0800 Subject: kprobes: support probing module __exit function Allows kprobes to probe __exit routine. This adds flags member to struct kprobe. When a module is freed (kprobes hooks module_notifier to get this event), kprobes which probe the functions in that module have the "Gone" flag set in their flags member. These "Gone" probes are never enabled. Users can check the GONE flag through debugfs. This also removes mod_refcounted, because we couldn't free a module if kprobe incremented the refcount of that module.
[akpm@linux-foundation.org: document some locking] [mhiramat@redhat.com: bugfix: pass aggr_kprobe to arch_remove_kprobe] [mhiramat@redhat.com: bugfix: release old_p's insn_slot before error return] Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Signed-off-by: Masami Hiramatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kprobes.txt | 5 +- include/linux/kprobes.h | 14 +++- kernel/kprobes.c | 159 ++++++++++++++++++++++++++++++++++------------ 3 files changed, 134 insertions(+), 44 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index a79633d702b..48b3de90eb1 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -497,7 +497,10 @@ The first column provides the kernel address where the probe is inserted. The second column identifies the type of probe (k - kprobe, r - kretprobe and j - jprobe), while the third column specifies the symbol+offset of the probe. If the probed function belongs to a module, the module name -is also specified. +is also specified. Following columns show probe status. If the probe is on +a virtual address that is no longer valid (module init sections, module +virtual addresses that correspond to modules that've been unloaded), +such probes are marked with [GONE]. 
/debug/kprobes/enabled: Turn kprobes ON/OFF diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index b93e44ce228..d6ea19e314b 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -69,9 +69,6 @@ struct kprobe { /* list of kprobes for multi-handler support */ struct list_head list; - /* Indicates that the corresponding module has been ref counted */ - unsigned int mod_refcounted; - /*count the number of times this probe was temporarily disarmed */ unsigned long nmissed; @@ -103,8 +100,19 @@ struct kprobe { /* copy of the original instruction */ struct arch_specific_insn ainsn; + + /* Indicates various status flags. Protected by kprobe_mutex. */ + u32 flags; }; +/* Kprobe status flags */ +#define KPROBE_FLAG_GONE 1 /* breakpoint has already gone */ + +static inline int kprobe_gone(struct kprobe *p) +{ + return p->flags & KPROBE_FLAG_GONE; +} + /* * Special probe type that uses setjmp-longjmp type tricks to resume * execution at a specified entry with a matching prototype corresponding diff --git a/kernel/kprobes.c b/kernel/kprobes.c index a1e233a1958..cb732a9aa55 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -327,7 +327,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) struct kprobe *kp; list_for_each_entry_rcu(kp, &p->list, list) { - if (kp->pre_handler) { + if (kp->pre_handler && !kprobe_gone(kp)) { set_kprobe_instance(kp); if (kp->pre_handler(kp, regs)) return 1; @@ -343,7 +343,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, struct kprobe *kp; list_for_each_entry_rcu(kp, &p->list, list) { - if (kp->post_handler) { + if (kp->post_handler && !kprobe_gone(kp)) { set_kprobe_instance(kp); kp->post_handler(kp, regs, flags); reset_kprobe_instance(); @@ -545,9 +545,10 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) ap->addr = p->addr; ap->pre_handler = aggr_pre_handler; ap->fault_handler = aggr_fault_handler; - if (p->post_handler) + /* We 
don't care the kprobe which has gone. */ + if (p->post_handler && !kprobe_gone(p)) ap->post_handler = aggr_post_handler; - if (p->break_handler) + if (p->break_handler && !kprobe_gone(p)) ap->break_handler = aggr_break_handler; INIT_LIST_HEAD(&ap->list); @@ -566,17 +567,41 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, int ret = 0; struct kprobe *ap; + if (kprobe_gone(old_p)) { + /* + * Attempting to insert new probe at the same location that + * had a probe in the module vaddr area which already + * freed. So, the instruction slot has already been + * released. We need a new slot for the new probe. + */ + ret = arch_prepare_kprobe(old_p); + if (ret) + return ret; + } if (old_p->pre_handler == aggr_pre_handler) { copy_kprobe(old_p, p); ret = add_new_kprobe(old_p, p); + ap = old_p; } else { ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); - if (!ap) + if (!ap) { + if (kprobe_gone(old_p)) + arch_remove_kprobe(old_p); return -ENOMEM; + } add_aggr_kprobe(ap, old_p); copy_kprobe(ap, p); ret = add_new_kprobe(ap, p); } + if (kprobe_gone(old_p)) { + /* + * If the old_p has gone, its breakpoint has been disarmed. + * We have to arm it again after preparing real kprobes. + */ + ap->flags &= ~KPROBE_FLAG_GONE; + if (kprobe_enabled) + arch_arm_kprobe(ap); + } return ret; } @@ -639,8 +664,7 @@ static int __kprobes __register_kprobe(struct kprobe *p, return -EINVAL; } - p->mod_refcounted = 0; - + p->flags = 0; /* * Check if are we probing a module. */ @@ -649,16 +673,14 @@ static int __kprobes __register_kprobe(struct kprobe *p, struct module *calling_mod; calling_mod = __module_text_address(called_from); /* - * We must allow modules to probe themself and in this case - * avoid incrementing the module refcount, so as to allow - * unloading of self probing modules. + * We must hold a refcount of the probed module while updating + * its code to prohibit unexpected unloading. 
*/ if (calling_mod != probed_mod) { if (unlikely(!try_module_get(probed_mod))) { preempt_enable(); return -EINVAL; } - p->mod_refcounted = 1; } else probed_mod = NULL; } @@ -687,8 +709,9 @@ static int __kprobes __register_kprobe(struct kprobe *p, out: mutex_unlock(&kprobe_mutex); - if (ret && probed_mod) + if (probed_mod) module_put(probed_mod); + return ret; } @@ -716,16 +739,16 @@ valid_p: list_is_singular(&old_p->list))) { /* * Only probe on the hash list. Disarm only if kprobes are - * enabled - otherwise, the breakpoint would already have - * been removed. We save on flushing icache. + * enabled and not gone - otherwise, the breakpoint would + * already have been removed. We save on flushing icache. */ - if (kprobe_enabled) + if (kprobe_enabled && !kprobe_gone(old_p)) arch_disarm_kprobe(p); hlist_del_rcu(&old_p->hlist); } else { - if (p->break_handler) + if (p->break_handler && !kprobe_gone(p)) old_p->break_handler = NULL; - if (p->post_handler) { + if (p->post_handler && !kprobe_gone(p)) { list_for_each_entry_rcu(list_p, &old_p->list, list) { if ((list_p != p) && (list_p->post_handler)) goto noclean; @@ -740,27 +763,16 @@ noclean: static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) { - struct module *mod; struct kprobe *old_p; - if (p->mod_refcounted) { - /* - * Since we've already incremented refcount, - * we don't need to disable preemption. 
- */ - mod = module_text_address((unsigned long)p->addr); - if (mod) - module_put(mod); - } - - if (list_empty(&p->list) || list_is_singular(&p->list)) { - if (!list_empty(&p->list)) { - /* "p" is the last child of an aggr_kprobe */ - old_p = list_entry(p->list.next, struct kprobe, list); - list_del(&p->list); - kfree(old_p); - } + if (list_empty(&p->list)) arch_remove_kprobe(p); + else if (list_is_singular(&p->list)) { + /* "p" is the last child of an aggr_kprobe */ + old_p = list_entry(p->list.next, struct kprobe, list); + list_del(&p->list); + arch_remove_kprobe(old_p); + kfree(old_p); } } @@ -1074,6 +1086,67 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, #endif /* CONFIG_KRETPROBES */ +/* Set the kprobe gone and remove its instruction buffer. */ +static void __kprobes kill_kprobe(struct kprobe *p) +{ + struct kprobe *kp; + p->flags |= KPROBE_FLAG_GONE; + if (p->pre_handler == aggr_pre_handler) { + /* + * If this is an aggr_kprobe, we have to list all the + * chained probes and mark them GONE. + */ + list_for_each_entry_rcu(kp, &p->list, list) + kp->flags |= KPROBE_FLAG_GONE; + p->post_handler = NULL; + p->break_handler = NULL; + } + /* + * Here, we can remove insn_slot safely, because no thread calls + * the original probed function (which will be freed soon) any more. + */ + arch_remove_kprobe(p); +} + +/* Module notifier call back, checking kprobes on the module */ +static int __kprobes kprobes_module_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + struct hlist_head *head; + struct hlist_node *node; + struct kprobe *p; + unsigned int i; + + if (val != MODULE_STATE_GOING) + return NOTIFY_DONE; + + /* + * module .text section will be freed. We need to + * disable kprobes which have been inserted in the section. 
+ */ + mutex_lock(&kprobe_mutex); + for (i = 0; i < KPROBE_TABLE_SIZE; i++) { + head = &kprobe_table[i]; + hlist_for_each_entry_rcu(p, node, head, hlist) + if (within_module_core((unsigned long)p->addr, mod)) { + /* + * The vaddr this probe is installed will soon + * be vfreed but not synced to disk. Hence, + * disarming the breakpoint isn't needed. + */ + kill_kprobe(p); + } + } + mutex_unlock(&kprobe_mutex); + return NOTIFY_DONE; +} + +static struct notifier_block kprobe_module_nb = { + .notifier_call = kprobes_module_callback, + .priority = 0 +}; + static int __init init_kprobes(void) { int i, err = 0; @@ -1130,6 +1203,9 @@ static int __init init_kprobes(void) err = arch_init_kprobes(); if (!err) err = register_die_notifier(&kprobe_exceptions_nb); + if (!err) + err = register_module_notifier(&kprobe_module_nb); + kprobes_initialized = (err == 0); if (!err) @@ -1150,10 +1226,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, else kprobe_type = "k"; if (sym) - seq_printf(pi, "%p %s %s+0x%x %s\n", p->addr, kprobe_type, - sym, offset, (modname ? modname : " ")); + seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type, + sym, offset, (modname ? modname : " "), + (kprobe_gone(p) ? "[GONE]" : "")); else - seq_printf(pi, "%p %s %p\n", p->addr, kprobe_type, p->addr); + seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr, + (kprobe_gone(p) ?
"[GONE]" : "")); } static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) @@ -1234,7 +1312,8 @@ static void __kprobes enable_all_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) - arch_arm_kprobe(p); + if (!kprobe_gone(p)) + arch_arm_kprobe(p); } kprobe_enabled = true; @@ -1263,7 +1342,7 @@ static void __kprobes disable_all_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) { - if (!arch_trampoline_kprobe(p)) + if (!arch_trampoline_kprobe(p) && !kprobe_gone(p)) arch_disarm_kprobe(p); } } -- cgit v1.2.3-70-g09d2 From d78dd070ccda6384efeae142b116bc174f0cd9fa Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Jan 2009 14:42:40 -0800 Subject: docs: document how to write @varargs in kernel-doc Add documentation on how to use kernel-doc for function parameters that are "..." (varargs). Signed-off-by: Randy Dunlap Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-doc-nano-HOWTO.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt index c6841eee959..4115a4a80eb 100644 --- a/Documentation/kernel-doc-nano-HOWTO.txt +++ b/Documentation/kernel-doc-nano-HOWTO.txt @@ -71,6 +71,11 @@ The @argument descriptions must begin on the very next line following this opening short function description line, with no intervening empty comment lines. +If a function parameter is "..." (varargs), it should be listed in +kernel-doc notation as: + * @...: description + + Example kernel-doc data structure comment. 
/** -- cgit v1.2.3-70-g09d2 From 07983f0e36eab01a5385117e55154a2aa796eafc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Jan 2009 14:42:41 -0800 Subject: documentation: update header file paths Update several Documentation/ files and a few sub-dir files (only one change in each) to reflect changed header files locations. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DMA-mapping.txt | 2 +- Documentation/ioctl/ioctl-number.txt | 8 ++++---- Documentation/kernel-parameters.txt | 4 ++-- Documentation/magic-number.txt | 6 +++--- Documentation/mips/AU1xxx_IDE.README | 2 +- Documentation/powerpc/cpu_features.txt | 2 +- Documentation/x86/zero-page.txt | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) (limited to 'Documentation') diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt index c74fec8c235..b2a4d6d244d 100644 --- a/Documentation/DMA-mapping.txt +++ b/Documentation/DMA-mapping.txt @@ -26,7 +26,7 @@ mapped only for the time they are actually used and unmapped after the DMA transfer. The following API will work of course even on platforms where no such -hardware exists, see e.g. include/asm-i386/pci.h for how it is implemented on +hardware exists, see e.g. arch/x86/include/asm/pci.h for how it is implemented on top of the virt_to_bus interface. First of all, you should make sure diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 82469917443..f1d63990332 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -84,7 +84,7 @@ Code Seq# Include File Comments 'B' C0-FF advanced bbus 'C' all linux/soundcard.h -'D' all asm-s390/dasd.h +'D' all arch/s390/include/asm/dasd.h 'E' all linux/input.h 'F' all linux/fb.h 'H' all linux/hiddev.h @@ -105,7 +105,7 @@ Code Seq# Include File Comments 'S' 80-81 scsi/scsi_ioctl.h conflict! 'S' 82-FF scsi/scsi.h conflict! 'T' all linux/soundcard.h conflict! 
-'T' all asm-i386/ioctls.h conflict! +'T' all arch/x86/include/asm/ioctls.h conflict! 'U' 00-EF linux/drivers/usb/usb.h 'V' all linux/vt.h 'W' 00-1F linux/watchdog.h conflict! @@ -120,7 +120,7 @@ Code Seq# Include File Comments 'c' 00-7F linux/comstats.h conflict! 'c' 00-7F linux/coda.h conflict! -'c' 80-9F asm-s390/chsc.h +'c' 80-9F arch/s390/include/asm/chsc.h 'd' 00-FF linux/char/drm/drm/h conflict! 'd' 00-DF linux/video_decoder.h conflict! 'd' F0-FF linux/digi1.h @@ -170,7 +170,7 @@ Code Seq# Include File Comments 0x80 00-1F linux/fb.h 0x81 00-1F linux/videotext.h -0x89 00-06 asm-i386/sockios.h +0x89 00-06 arch/x86/include/asm/sockios.h 0x89 0B-DF linux/sockios.h 0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range 0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a2d8805c03d..7f0b694e02e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -469,8 +469,8 @@ and is between 256 and 4096 characters. It is defined in the file clearcpuid=BITNUM [X86] Disable CPUID feature X for the kernel. See - include/asm-x86/cpufeature.h for the valid bit numbers. - Note the Linux specific bits are not necessarily + arch/x86/include/asm/cpufeature.h for the valid bit + numbers. Note the Linux specific bits are not necessarily stable over kernel options, but the vendor specific ones should be. 
Also note that user programs calling CPUID directly diff --git a/Documentation/magic-number.txt b/Documentation/magic-number.txt index 95070028d15..505f1960754 100644 --- a/Documentation/magic-number.txt +++ b/Documentation/magic-number.txt @@ -125,14 +125,14 @@ TRIDENT_CARD_MAGIC 0x5072696E trident_card sound/oss/trident.c ROUTER_MAGIC 0x524d4157 wan_device include/linux/wanrouter.h SCC_MAGIC 0x52696368 gs_port drivers/char/scc.h SAVEKMSG_MAGIC1 0x53415645 savekmsg arch/*/amiga/config.c -GDA_MAGIC 0x58464552 gda include/asm-mips64/sn/gda.h +GDA_MAGIC 0x58464552 gda arch/mips/include/asm/sn/gda.h RED_MAGIC1 0x5a2cf071 (any) mm/slab.c STL_PORTMAGIC 0x5a7182c9 stlport include/linux/stallion.h EEPROM_MAGIC_VALUE 0x5ab478d2 lanai_dev drivers/atm/lanai.c HDLCDRV_MAGIC 0x5ac6e778 hdlcdrv_state include/linux/hdlcdrv.h EPCA_MAGIC 0x5c6df104 channel include/linux/epca.h PCXX_MAGIC 0x5c6df104 channel drivers/char/pcxx.h -KV_MAGIC 0x5f4b565f kernel_vars_s include/asm-mips64/sn/klkernvars.h +KV_MAGIC 0x5f4b565f kernel_vars_s arch/mips/include/asm/sn/klkernvars.h I810_STATE_MAGIC 0x63657373 i810_state sound/oss/i810_audio.c TRIDENT_STATE_MAGIC 0x63657373 trient_state sound/oss/trident.c M3_CARD_MAGIC 0x646e6f50 m3_card sound/oss/maestro3.c @@ -158,7 +158,7 @@ CCB_MAGIC 0xf2691ad2 ccb drivers/scsi/ncr53c8xx.c QUEUE_MAGIC_FREE 0xf7e1c9a3 queue_entry drivers/scsi/arm/queue.c QUEUE_MAGIC_USED 0xf7e1cc33 queue_entry drivers/scsi/arm/queue.c HTB_CMAGIC 0xFEFAFEF1 htb_class net/sched/sch_htb.c -NMI_MAGIC 0x48414d4d455201 nmi_s include/asm-mips64/sn/nmi.h +NMI_MAGIC 0x48414d4d455201 nmi_s arch/mips/include/asm/sn/nmi.h Note that there are also defined special per-driver magic numbers in sound memory management. See include/sound/sndmagic.h for complete list of them. 
Many diff --git a/Documentation/mips/AU1xxx_IDE.README b/Documentation/mips/AU1xxx_IDE.README index 25a6ed1aaa5..f54962aea84 100644 --- a/Documentation/mips/AU1xxx_IDE.README +++ b/Documentation/mips/AU1xxx_IDE.README @@ -44,7 +44,7 @@ FILES, CONFIGS AND COMPATABILITY Two files are introduced: - a) 'include/asm-mips/mach-au1x00/au1xxx_ide.h' + a) 'arch/mips/include/asm/mach-au1x00/au1xxx_ide.h' containes : struct _auide_hwif timing parameters for PIO mode 0/1/2/3/4 timing parameters for MWDMA 0/1/2 diff --git a/Documentation/powerpc/cpu_features.txt b/Documentation/powerpc/cpu_features.txt index 472739880e8..ffa4183fdb8 100644 --- a/Documentation/powerpc/cpu_features.txt +++ b/Documentation/powerpc/cpu_features.txt @@ -31,7 +31,7 @@ anyways). After detecting the processor type, the kernel patches out sections of code that shouldn't be used by writing nop's over it. Using cpufeatures requires -just 2 macros (found in include/asm-ppc/cputable.h), as seen in head.S +just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S transfer_to_handler: #ifdef CONFIG_ALTIVEC diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt index 169ad423a3d..4f913857b8a 100644 --- a/Documentation/x86/zero-page.txt +++ b/Documentation/x86/zero-page.txt @@ -3,7 +3,7 @@ protocol of kernel. These should be filled by bootloader or 16-bit real-mode setup code of the kernel. References/settings to it mainly are in: - include/asm-x86/bootparam.h + arch/x86/include/asm/bootparam.h Offset Proto Name Meaning -- cgit v1.2.3-70-g09d2 From 58cc855c395fc22db996cc893134c4c690f0f0dc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Jan 2009 14:42:42 -0800 Subject: documentation: update s390 header file paths Update Documentation/s390/ files to reflect changed header files locations. 
Signed-off-by: Randy Dunlap Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/s390/Debugging390.txt | 2 +- Documentation/s390/cds.txt | 2 +- Documentation/s390/s390dbf.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt index d30a281c570..10711d9f078 100644 --- a/Documentation/s390/Debugging390.txt +++ b/Documentation/s390/Debugging390.txt @@ -1402,7 +1402,7 @@ Syscalls are implemented on Linux for S390 by the Supervisor call instruction (S possibilities of these as the instruction is made up of a 0xA opcode & the second byte being the syscall number. They are traced using the simple command. TR SVC -the syscalls are defined in linux/include/asm-s390/unistd.h +the syscalls are defined in linux/arch/s390/include/asm/unistd.h e.g. to trace all file opens just do TR SVC 5 ( as this is the syscall number of open ) diff --git a/Documentation/s390/cds.txt b/Documentation/s390/cds.txt index c4b7b2bd369..480a78ef5a1 100644 --- a/Documentation/s390/cds.txt +++ b/Documentation/s390/cds.txt @@ -98,7 +98,7 @@ platform. Some of the interface routines are specific to Linux/390 and some of them can be found on other Linux platforms implementations too. Miscellaneous function prototypes, data declarations, and macro definitions can be found in the architecture specific C header file -linux/include/asm-s390/irq.h. +linux/arch/s390/include/asm/irq.h. 
Overview of CDS interface concepts diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt index e0542097369..2d10053dd97 100644 --- a/Documentation/s390/s390dbf.txt +++ b/Documentation/s390/s390dbf.txt @@ -2,7 +2,7 @@ S390 Debug Feature ================== files: arch/s390/kernel/debug.c - include/asm-s390/debug.h + arch/s390/include/asm/debug.h Description: ------------ -- cgit v1.2.3-70-g09d2 From 28f4d75a618e52f93d8e4a3e7bc66db8c882d679 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Jan 2009 14:42:43 -0800 Subject: documentation: how to use DOC: section blocks Add info on how to use DOC: sections in kernel-doc. DOC: sections enable the addition of inline source file comments that are general in nature instead of being specific to a function, struct, union, enum, or typedef. Signed-off-by: Randy Dunlap Cc: Johannes Berg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-doc-nano-HOWTO.txt | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt index 4115a4a80eb..d73fbd2b2b4 100644 --- a/Documentation/kernel-doc-nano-HOWTO.txt +++ b/Documentation/kernel-doc-nano-HOWTO.txt @@ -287,6 +287,32 @@ struct my_struct { }; +Including documentation blocks in source files +---------------------------------------------- + +To facilitate having source code and comments close together, you can +include kernel-doc documentation blocks that are free-form comments +instead of being kernel-doc for functions, structures, unions, +enums, or typedefs. This could be used for something like a +theory of operation for a driver or library code, for example. + +This is done by using a DOC: section keyword with a section title. E.g.: + +/** + * DOC: Theory of Operation + * + * The whizbang foobar is a dilly of a gizmo. It can do whatever you + * want it to do, at any time. It reads your mind. 
Here's how it works. + * + * foo bar splat + * + * The only drawback to this gizmo is that it can sometimes damage + * hardware, software, or its subject(s). + */ + +DOC: sections are used in SGML templates files as indicated below. + + How to make new SGML template files ----------------------------------- @@ -307,6 +333,9 @@ exported using EXPORT_SYMBOL. !F<filename> <function [functions...]> is replaced by the documentation, in <filename>, for the functions listed. +!P<filename> <section title> is replaced by the contents of the DOC: +section titled <section title> from <filename>. +Spaces are allowed in <section title>; do not quote the <section title>
. Tim. */ -- cgit v1.2.3-70-g09d2 From 7c4be253d3a01ddc92033ec3a3812fddf703ef19 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Jan 2009 14:42:44 -0800 Subject: docs: add more early params to kernel-parameters.txt Add some (more) early_param boot options to kernel-parameters.txt. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7f0b694e02e..0543370e702 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1117,6 +1117,8 @@ and is between 256 and 4096 characters. It is defined in the file If there are multiple matching configurations changing the same attribute, the last one is used. + lmb=debug [KNL] Enable lmb debug messages. + load_ramdisk= [RAM] List of ramdisks to load from floppy See Documentation/blockdev/ramdisk.txt. @@ -1569,6 +1571,10 @@ and is between 256 and 4096 characters. It is defined in the file nr_uarts= [SERIAL] maximum number of UARTs to be registered. + ohci1394_dma=early [HW] enable debugging via the ohci1394 driver. + See Documentation/debugging-via-ohci1394.txt for more + info. + olpc_ec_timeout= [OLPC] ms delay when issuing EC commands Rather than timing out after 20 ms if an EC command is not properly ACKed, override the length -- cgit v1.2.3-70-g09d2 From ecb08d81313a3c015225236775de259d99ab47fe Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Jan 2009 14:42:44 -0800 Subject: doc: reformat some long lines in kernel-parameters.txt Reformat text to (mostly) stay within 80 columns of text. 
Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0543370e702..3ccf1bc5aff 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1799,10 +1799,10 @@ and is between 256 and 4096 characters. It is defined in the file autoconfiguration. Ranges are in pairs (memory base and size). - dynamic_printk - Enables pr_debug()/dev_dbg() calls if - CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. These can also - be switched on/off via /dynamic_printk/modules + dynamic_printk Enables pr_debug()/dev_dbg() calls if + CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. + These can also be switched on/off via + /dynamic_printk/modules print-fatal-signals= [KNL] debug: print fatal signals @@ -1890,7 +1890,7 @@ and is between 256 and 4096 characters. It is defined in the file reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode Format: [,[,...]] - See arch/*/kernel/reboot.c or arch/*/kernel/process.c + See arch/*/kernel/reboot.c or arch/*/kernel/process.c relax_domain_level= [KNL, SMP] Set scheduler's default relax_domain_level. @@ -2438,8 +2438,8 @@ and is between 256 and 4096 characters. It is defined in the file Format: ,,,,,[,[,[,]]] - norandmaps Don't use address space randomization - Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space + norandmaps Don't use address space randomization. 
Equivalent to + echo 0 > /proc/sys/kernel/randomize_va_space ______________________________________________________________________ -- cgit v1.2.3-70-g09d2 From 4cb0e11b15d2badad455fcd538af0cccf05dc012 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Tue, 6 Jan 2009 14:42:47 -0800 Subject: coredump_filter: permit changing of the default filter Introduce a new kernel parameter `coredump_filter'. Setting a value to this parameter causes the default bitmask of coredump_filter to be changed. It is useful for users to change coredump_filter settings for the whole system at boot time. Without this parameter, users have to change coredump_filter settings for each /proc// in an initializing script. Signed-off-by: Hidehiro Kawai Cc: Roland McGrath Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 5 +++++ kernel/fork.c | 15 +++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 3ccf1bc5aff..0b3f6711d2f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -551,6 +551,11 @@ and is between 256 and 4096 characters. It is defined in the file not work reliably with all consoles, but is known to work with serial and VGA consoles. + coredump_filter= + [KNL] Change the default value for + /proc//coredump_filter. + See also Documentation/filesystems/proc.txt. 
+ cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver Format: ,,,[,] diff --git a/kernel/fork.c b/kernel/fork.c index 23b91211667..7b8f2a78be3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -400,6 +400,18 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) +static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; + +static int __init coredump_filter_setup(char *s) +{ + default_dump_filter = + (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & + MMF_DUMP_FILTER_MASK; + return 1; +} + +__setup("coredump_filter=", coredump_filter_setup); + #include static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) @@ -408,8 +420,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); - mm->flags = (current->mm) ? current->mm->flags - : MMF_DUMP_FILTER_DEFAULT; + mm->flags = (current->mm) ? 
current->mm->flags : default_dump_filter; mm->core_state = NULL; mm->nr_ptes = 0; set_mm_counter(mm, file_rss, 0); -- cgit v1.2.3-70-g09d2 From 709ac06a148a33493d3e2f9391bb746b067d96d6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 7 Jan 2009 09:54:24 -0500 Subject: Btrfs: Add Documentation/filesystem/btrfs.txt, remove old COPYING Signed-off-by: Chris Mason --- Documentation/filesystems/btrfs.txt | 91 +++++++++ fs/btrfs/COPYING | 356 ------------------------------------ fs/btrfs/INSTALL | 48 ----- 3 files changed, 91 insertions(+), 404 deletions(-) create mode 100644 Documentation/filesystems/btrfs.txt delete mode 100644 fs/btrfs/COPYING delete mode 100644 fs/btrfs/INSTALL (limited to 'Documentation') diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt new file mode 100644 index 00000000000..64087c34327 --- /dev/null +++ b/Documentation/filesystems/btrfs.txt @@ -0,0 +1,91 @@ + + BTRFS + ===== + +Btrfs is a new copy on write filesystem for Linux aimed at +implementing advanced features while focusing on fault tolerance, +repair and easy administration. Initially developed by Oracle, Btrfs +is licensed under the GPL and open for contribution from anyone. + +Linux has a wealth of filesystems to choose from, but we are facing a +number of challenges with scaling to the large storage subsystems that +are becoming common in today's data centers. Filesystems need to scale +in their ability to address and manage large storage, and also in +their ability to detect, repair and tolerate errors in the data stored +on disk. Btrfs is under heavy development, and is not suitable for +any uses other than benchmarking and review. The Btrfs disk format is +not yet finalized. 
+ +The main Btrfs features include: + + * Extent based file storage (2^64 max file size) + * Space efficient packing of small files + * Space efficient indexed directories + * Dynamic inode allocation + * Writable snapshots + * Subvolumes (separate internal filesystem roots) + * Object level mirroring and striping + * Checksums on data and metadata (multiple algorithms available) + * Compression + * Integrated multiple device support, with several raid algorithms + * Online filesystem check (not yet implemented) + * Very fast offline filesystem check + * Efficient incremental backup and FS mirroring (not yet implemented) + * Online filesystem defragmentation + + + + MAILING LIST + ============ + +There is a Btrfs mailing list hosted on vger.kernel.org. You can +find details on how to subscribe here: + +http://vger.kernel.org/vger-lists.html#linux-btrfs + +Mailing list archives are available from gmane: + +http://dir.gmane.org/gmane.comp.file-systems.btrfs + + + + IRC + === + +Discussion of Btrfs also occurs on the #btrfs channel of the Freenode +IRC network. + + + + UTILITIES + ========= + +Userspace tools for creating and manipulating Btrfs file systems are +available from the git repository at the following location: + + http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-progs-unstable.git + git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs-unstable.git + +These include the following tools: + +mkfs.btrfs: create a filesystem + +btrfsctl: control program to create snapshots and subvolumes: + + mount /dev/sda2 /mnt + btrfsctl -s new_subvol_name /mnt + btrfsctl -s snapshot_of_default /mnt/default + btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name + btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol + ls /mnt + default snapshot_of_a_snapshot snapshot_of_new_subvol + new_subvol_name snapshot_of_default + + Snapshots and subvolumes cannot be deleted right now, but you can + rm -rf all the files and directories inside them. 
+ +btrfsck: do a limited check of the FS extent trees. + +btrfs-debug-tree: print all of the FS metadata in text form. Example: + + btrfs-debug-tree /dev/sda2 >& big_output_file diff --git a/fs/btrfs/COPYING b/fs/btrfs/COPYING deleted file mode 100644 index ca442d313d8..00000000000 --- a/fs/btrfs/COPYING +++ /dev/null @@ -1,356 +0,0 @@ - - NOTE! This copyright does *not* cover user programs that use kernel - services by normal system calls - this is merely considered normal use - of the kernel, and does *not* fall under the heading of "derived work". - Also note that the GPL below is copyrighted by the Free Software - Foundation, but the instance of code that it refers to (the Linux - kernel) is copyrighted by me and others who actually wrote it. - - Also note that the only valid version of the GPL as far as the kernel - is concerned is _this_ particular version of the license (ie v2, not - v2.2 or v3.x or whatever), unless explicitly otherwise stated. - - Linus Torvalds - ----------------------------------------- - - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. 
Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. 
- - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. 
You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. 
- -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. 
However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. 
-You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. 
diff --git a/fs/btrfs/INSTALL b/fs/btrfs/INSTALL deleted file mode 100644 index 16b45a56878..00000000000 --- a/fs/btrfs/INSTALL +++ /dev/null @@ -1,48 +0,0 @@ -Install Instructions - -Btrfs puts snapshots and subvolumes into the root directory of the FS. This -directory can only be changed by btrfsctl right now, and normal filesystem -operations do not work on it. The default subvolume is called 'default', -and you can create files and directories in mount_point/default - -Btrfs uses libcrc32c in the kernel for file and metadata checksums. You need -to compile the kernel with: - -CONFIG_LIBCRC32C=m - -libcrc32c can be static as well. Once your kernel is setup, typing make in the -btrfs module sources will build against the running kernel. When the build is -complete: - -modprobe libcrc32c -insmod btrfs.ko - -The Btrfs utility programs require libuuid to build. This can be found -in the e2fsprogs sources, and is usually available as libuuid or -e2fsprogs-devel from various distros. - -Building the utilities is just make ; make install. The programs go -into /usr/local/bin. The commands available are: - -mkfs.btrfs: create a filesystem - -btrfsctl: control program to create snapshots and subvolumes: - - mount /dev/sda2 /mnt - btrfsctl -s new_subvol_name /mnt - btrfsctl -s snapshot_of_default /mnt/default - btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name - btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol - ls /mnt - default snapshot_of_a_snapshot snapshot_of_new_subvol - new_subvol_name snapshot_of_default - - Snapshots and subvolumes cannot be deleted right now, but you can - rm -rf all the files and directories inside them. - -btrfsck: do a limited check of the FS extent trees. - -debug-tree: print all of the FS metadata in text form. 
Example: - - debug-tree /dev/sda2 >& big_output_file - -- cgit v1.2.3-70-g09d2 From 5e6d9f511e0188d34fa7a93ce2d0e6194442b0da Mon Sep 17 00:00:00 2001 From: Graf Yang Date: Wed, 7 Jan 2009 23:14:38 +0800 Subject: Blackfin arch: Add document about bfin-gpio Add document about bfin-gpio when requesting a pin both as gpio and gpio interrupt. Signed-off-by: Graf Yang Signed-off-by: Bryan Wu --- Documentation/blackfin/00-INDEX | 3 ++ Documentation/blackfin/bfin-gpio-notes.txt | 71 ++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 Documentation/blackfin/bfin-gpio-notes.txt (limited to 'Documentation') diff --git a/Documentation/blackfin/00-INDEX b/Documentation/blackfin/00-INDEX index 7cb3b356b24..d6840a91e1e 100644 --- a/Documentation/blackfin/00-INDEX +++ b/Documentation/blackfin/00-INDEX @@ -9,3 +9,6 @@ cachefeatures.txt Filesystems - Requirements for mounting the root file system. + +bfin-gpio-notes.txt + - Notes on developing/using bfin-gpio driver. diff --git a/Documentation/blackfin/bfin-gpio-notes.txt b/Documentation/blackfin/bfin-gpio-notes.txt new file mode 100644 index 00000000000..9898c7ded7d --- /dev/null +++ b/Documentation/blackfin/bfin-gpio-notes.txt @@ -0,0 +1,71 @@ +/* + * File: Documentation/blackfin/bfin-gpio-notes.txt + * Based on: + * Author: + * + * Created: $Id: bfin-gpio-note.txt 2008-11-24 16:42 grafyang $ + * Description: This file contains the notes in developing/using bfin-gpio. + * + * + * Rev: + * + * Modified: + * Copyright 2004-2008 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + */ + + +1. Blackfin GPIO introduction + + There are many GPIO pins on Blackfin. Most of these pins are muxed to + multi-functions. They can be configured as peripheral, or just as GPIO, + configured to input with interrupt enabled, or output. + + For detailed information, please see "arch/blackfin/kernel/bfin_gpio.c", + or the relevant HRM. + + +2.
Avoiding resource conflict + + The following function groups are used to avoid resource conflicts: + - Use the pin as peripheral, + int peripheral_request(unsigned short per, const char *label); + int peripheral_request_list(const unsigned short per[], const char *label); + void peripheral_free(unsigned short per); + void peripheral_free_list(const unsigned short per[]); + - Use the pin as GPIO, + int bfin_gpio_request(unsigned gpio, const char *label); + void bfin_gpio_free(unsigned gpio); + - Use the pin as GPIO interrupt, + int bfin_gpio_irq_request(unsigned gpio, const char *label); + void bfin_gpio_irq_free(unsigned gpio); + + The request functions will record the function state for a certain pin, + the free functions will clear its function state. + Once a pin is requested, it can't be requested again before it is freed by + the previous caller, otherwise the kernel will dump stacks, and the request + function fails. + These functions are wrapped by other functions, most of the users need not + care. + + +3. But there are some exceptions + - The kernel permits the identical GPIO to be requested both as GPIO and GPIO + interrupt. + Some drivers, like gpio-keys, need this behavior. The kernel only prints out + warning messages like, + bfin-gpio: GPIO 24 is already reserved by gpio-keys: BTN0, and you are +configuring it as IRQ! + + Note: Consider the case where two drivers need the + identical GPIO, one of them uses it as GPIO, the other uses it as + GPIO interrupt. This will really cause a resource conflict. So if + there is any abnormal driver behavior, please check the bfin-gpio + warning messages. + + - The kernel permits the identical GPIO to be requested from the same driver twice.
+ + + -- cgit v1.2.3-70-g09d2 From 3b02d332b6f15cc8f7b6a04757c86034669600e0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 Jan 2009 16:37:31 +0100 Subject: hwmon: (f71882fg) Add documentation Add some documentation about the f71882fg driver, and update the Kconfig documentation to report the new supported models. Signed-off-by: Hans de Goede Signed-off-by: Jean Delvare --- Documentation/hwmon/f71882fg | 89 ++++++++++++++++++++++++++++++++++++++++++++ drivers/hwmon/Kconfig | 5 ++- 2 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 Documentation/hwmon/f71882fg (limited to 'Documentation') diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg new file mode 100644 index 00000000000..a8321267b5b --- /dev/null +++ b/Documentation/hwmon/f71882fg @@ -0,0 +1,89 @@ +Kernel driver f71882fg +====================== + +Supported chips: + * Fintek F71882FG and F71883FG + Prefix: 'f71882fg' + Addresses scanned: none, address read from Super I/O config space + Datasheet: Available from the Fintek website + * Fintek F71862FG and F71863FG + Prefix: 'f71862fg' + Addresses scanned: none, address read from Super I/O config space + Datasheet: Available from the Fintek website + * Fintek F8000 + Prefix: 'f8000' + Addresses scanned: none, address read from Super I/O config space + Datasheet: Not public + +Author: Hans de Goede + + +Description +----------- + +Fintek F718xxFG/F8000 Super I/O chips include complete hardware monitoring +capabilities. They can monitor up to 9 voltages (3 for the F8000), 4 fans and +3 temperature sensors. + +These chips also have fan controlling features, using either DC or PWM, in +three different modes (one manual, two automatic). + +The driver assumes that no more than one chip is present, which seems +reasonable. + + +Monitoring +---------- + +The Voltage, Fan and Temperature Monitoring uses the standard sysfs +interface as documented in sysfs-interface, without any exceptions. 
+ + +Fan Control +----------- + +Both PWM (pulse-width modulation) and DC fan speed control methods are +supported. The right one to use depends on external circuitry on the +motherboard, so the driver assumes that the BIOS set the method +properly. + +There are 2 modes to specify the speed of the fan, PWM duty cycle (or DC +voltage) mode, where 0-100% duty cycle (0-100% of 12V) is specified. And RPM +mode where the actual RPM of the fan (as measured) is controlled and the speed +gets specified as 0-100% of the fan#_full_speed file. + +Since both modes work in a 0-100% (mapped to 0-255) scale, there isn't a +whole lot of a difference when modifying fan control settings. The only +important difference is that in RPM mode the 0-100% controls the fan speed +between 0-100% of fan#_full_speed. It is assumed that if the BIOS programs +RPM mode, it will also set fan#_full_speed properly, if it does not then +fan control will not work properly, unless you set a sane fan#_full_speed +value yourself. + +Switching between these modes requires re-initializing a whole bunch of +registers, so the mode which the BIOS has set is kept. The mode is +printed when loading the driver. + +Three different fan control modes are supported; the mode number is written +to the pwm#_enable file. Note that not all modes are supported on all +chips, and some modes may only be available in RPM / PWM mode on the F8000. +Writing an unsupported mode will result in an invalid parameter error. + +* 1: Manual mode + You ask for a specific PWM duty cycle / DC voltage or a specific % of + fan#_full_speed by writing to the pwm# file. This mode is only + available on the F8000 if the fan channel is in RPM mode. + +* 2: Normal auto mode + You can define a number of temperature/fan speed trip points, which % the + fan should run at at this temp and which temp a fan should follow using the + standard sysfs interface. 
The number and type of trip points is chip + dependent, see which files are available in sysfs. + Fan/PWM channel 3 of the F8000 is always in this mode! + +* 3: Thermostat mode (Only available on the F8000 when in duty cycle mode) + The fan speed is regulated to keep the temp the fan is mapped to between + temp#_auto_point2_temp and temp#_auto_point3_temp. + +Both of the automatic modes require that pwm1 corresponds to fan1, pwm2 to +fan2 and pwm3 to fan3. diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index c709e821f04..cc611e4b789 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -284,11 +284,12 @@ config SENSORS_F71805F will be called f71805f. config SENSORS_F71882FG - tristate "Fintek F71882FG and F71883FG" + tristate "Fintek F71862FG, F71882FG and F8000" depends on EXPERIMENTAL help If you say yes here you get support for hardware monitoring - features of the Fintek F71882FG and F71883FG Super-I/O chips. + features of the Fintek F71882FG/F71883FG, F71862FG/F71863FG + and F8000 Super-I/O chips. This driver can also be built as a module. If so, the module will be called f71882fg. -- cgit v1.2.3-70-g09d2 From 6e34b187bc216fc632769fb8b906d3a29ccd8f14 Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Wed, 7 Jan 2009 16:37:32 +0100 Subject: hwmon: Add LTC4245 driver Add Linux support for the Linear Technology LTC4245 Multiple Supply Hot Swap controller I2C monitoring interface. Signed-off-by: Ira W.
Snyder Acked-by: Hans de Goede Signed-off-by: Jean Delvare --- Documentation/hwmon/ltc4245 | 81 +++++++ drivers/hwmon/Kconfig | 11 + drivers/hwmon/Makefile | 1 + drivers/hwmon/ltc4245.c | 567 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 660 insertions(+) create mode 100644 Documentation/hwmon/ltc4245 create mode 100644 drivers/hwmon/ltc4245.c (limited to 'Documentation') diff --git a/Documentation/hwmon/ltc4245 b/Documentation/hwmon/ltc4245 new file mode 100644 index 00000000000..bae7a3adc5d --- /dev/null +++ b/Documentation/hwmon/ltc4245 @@ -0,0 +1,81 @@ +Kernel driver ltc4245 +===================== + +Supported chips: + * Linear Technology LTC4245 + Prefix: 'ltc4245' + Addresses scanned: 0x20-0x3f + Datasheet: + http://www.linear.com/pc/downloadDocument.do?navId=H0,C1,C1003,C1006,C1140,P19392,D13517 + +Author: Ira W. Snyder + + +Description +----------- + +The LTC4245 controller allows a board to be safely inserted and removed +from a live backplane in multiple supply systems such as CompactPCI and +PCI Express. + + +Usage Notes +----------- + +This driver does not probe for LTC4245 devices, due to the fact that some +of the possible addresses are unfriendly to probing. You will need to use +the "force" parameter to tell the driver where to find the device. + +Example: the following will load the driver for an LTC4245 at address 0x23 +on I2C bus #1: +$ modprobe ltc4245 force=1,0x23 + + +Sysfs entries +------------- + +The LTC4245 has built-in limits for over and under current warnings. This +makes it very likely that the reference circuit will be used. + +This driver uses the values in the datasheet to change the register values +into the values specified in the sysfs-interface document. The current readings +rely on the sense resistors listed in Table 2: "Sense Resistor Values". 
+ +in1_input 12v input voltage (mV) +in2_input 5v input voltage (mV) +in3_input 3v input voltage (mV) +in4_input Vee (-12v) input voltage (mV) + +in1_min_alarm 12v input undervoltage alarm +in2_min_alarm 5v input undervoltage alarm +in3_min_alarm 3v input undervoltage alarm +in4_min_alarm Vee (-12v) input undervoltage alarm + +curr1_input 12v current (mA) +curr2_input 5v current (mA) +curr3_input 3v current (mA) +curr4_input Vee (-12v) current (mA) + +curr1_max_alarm 12v overcurrent alarm +curr2_max_alarm 5v overcurrent alarm +curr3_max_alarm 3v overcurrent alarm +curr4_max_alarm Vee (-12v) overcurrent alarm + +in5_input 12v output voltage (mV) +in6_input 5v output voltage (mV) +in7_input 3v output voltage (mV) +in8_input Vee (-12v) output voltage (mV) + +in5_min_alarm 12v output undervoltage alarm +in6_min_alarm 5v output undervoltage alarm +in7_min_alarm 3v output undervoltage alarm +in8_min_alarm Vee (-12v) output undervoltage alarm + +in9_input GPIO #1 voltage data +in10_input GPIO #2 voltage data +in11_input GPIO #3 voltage data + +power1_input 12v power usage (uW) +power2_input 5v power usage (uW) +power3_input 3v power usage (uW) +power4_input Vee (-12v) power usage (uW) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index cc611e4b789..1ef1205b4e8 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -549,6 +549,17 @@ config SENSORS_LM93 This driver can also be built as a module. If so, the module will be called lm93. +config SENSORS_LTC4245 + tristate "Linear Technology LTC4245" + depends on I2C && EXPERIMENTAL + default n + help + If you say yes here you get support for Linear Technology LTC4245 + Multiple Supply Hot Swap Controller I2C interface. + + This driver can also be built as a module. If so, the module will + be called ltc4245.
+ config SENSORS_MAX1111 tristate "Maxim MAX1111 Multichannel, Serial 8-bit ADC chip" depends on SPI_MASTER diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 58fc5be5355..8fd124eff64 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_SENSORS_LM87) += lm87.o obj-$(CONFIG_SENSORS_LM90) += lm90.o obj-$(CONFIG_SENSORS_LM92) += lm92.o obj-$(CONFIG_SENSORS_LM93) += lm93.o +obj-$(CONFIG_SENSORS_LTC4245) += ltc4245.o obj-$(CONFIG_SENSORS_MAX1111) += max1111.o obj-$(CONFIG_SENSORS_MAX1619) += max1619.o obj-$(CONFIG_SENSORS_MAX6650) += max6650.o diff --git a/drivers/hwmon/ltc4245.c b/drivers/hwmon/ltc4245.c new file mode 100644 index 00000000000..034b2c51584 --- /dev/null +++ b/drivers/hwmon/ltc4245.c @@ -0,0 +1,567 @@ +/* + * Driver for Linear Technology LTC4245 I2C Multiple Supply Hot Swap Controller + * + * Copyright (C) 2008 Ira W. Snyder + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This driver is based on the ds1621 and ina209 drivers. + * + * Datasheet: + * http://www.linear.com/pc/downloadDocument.do?navId=H0,C1,C1003,C1006,C1140,P19392,D13517 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Valid addresses are 0x20 - 0x3f + * + * For now, we do not probe, since some of these addresses + * are known to be unfriendly to probing */ +static const unsigned short normal_i2c[] = { I2C_CLIENT_END }; + +/* Insmod parameters */ +I2C_CLIENT_INSMOD_1(ltc4245); + +/* Here are names of the chip's registers (a.k.a. 
commands) */ +enum ltc4245_cmd { + LTC4245_STATUS = 0x00, /* readonly */ + LTC4245_ALERT = 0x01, + LTC4245_CONTROL = 0x02, + LTC4245_ON = 0x03, + LTC4245_FAULT1 = 0x04, + LTC4245_FAULT2 = 0x05, + LTC4245_GPIO = 0x06, + LTC4245_ADCADR = 0x07, + + LTC4245_12VIN = 0x10, + LTC4245_12VSENSE = 0x11, + LTC4245_12VOUT = 0x12, + LTC4245_5VIN = 0x13, + LTC4245_5VSENSE = 0x14, + LTC4245_5VOUT = 0x15, + LTC4245_3VIN = 0x16, + LTC4245_3VSENSE = 0x17, + LTC4245_3VOUT = 0x18, + LTC4245_VEEIN = 0x19, + LTC4245_VEESENSE = 0x1a, + LTC4245_VEEOUT = 0x1b, + LTC4245_GPIOADC1 = 0x1c, + LTC4245_GPIOADC2 = 0x1d, + LTC4245_GPIOADC3 = 0x1e, +}; + +struct ltc4245_data { + struct device *hwmon_dev; + + struct mutex update_lock; + bool valid; + unsigned long last_updated; /* in jiffies */ + + /* Control registers */ + u8 cregs[0x08]; + + /* Voltage registers */ + u8 vregs[0x0f]; +}; + +static struct ltc4245_data *ltc4245_update_device(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ltc4245_data *data = i2c_get_clientdata(client); + s32 val; + int i; + + mutex_lock(&data->update_lock); + + if (time_after(jiffies, data->last_updated + HZ) || !data->valid) { + + dev_dbg(&client->dev, "Starting ltc4245 update\n"); + + /* Read control registers -- 0x00 to 0x07 */ + for (i = 0; i < ARRAY_SIZE(data->cregs); i++) { + val = i2c_smbus_read_byte_data(client, i); + if (unlikely(val < 0)) + data->cregs[i] = 0; + else + data->cregs[i] = val; + } + + /* Read voltage registers -- 0x10 to 0x1f */ + for (i = 0; i < ARRAY_SIZE(data->vregs); i++) { + val = i2c_smbus_read_byte_data(client, i+0x10); + if (unlikely(val < 0)) + data->vregs[i] = 0; + else + data->vregs[i] = val; + } + + data->last_updated = jiffies; + data->valid = 1; + } + + mutex_unlock(&data->update_lock); + + return data; +} + +/* Return the voltage from the given register in millivolts */ +static int ltc4245_get_voltage(struct device *dev, u8 reg) +{ + struct ltc4245_data *data = ltc4245_update_device(dev); + 
const u8 regval = data->vregs[reg - 0x10]; + u32 voltage = 0; + + switch (reg) { + case LTC4245_12VIN: + case LTC4245_12VOUT: + voltage = regval * 55; + break; + case LTC4245_5VIN: + case LTC4245_5VOUT: + voltage = regval * 22; + break; + case LTC4245_3VIN: + case LTC4245_3VOUT: + voltage = regval * 15; + break; + case LTC4245_VEEIN: + case LTC4245_VEEOUT: + voltage = regval * -55; + break; + case LTC4245_GPIOADC1: + case LTC4245_GPIOADC2: + case LTC4245_GPIOADC3: + voltage = regval * 10; + break; + default: + /* If we get here, the developer messed up */ + WARN_ON_ONCE(1); + break; + } + + return voltage; +} + +/* Return the current in the given sense register in milliAmperes */ +static unsigned int ltc4245_get_current(struct device *dev, u8 reg) +{ + struct ltc4245_data *data = ltc4245_update_device(dev); + const u8 regval = data->vregs[reg - 0x10]; + unsigned int voltage; + unsigned int curr; + + /* The strange looking conversions that follow are fixed-point + * math, since we cannot do floating point in the kernel. + * + * Step 1: convert sense register to microVolts + * Step 2: convert voltage to milliAmperes + * + * If you play around with the V=IR equation, you come up with + * the following: X uV / Y mOhm == Z mA + * + * With the resistors that are fractions of a milliOhm, we multiply + * the voltage and resistance by 10, to shift the decimal point. + * Now we can use the normal division operator again. 
+ */ + + switch (reg) { + case LTC4245_12VSENSE: + voltage = regval * 250; /* voltage in uV */ + curr = voltage / 50; /* sense resistor 50 mOhm */ + break; + case LTC4245_5VSENSE: + voltage = regval * 125; /* voltage in uV */ + curr = (voltage * 10) / 35; /* sense resistor 3.5 mOhm */ + break; + case LTC4245_3VSENSE: + voltage = regval * 125; /* voltage in uV */ + curr = (voltage * 10) / 25; /* sense resistor 2.5 mOhm */ + break; + case LTC4245_VEESENSE: + voltage = regval * 250; /* voltage in uV */ + curr = voltage / 100; /* sense resistor 100 mOhm */ + break; + default: + /* If we get here, the developer messed up */ + WARN_ON_ONCE(1); + curr = 0; + break; + } + + return curr; +} + +static ssize_t ltc4245_show_voltage(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct sensor_device_attribute *attr = to_sensor_dev_attr(da); + const int voltage = ltc4245_get_voltage(dev, attr->index); + + return snprintf(buf, PAGE_SIZE, "%d\n", voltage); +} + +static ssize_t ltc4245_show_current(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct sensor_device_attribute *attr = to_sensor_dev_attr(da); + const unsigned int curr = ltc4245_get_current(dev, attr->index); + + return snprintf(buf, PAGE_SIZE, "%u\n", curr); +} + +static ssize_t ltc4245_show_power(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct sensor_device_attribute *attr = to_sensor_dev_attr(da); + const unsigned int curr = ltc4245_get_current(dev, attr->index); + const int output_voltage = ltc4245_get_voltage(dev, attr->index+1); + + /* current in mA * voltage in mV == power in uW */ + const unsigned int power = abs(output_voltage * curr); + + return snprintf(buf, PAGE_SIZE, "%u\n", power); +} + +static ssize_t ltc4245_show_alarm(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct sensor_device_attribute_2 *attr = to_sensor_dev_attr_2(da); + struct ltc4245_data *data = ltc4245_update_device(dev); + const u8 reg = 
data->cregs[attr->index]; + const u32 mask = attr->nr; + + return snprintf(buf, PAGE_SIZE, "%u\n", (reg & mask) ? 1 : 0); +} + +/* These macros are used below in constructing device attribute objects + * for use with sysfs_create_group() to make a sysfs device file + * for each register. + */ + +#define LTC4245_VOLTAGE(name, ltc4245_cmd_idx) \ + static SENSOR_DEVICE_ATTR(name, S_IRUGO, \ + ltc4245_show_voltage, NULL, ltc4245_cmd_idx) + +#define LTC4245_CURRENT(name, ltc4245_cmd_idx) \ + static SENSOR_DEVICE_ATTR(name, S_IRUGO, \ + ltc4245_show_current, NULL, ltc4245_cmd_idx) + +#define LTC4245_POWER(name, ltc4245_cmd_idx) \ + static SENSOR_DEVICE_ATTR(name, S_IRUGO, \ + ltc4245_show_power, NULL, ltc4245_cmd_idx) + +#define LTC4245_ALARM(name, mask, reg) \ + static SENSOR_DEVICE_ATTR_2(name, S_IRUGO, \ + ltc4245_show_alarm, NULL, (mask), reg) + +/* Construct a sensor_device_attribute structure for each register */ + +/* Input voltages */ +LTC4245_VOLTAGE(in1_input, LTC4245_12VIN); +LTC4245_VOLTAGE(in2_input, LTC4245_5VIN); +LTC4245_VOLTAGE(in3_input, LTC4245_3VIN); +LTC4245_VOLTAGE(in4_input, LTC4245_VEEIN); + +/* Input undervoltage alarms */ +LTC4245_ALARM(in1_min_alarm, (1 << 0), LTC4245_FAULT1); +LTC4245_ALARM(in2_min_alarm, (1 << 1), LTC4245_FAULT1); +LTC4245_ALARM(in3_min_alarm, (1 << 2), LTC4245_FAULT1); +LTC4245_ALARM(in4_min_alarm, (1 << 3), LTC4245_FAULT1); + +/* Currents (via sense resistor) */ +LTC4245_CURRENT(curr1_input, LTC4245_12VSENSE); +LTC4245_CURRENT(curr2_input, LTC4245_5VSENSE); +LTC4245_CURRENT(curr3_input, LTC4245_3VSENSE); +LTC4245_CURRENT(curr4_input, LTC4245_VEESENSE); + +/* Overcurrent alarms */ +LTC4245_ALARM(curr1_max_alarm, (1 << 4), LTC4245_FAULT1); +LTC4245_ALARM(curr2_max_alarm, (1 << 5), LTC4245_FAULT1); +LTC4245_ALARM(curr3_max_alarm, (1 << 6), LTC4245_FAULT1); +LTC4245_ALARM(curr4_max_alarm, (1 << 7), LTC4245_FAULT1); + +/* Output voltages */ +LTC4245_VOLTAGE(in5_input, LTC4245_12VOUT); +LTC4245_VOLTAGE(in6_input, LTC4245_5VOUT); 
+LTC4245_VOLTAGE(in7_input, LTC4245_3VOUT); +LTC4245_VOLTAGE(in8_input, LTC4245_VEEOUT); + +/* Power Bad alarms */ +LTC4245_ALARM(in5_min_alarm, (1 << 0), LTC4245_FAULT2); +LTC4245_ALARM(in6_min_alarm, (1 << 1), LTC4245_FAULT2); +LTC4245_ALARM(in7_min_alarm, (1 << 2), LTC4245_FAULT2); +LTC4245_ALARM(in8_min_alarm, (1 << 3), LTC4245_FAULT2); + +/* GPIO voltages */ +LTC4245_VOLTAGE(in9_input, LTC4245_GPIOADC1); +LTC4245_VOLTAGE(in10_input, LTC4245_GPIOADC2); +LTC4245_VOLTAGE(in11_input, LTC4245_GPIOADC3); + +/* Power Consumption (virtual) */ +LTC4245_POWER(power1_input, LTC4245_12VSENSE); +LTC4245_POWER(power2_input, LTC4245_5VSENSE); +LTC4245_POWER(power3_input, LTC4245_3VSENSE); +LTC4245_POWER(power4_input, LTC4245_VEESENSE); + +/* Finally, construct an array of pointers to members of the above objects, + * as required for sysfs_create_group() + */ +static struct attribute *ltc4245_attributes[] = { + &sensor_dev_attr_in1_input.dev_attr.attr, + &sensor_dev_attr_in2_input.dev_attr.attr, + &sensor_dev_attr_in3_input.dev_attr.attr, + &sensor_dev_attr_in4_input.dev_attr.attr, + + &sensor_dev_attr_in1_min_alarm.dev_attr.attr, + &sensor_dev_attr_in2_min_alarm.dev_attr.attr, + &sensor_dev_attr_in3_min_alarm.dev_attr.attr, + &sensor_dev_attr_in4_min_alarm.dev_attr.attr, + + &sensor_dev_attr_curr1_input.dev_attr.attr, + &sensor_dev_attr_curr2_input.dev_attr.attr, + &sensor_dev_attr_curr3_input.dev_attr.attr, + &sensor_dev_attr_curr4_input.dev_attr.attr, + + &sensor_dev_attr_curr1_max_alarm.dev_attr.attr, + &sensor_dev_attr_curr2_max_alarm.dev_attr.attr, + &sensor_dev_attr_curr3_max_alarm.dev_attr.attr, + &sensor_dev_attr_curr4_max_alarm.dev_attr.attr, + + &sensor_dev_attr_in5_input.dev_attr.attr, + &sensor_dev_attr_in6_input.dev_attr.attr, + &sensor_dev_attr_in7_input.dev_attr.attr, + &sensor_dev_attr_in8_input.dev_attr.attr, + + &sensor_dev_attr_in5_min_alarm.dev_attr.attr, + &sensor_dev_attr_in6_min_alarm.dev_attr.attr, + &sensor_dev_attr_in7_min_alarm.dev_attr.attr, + 
&sensor_dev_attr_in8_min_alarm.dev_attr.attr, + + &sensor_dev_attr_in9_input.dev_attr.attr, + &sensor_dev_attr_in10_input.dev_attr.attr, + &sensor_dev_attr_in11_input.dev_attr.attr, + + &sensor_dev_attr_power1_input.dev_attr.attr, + &sensor_dev_attr_power2_input.dev_attr.attr, + &sensor_dev_attr_power3_input.dev_attr.attr, + &sensor_dev_attr_power4_input.dev_attr.attr, + + NULL, +}; + +static const struct attribute_group ltc4245_group = { + .attrs = ltc4245_attributes, +}; + +static int ltc4245_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct ltc4245_data *data; + int ret; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto out_kzalloc; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + + /* Initialize the LTC4245 chip */ + /* TODO */ + + /* Register sysfs hooks */ + ret = sysfs_create_group(&client->dev.kobj, &ltc4245_group); + if (ret) + goto out_sysfs_create_group; + + data->hwmon_dev = hwmon_device_register(&client->dev); + if (IS_ERR(data->hwmon_dev)) { + ret = PTR_ERR(data->hwmon_dev); + goto out_hwmon_device_register; + } + + return 0; + +out_hwmon_device_register: + sysfs_remove_group(&client->dev.kobj, &ltc4245_group); +out_sysfs_create_group: + kfree(data); +out_kzalloc: + return ret; +} + +static int ltc4245_remove(struct i2c_client *client) +{ + struct ltc4245_data *data = i2c_get_clientdata(client); + + hwmon_device_unregister(data->hwmon_dev); + sysfs_remove_group(&client->dev.kobj, &ltc4245_group); + + kfree(data); + + return 0; +} + +/* Check that some bits in a control register appear at all possible + * locations without changing value + * + * @client: the i2c client to use + * @reg: the register to read + * @bits: the bits to check (0xff checks all bits, + * 0x03 checks only the last two bits) + * + * return -ERRNO if the register read failed + * return -ENODEV if the register value doesn't stay constant at all + * possible addresses + * + * return 0 for success
+ */ +static int ltc4245_check_control_reg(struct i2c_client *client, u8 reg, u8 bits) +{ + int i; + s32 v, voff1, voff2; + + /* Read register and check for error */ + v = i2c_smbus_read_byte_data(client, reg); + if (v < 0) + return v; + + v &= bits; + + for (i = 0x00; i < 0xff; i += 0x20) { + + voff1 = i2c_smbus_read_byte_data(client, reg + i); + if (voff1 < 0) + return voff1; + + voff2 = i2c_smbus_read_byte_data(client, reg + i + 0x08); + if (voff2 < 0) + return voff2; + + voff1 &= bits; + voff2 &= bits; + + if (v != voff1 || v != voff2) + return -ENODEV; + } + + return 0; +} + +static int ltc4245_detect(struct i2c_client *client, + int kind, + struct i2c_board_info *info) +{ + struct i2c_adapter *adapter = client->adapter; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -ENODEV; + + if (kind < 0) { /* probed detection - check the chip type */ + s32 v; /* 8 bits from the chip, or -ERRNO */ + + /* Chip registers 0x00-0x07 are control registers + * Chip registers 0x10-0x1f are data registers + * + * Address bits b7-b5 are ignored. This makes the chip "repeat" + * in steps of 0x20. Any control registers should appear with + * the same values across all duplicated addresses. + * + * Register 0x02 bit b2 is reserved, expect 0 + * Register 0x07 bits b7 to b4 are reserved, expect 0 + * + * Registers 0x01, 0x02 are control registers and should not + * change on their own. + * + * Register 0x06 bits b6 and b7 are control bits, and should + * not change on their own. + * + * Register 0x07 bits b3 to b0 are control bits, and should + * not change on their own. 
+ */ + + /* read register 0x02 reserved bit, expect 0 */ + v = i2c_smbus_read_byte_data(client, LTC4245_CONTROL); + if (v < 0 || (v & 0x04) != 0) + return -ENODEV; + + /* read register 0x07 reserved bits, expect 0 */ + v = i2c_smbus_read_byte_data(client, LTC4245_ADCADR); + if (v < 0 || (v & 0xf0) != 0) + return -ENODEV; + + /* check that the alert register appears at all locations */ + if (ltc4245_check_control_reg(client, LTC4245_ALERT, 0xff)) + return -ENODEV; + + /* check that the control register appears at all locations */ + if (ltc4245_check_control_reg(client, LTC4245_CONTROL, 0xff)) + return -ENODEV; + + /* check that register 0x06 bits b6 and b7 stay constant */ + if (ltc4245_check_control_reg(client, LTC4245_GPIO, 0xc0)) + return -ENODEV; + + /* check that register 0x07 bits b3-b0 stay constant */ + if (ltc4245_check_control_reg(client, LTC4245_ADCADR, 0x0f)) + return -ENODEV; + } + + strlcpy(info->type, "ltc4245", I2C_NAME_SIZE); + dev_info(&adapter->dev, "ltc4245 %s at address 0x%02x\n", + kind < 0 ? "probed" : "forced", + client->addr); + + return 0; +} + +static const struct i2c_device_id ltc4245_id[] = { + { "ltc4245", ltc4245 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ltc4245_id); + +/* This is the driver that will be inserted */ +static struct i2c_driver ltc4245_driver = { + .class = I2C_CLASS_HWMON, + .driver = { + .name = "ltc4245", + }, + .probe = ltc4245_probe, + .remove = ltc4245_remove, + .id_table = ltc4245_id, + .detect = ltc4245_detect, + .address_data = &addr_data, +}; + +static int __init ltc4245_init(void) +{ + return i2c_add_driver(&ltc4245_driver); +} + +static void __exit ltc4245_exit(void) +{ + i2c_del_driver(&ltc4245_driver); +} + +MODULE_AUTHOR("Ira W.
Snyder "); +MODULE_DESCRIPTION("LTC4245 driver"); +MODULE_LICENSE("GPL"); + +module_init(ltc4245_init); +module_exit(ltc4245_exit); -- cgit v1.2.3-70-g09d2 From b4da93e4b0ffc261c3530fe938aefd52854aa84c Mon Sep 17 00:00:00 2001 From: Jean-Marc Spaggiari Date: Wed, 7 Jan 2009 16:37:32 +0100 Subject: hwmon: (it87) Add support for the ITE IT8720F Allow it87.c to handle IT8720 chipset like IT8718 in order to retrieve voltage, temperatures and fans speed from sensors tools. Also updating the related documentation. Signed-off-by: Jean-Marc Spaggiari Signed-off-by: Jean Delvare --- Documentation/hwmon/it87 | 20 ++++++++++++-------- drivers/hwmon/Kconfig | 3 ++- drivers/hwmon/it87.c | 30 +++++++++++++++++++++--------- 3 files changed, 35 insertions(+), 18 deletions(-) (limited to 'Documentation') diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87 index 042c0415140..659315d98e0 100644 --- a/Documentation/hwmon/it87 +++ b/Documentation/hwmon/it87 @@ -26,6 +26,10 @@ Supported chips: Datasheet: Publicly available at the ITE website http://www.ite.com.tw/product_info/file/pc/IT8718F_V0.2.zip http://www.ite.com.tw/product_info/file/pc/IT8718F_V0%203_(for%20C%20version).zip + * IT8720F + Prefix: 'it8720' + Addresses scanned: from Super I/O config space (8 I/O ports) + Datasheet: Not yet publicly available. * SiS950 [clone of IT8705F] Prefix: 'it87' Addresses scanned: from Super I/O config space (8 I/O ports) @@ -71,7 +75,7 @@ Description ----------- This driver implements support for the IT8705F, IT8712F, IT8716F, -IT8718F, IT8726F and SiS950 chips. +IT8718F, IT8720F, IT8726F and SiS950 chips. These chips are 'Super I/O chips', supporting floppy disks, infrared ports, joysticks and other miscellaneous stuff. For hardware monitoring, they @@ -84,19 +88,19 @@ the IT8716F and late IT8712F have 6. They are shared with other functions though, so the functionality may not be available on a given system. The driver dumbly assume it is there. 
-The IT8718F also features VID inputs (up to 8 pins) but the value is -stored in the Super-I/O configuration space. Due to technical limitations, +The IT8718F and IT8720F also features VID inputs (up to 8 pins) but the value +is stored in the Super-I/O configuration space. Due to technical limitations, this value can currently only be read once at initialization time, so the driver won't notice and report changes in the VID value. The two upper VID bits share their pins with voltage inputs (in5 and in6) so you can't have both on a given board. -The IT8716F, IT8718F and later IT8712F revisions have support for +The IT8716F, IT8718F, IT8720F and later IT8712F revisions have support for 2 additional fans. The additional fans are supported by the driver. -The IT8716F and IT8718F, and late IT8712F and IT8705F also have optional -16-bit tachometer counters for fans 1 to 3. This is better (no more fan -clock divider mess) but not compatible with the older chips and +The IT8716F, IT8718F and IT8720F, and late IT8712F and IT8705F also have +optional 16-bit tachometer counters for fans 1 to 3. This is better (no more +fan clock divider mess) but not compatible with the older chips and revisions. The 16-bit tachometer mode is enabled by the driver when one of the above chips is detected. @@ -122,7 +126,7 @@ zero'; this is important for negative voltage measurements. All voltage inputs can measure voltages between 0 and 4.08 volts, with a resolution of 0.016 volt. The battery voltage in8 does not have limit registers. -The VID lines (IT8712F/IT8716F/IT8718F) encode the core voltage value: +The VID lines (IT8712F/IT8716F/IT8718F/IT8720F) encode the core voltage value: the voltage level your processor should work with. This is hardcoded by the mainboard and/or processor itself. It is a value in volts. 
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 1ef1205b4e8..aba01b4ceca 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -400,7 +400,8 @@ config SENSORS_IT87 select HWMON_VID help If you say yes here you get support for ITE IT8705F, IT8712F, - IT8716F, IT8718F and IT8726F sensor chips, and the SiS960 clone. + IT8716F, IT8718F, IT8720F and IT8726F sensor chips, and the + SiS960 clone. This driver can also be built as a module. If so, the module will be called it87. diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index b74c95735f9..0e0d692f0c9 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -14,6 +14,7 @@ IT8712F Super I/O chip w/LPC interface IT8716F Super I/O chip w/LPC interface IT8718F Super I/O chip w/LPC interface + IT8720F Super I/O chip w/LPC interface IT8726F Super I/O chip w/LPC interface Sis950 A clone of the IT8705F @@ -52,7 +53,7 @@ #define DRVNAME "it87" -enum chips { it87, it8712, it8716, it8718 }; +enum chips { it87, it8712, it8716, it8718, it8720 }; static unsigned short force_id; module_param(force_id, ushort, 0); @@ -64,7 +65,10 @@ static struct platform_device *pdev; #define DEV 0x07 /* Register: Logical device select */ #define VAL 0x2f /* The value to read/write */ #define PME 0x04 /* The device with the fan registers in it */ -#define GPIO 0x07 /* The device with the IT8718F VID value in it */ + +/* The device with the IT8718F/IT8720F VID value in it */ +#define GPIO 0x07 + #define DEVID 0x20 /* Register: Device ID */ #define DEVREV 0x22 /* Register: Device Revision */ @@ -113,6 +117,7 @@ superio_exit(void) #define IT8705F_DEVID 0x8705 #define IT8716F_DEVID 0x8716 #define IT8718F_DEVID 0x8718 +#define IT8720F_DEVID 0x8720 #define IT8726F_DEVID 0x8726 #define IT87_ACT_REG 0x30 #define IT87_BASE_REG 0x60 @@ -150,8 +155,8 @@ static int fix_pwm_polarity; #define IT87_REG_ALARM2 0x02 #define IT87_REG_ALARM3 0x03 -/* The IT8718F has the VID value in a different register, in Super-I/O - 
configuration space. */ +/* The IT8718F and IT8720F have the VID value in a different register, in + Super-I/O configuration space. */ #define IT87_REG_VID 0x0a /* The IT8705F and IT8712F earlier than revision 0x08 use register 0x0b for fan divisors. Later IT8712F revisions must use 16-bit tachometer @@ -282,7 +287,8 @@ static inline int has_16bit_fans(const struct it87_data *data) return (data->type == it87 && data->revision >= 0x03) || (data->type == it8712 && data->revision >= 0x08) || data->type == it8716 - || data->type == it8718; + || data->type == it8718 + || data->type == it8720; } static int it87_probe(struct platform_device *pdev); @@ -992,6 +998,9 @@ static int __init it87_find(unsigned short *address, case IT8718F_DEVID: sio_data->type = it8718; break; + case IT8720F_DEVID: + sio_data->type = it8720; + break; case 0xffff: /* No device at all */ goto exit; default: @@ -1022,7 +1031,8 @@ static int __init it87_find(unsigned short *address, int reg; superio_select(GPIO); - if (chip_type == it8718) + if ((chip_type == it8718) || + (chip_type == it8720)) sio_data->vid_value = superio_inb(IT87_SIO_VID_REG); reg = superio_inb(IT87_SIO_PINX2_REG); @@ -1068,6 +1078,7 @@ static int __devinit it87_probe(struct platform_device *pdev) "it8712", "it8716", "it8718", + "it8720", }; res = platform_get_resource(pdev, IORESOURCE_IO, 0); @@ -1226,7 +1237,7 @@ static int __devinit it87_probe(struct platform_device *pdev) } if (data->type == it8712 || data->type == it8716 - || data->type == it8718) { + || data->type == it8718 || data->type == it8720) { data->vrm = vid_which_vrm(); /* VID reading from Super-I/O config space if available */ data->vid = sio_data->vid_value; @@ -1513,7 +1524,8 @@ static struct it87_data *it87_update_device(struct device *dev) data->sensor = it87_read_value(data, IT87_REG_TEMP_ENABLE); /* The 8705 does not have VID capability. - The 8718 does not use IT87_REG_VID for the same purpose. 
*/ + The 8718 and the 8720 don't use IT87_REG_VID for the + same purpose. */ if (data->type == it8712 || data->type == it8716) { data->vid = it87_read_value(data, IT87_REG_VID); /* The older IT8712F revisions had only 5 VID pins, @@ -1608,7 +1620,7 @@ static void __exit sm_it87_exit(void) MODULE_AUTHOR("Chris Gauthron, " "Jean Delvare "); -MODULE_DESCRIPTION("IT8705F/8712F/8716F/8718F/8726F, SiS950 driver"); +MODULE_DESCRIPTION("IT8705F/8712F/8716F/8718F/8720F/8726F, SiS950 driver"); module_param(update_vbat, bool, 0); MODULE_PARM_DESC(update_vbat, "Update vbat if set else return powerup value"); module_param(fix_pwm_polarity, bool, 0); -- cgit v1.2.3-70-g09d2 From 0589c2de643ef71a684ba6d219532f9e2a3e554b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 Jan 2009 16:37:33 +0100 Subject: hwmon: Deprecate the fscher and fscpos drivers Now that the new merged fschmd driver has gained support for the watchdog integrated into these IC's, there is no more reason to keep the old fscher and fscpos drivers around, so mark them as deprecated. Signed-off-by: Hans de Goede Signed-off-by: Jean Delvare --- Documentation/feature-removal-schedule.txt | 8 ++++++++ drivers/hwmon/Kconfig | 20 ++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 2193be53e77..5ddbe350487 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -318,6 +318,14 @@ Who: Jean Delvare --------------------------- +What: fscher and fscpos drivers +When: June 2009 +Why: Deprecated by the new fschmd driver. 
+Who: Hans de Goede + Jean Delvare + +--------------------------- + What: SELinux "compat_net" functionality When: 2.6.30 at the earliest Why: In 2.6.18 the Secmark concept was introduced to replace the "compat_net" diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 91975148f4b..3c34fb5e419 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -305,9 +305,13 @@ config SENSORS_F75375S will be called f75375s. config SENSORS_FSCHER - tristate "FSC Hermes" + tristate "FSC Hermes (DEPRECATED)" depends on X86 && I2C help + This driver is DEPRECATED please use the new merged fschmd + ("FSC Poseidon, Scylla, Hermes, Heimdall and Heracles") driver + instead. + If you say yes here you get support for Fujitsu Siemens Computers Hermes sensor chips. @@ -315,9 +319,13 @@ config SENSORS_FSCHER will be called fscher. config SENSORS_FSCPOS - tristate "FSC Poseidon" + tristate "FSC Poseidon (DEPRECATED)" depends on X86 && I2C help + This driver is DEPRECATED please use the new merged fschmd + ("FSC Poseidon, Scylla, Hermes, Heimdall and Heracles") driver + instead. + If you say yes here you get support for Fujitsu Siemens Computers Poseidon sensor chips. @@ -326,15 +334,15 @@ config SENSORS_FSCPOS config SENSORS_FSCHMD tristate "FSC Poseidon, Scylla, Hermes, Heimdall and Heracles" - depends on X86 && I2C && EXPERIMENTAL + depends on X86 && I2C help If you say yes here you get support for various Fujitsu Siemens Computers sensor chips, including support for the integrated watchdog. - This is a new merged driver for FSC sensor chips which is intended - as a replacement for the fscpos, fscscy and fscher drivers and adds - support for several other FCS sensor chips. + This is a merged driver for FSC sensor chips replacing the fscpos, + fscscy and fscher drivers and adding support for several other FSC + sensor chips. This driver can also be built as a module. If so, the module will be called fschmd. 
-- cgit v1.2.3-70-g09d2 From 2b7300513b98e05058a803de3beb8a1c0a0c61d9 Mon Sep 17 00:00:00 2001 From: Kaiwan N Billimoria Date: Wed, 7 Jan 2009 16:37:34 +0100 Subject: hwmon: (lm70) Code streamlining and cleanup This fixes a byteswap bug in the LM70 temperature sensor driver, which was previously covered up by a converse bug in the driver for the LM70EVAL-LLP board (which is also fixed). Other fixes: doc updates, remove an annoying msleep(), and improve three-wire protocol handling. Signed-off-by: Kaiwan N Billimoria [ dbrownell@users.sourceforge.net: doc and whitespace tweaks ] Signed-off-by: David Brownell Signed-off-by: Jean Delvare --- Documentation/hwmon/lm70 | 4 ++++ Documentation/spi/spi-lm70llp | 10 ++++++++++ drivers/hwmon/lm70.c | 9 +++++---- drivers/spi/spi_lm70llp.c | 33 ++++++++++++--------------------- 4 files changed, 31 insertions(+), 25 deletions(-) (limited to 'Documentation') diff --git a/Documentation/hwmon/lm70 b/Documentation/hwmon/lm70 index 2bdd3feebf5..b8d1a521e68 100644 --- a/Documentation/hwmon/lm70 +++ b/Documentation/hwmon/lm70 @@ -25,6 +25,10 @@ complement digital temperature (sent via the SIO line), is available in the driver for interpretation. This driver makes use of the kernel's in-core SPI support. +As a real (in-tree) example of this "SPI protocol driver" interfacing +with a "SPI master controller driver", see drivers/spi/spi_lm70llp.c +and its associated documentation. + Thanks to --------- Jean Delvare for mentoring the hwmon-side driver diff --git a/Documentation/spi/spi-lm70llp b/Documentation/spi/spi-lm70llp index 154bd02220b..34a9cfd746b 100644 --- a/Documentation/spi/spi-lm70llp +++ b/Documentation/spi/spi-lm70llp @@ -13,10 +13,20 @@ Description This driver provides glue code connecting a National Semiconductor LM70 LLP temperature sensor evaluation board to the kernel's SPI core subsystem. +This is a SPI master controller driver. 
It can be used in conjunction with +(layered under) the LM70 logical driver (a "SPI protocol driver"). In effect, this driver turns the parallel port interface on the eval board into a SPI bus with a single device, which will be driven by the generic LM70 driver (drivers/hwmon/lm70.c). + +Hardware Interfacing +-------------------- +The schematic for this particular board (the LM70EVAL-LLP) is +available (on page 4) here: + + http://www.national.com/appinfo/tempsensors/files/LM70LLPEVALmanual.pdf + The hardware interfacing on the LM70 LLP eval board is as follows: Parallel LM70 LLP diff --git a/drivers/hwmon/lm70.c b/drivers/hwmon/lm70.c index d435f003292..9f9741b1d2b 100644 --- a/drivers/hwmon/lm70.c +++ b/drivers/hwmon/lm70.c @@ -65,10 +65,9 @@ static ssize_t lm70_sense_temp(struct device *dev, "spi_write_then_read failed with status %d\n", status); goto out; } - dev_dbg(dev, "rxbuf[1] : 0x%x rxbuf[0] : 0x%x\n", rxbuf[1], rxbuf[0]); - - raw = (rxbuf[1] << 8) + rxbuf[0]; - dev_dbg(dev, "raw=0x%x\n", raw); + raw = (rxbuf[0] << 8) + rxbuf[1]; + dev_dbg(dev, "rxbuf[0] : 0x%02x rxbuf[1] : 0x%02x raw=0x%04x\n", + rxbuf[0], rxbuf[1], raw); /* * The "raw" temperature read into rxbuf[] is a 16-bit signed 2's @@ -109,6 +108,8 @@ static int __devinit lm70_probe(struct spi_device *spi) if ((spi->mode & (SPI_CPOL|SPI_CPHA)) || !(spi->mode & SPI_3WIRE)) return -EINVAL; + /* NOTE: we assume 8-bit words, and convert to 16 bits manually */ + p_lm70 = kzalloc(sizeof *p_lm70, GFP_KERNEL); if (!p_lm70) return -ENOMEM; diff --git a/drivers/spi/spi_lm70llp.c b/drivers/spi/spi_lm70llp.c index af6526767e2..568c781ad91 100644 --- a/drivers/spi/spi_lm70llp.c +++ b/drivers/spi/spi_lm70llp.c @@ -1,5 +1,5 @@ /* - * spi_lm70llp.c - driver for lm70llp eval board for the LM70 sensor + * spi_lm70llp.c - driver for LM70EVAL-LLP board for the LM70 sensor * * Copyright (C) 2006 Kaiwan N Billimoria * @@ -40,8 +40,12 @@ * master controller driver. 
The hwmon/lm70 driver is a "SPI protocol * driver", layered on top of this one and usable without the lm70llp. * + * Datasheet and Schematic: * The LM70 is a temperature sensor chip from National Semiconductor; its * datasheet is available at http://www.national.com/pf/LM/LM70.html + * The schematic for this particular board (the LM70EVAL-LLP) is + * available (on page 4) here: + * http://www.national.com/appinfo/tempsensors/files/LM70LLPEVALmanual.pdf * * Also see Documentation/spi/spi-lm70llp. The SPI<->parport code here is * (heavily) based on spi-butterfly by David Brownell. @@ -64,7 +68,7 @@ * * Note that parport pin 13 actually gets inverted by the transistor * arrangement which lets either the parport or the LM70 drive the - * SI/SO signal. + * SI/SO signal (see the schematic for details). */ #define DRVNAME "spi-lm70llp" @@ -106,12 +110,16 @@ static inline struct spi_lm70llp *spidev_to_pp(struct spi_device *spi) static inline void deassertCS(struct spi_lm70llp *pp) { u8 data = parport_read_data(pp->port); + + data &= ~0x80; /* pull D7/SI-out low while de-asserted */ parport_write_data(pp->port, data | nCS); } static inline void assertCS(struct spi_lm70llp *pp) { u8 data = parport_read_data(pp->port); + + data |= 0x80; /* pull D7/SI-out high so lm70 drives SO-in */ parport_write_data(pp->port, data & ~nCS); } @@ -184,22 +192,7 @@ static void lm70_chipselect(struct spi_device *spi, int value) */ static u32 lm70_txrx(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits) { - static u32 sio=0; - static int first_time=1; - - /* First time: perform SPI bitbang and return the LSB of - * the result of the SPI call. 
- */ - if (first_time) { - sio = bitbang_txrx_be_cpha0(spi, nsecs, 0, word, bits); - first_time=0; - return (sio & 0x00ff); - } - /* Return the MSB of the result of the SPI call */ - else { - first_time=1; - return (sio >> 8); - } + return bitbang_txrx_be_cpha0(spi, nsecs, 0, word, bits); } static void spi_lm70llp_attach(struct parport *p) @@ -293,10 +286,9 @@ static void spi_lm70llp_attach(struct parport *p) status = -ENODEV; goto out_bitbang_stop; } - pp->spidev_lm70->bits_per_word = 16; + pp->spidev_lm70->bits_per_word = 8; lm70llp = pp; - return; out_bitbang_stop: @@ -326,7 +318,6 @@ static void spi_lm70llp_detach(struct parport *p) /* power down */ parport_write_data(pp->port, 0); - msleep(10); parport_release(pp->pd); parport_unregister_device(pp->pd); -- cgit v1.2.3-70-g09d2 From c8ac32e4711639c81e5f4d4cd78c8f21675a2bae Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Wed, 7 Jan 2009 16:37:34 +0100 Subject: hwmon: (lm70) Add TI TMP121 support The Texas Instruments TMP121 is a SPI temperature sensor very similar to the LM70, with slightly higher resolution. This patch extends the LM70 driver to support the TMP121. The TMP123 differs in pin assign- ment. 
Signed-off-by: Manuel Lauss Signed-off-by: Jean Delvare --- Documentation/hwmon/lm70 | 8 ++++- drivers/hwmon/Kconfig | 5 +-- drivers/hwmon/lm70.c | 84 ++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 84 insertions(+), 13 deletions(-) (limited to 'Documentation') diff --git a/Documentation/hwmon/lm70 b/Documentation/hwmon/lm70 index b8d1a521e68..0d240291e3c 100644 --- a/Documentation/hwmon/lm70 +++ b/Documentation/hwmon/lm70 @@ -1,9 +1,11 @@ Kernel driver lm70 ================== -Supported chip: +Supported chips: * National Semiconductor LM70 Datasheet: http://www.national.com/pf/LM/LM70.html + * Texas Instruments TMP121/TMP123 + Information: http://focus.ti.com/docs/prod/folders/print/tmp121.html Author: Kaiwan N Billimoria @@ -29,6 +31,10 @@ As a real (in-tree) example of this "SPI protocol driver" interfacing with a "SPI master controller driver", see drivers/spi/spi_lm70llp.c and its associated documentation. +The TMP121/TMP123 are very similar; main differences are 4 wire SPI inter- +face (read only) and 13-bit temperature data (0.0625 degrees celsius reso- +lution). + Thanks to --------- Jean Delvare for mentoring the hwmon-side driver diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 3c34fb5e419..4b33bc82cc2 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -428,11 +428,12 @@ config SENSORS_LM63 will be called lm63. config SENSORS_LM70 - tristate "National Semiconductor LM70" + tristate "National Semiconductor LM70 / Texas Instruments TMP121" depends on SPI_MASTER && EXPERIMENTAL help If you say yes here you get support for the National Semiconductor - LM70 digital temperature sensor chip. + LM70 and Texas Instruments TMP121/TMP123 digital temperature + sensor chips. This driver can also be built as a module. If so, the module will be called lm70. 
diff --git a/drivers/hwmon/lm70.c b/drivers/hwmon/lm70.c index 9f9741b1d2b..ae6204f3321 100644 --- a/drivers/hwmon/lm70.c +++ b/drivers/hwmon/lm70.c @@ -37,9 +37,13 @@ #define DRVNAME "lm70" +#define LM70_CHIP_LM70 0 /* original NS LM70 */ +#define LM70_CHIP_TMP121 1 /* TI TMP121/TMP123 */ + struct lm70 { struct device *hwmon_dev; struct mutex lock; + unsigned int chip; }; /* sysfs hook function */ @@ -47,7 +51,7 @@ static ssize_t lm70_sense_temp(struct device *dev, struct device_attribute *attr, char *buf) { struct spi_device *spi = to_spi_device(dev); - int status, val; + int status, val = 0; u8 rxbuf[2]; s16 raw=0; struct lm70 *p_lm70 = dev_get_drvdata(&spi->dev); @@ -70,6 +74,7 @@ static ssize_t lm70_sense_temp(struct device *dev, rxbuf[0], rxbuf[1], raw); /* + * LM70: * The "raw" temperature read into rxbuf[] is a 16-bit signed 2's * complement value. Only the MSB 11 bits (1 sign + 10 temperature * bits) are meaningful; the LSB 5 bits are to be discarded. @@ -79,8 +84,21 @@ static ssize_t lm70_sense_temp(struct device *dev, * by 0.25. Also multiply by 1000 to represent in millidegrees * Celsius. * So it's equivalent to multiplying by 0.25 * 1000 = 250. + * + * TMP121/TMP123: + * 13 bits of 2's complement data, discard LSB 3 bits, + * resolution 0.0625 degrees celsius. 
*/ - val = ((int)raw/32) * 250; + switch (p_lm70->chip) { + case LM70_CHIP_LM70: + val = ((int)raw / 32) * 250; + break; + + case LM70_CHIP_TMP121: + val = ((int)raw / 8) * 625 / 10; + break; + } + status = sprintf(buf, "%d\n", val); /* millidegrees Celsius */ out: mutex_unlock(&p_lm70->lock); @@ -92,22 +110,31 @@ static DEVICE_ATTR(temp1_input, S_IRUGO, lm70_sense_temp, NULL); static ssize_t lm70_show_name(struct device *dev, struct device_attribute *devattr, char *buf) { - return sprintf(buf, "lm70\n"); + struct lm70 *p_lm70 = dev_get_drvdata(dev); + int ret; + + switch (p_lm70->chip) { + case LM70_CHIP_LM70: + ret = sprintf(buf, "lm70\n"); + break; + case LM70_CHIP_TMP121: + ret = sprintf(buf, "tmp121\n"); + break; + default: + ret = -EINVAL; + } + return ret; } static DEVICE_ATTR(name, S_IRUGO, lm70_show_name, NULL); /*----------------------------------------------------------------------*/ -static int __devinit lm70_probe(struct spi_device *spi) +static int __devinit common_probe(struct spi_device *spi, int chip) { struct lm70 *p_lm70; int status; - /* signaling is SPI_MODE_0 on a 3-wire link (shared SI/SO) */ - if ((spi->mode & (SPI_CPOL|SPI_CPHA)) || !(spi->mode & SPI_3WIRE)) - return -EINVAL; - /* NOTE: we assume 8-bit words, and convert to 16 bits manually */ p_lm70 = kzalloc(sizeof *p_lm70, GFP_KERNEL); @@ -115,6 +142,7 @@ static int __devinit lm70_probe(struct spi_device *spi) return -ENOMEM; mutex_init(&p_lm70->lock); + p_lm70->chip = chip; /* sysfs hook */ p_lm70->hwmon_dev = hwmon_device_register(&spi->dev); @@ -142,6 +170,24 @@ out_dev_reg_failed: return status; } +static int __devinit lm70_probe(struct spi_device *spi) +{ + /* signaling is SPI_MODE_0 on a 3-wire link (shared SI/SO) */ + if ((spi->mode & (SPI_CPOL | SPI_CPHA)) || !(spi->mode & SPI_3WIRE)) + return -EINVAL; + + return common_probe(spi, LM70_CHIP_LM70); +} + +static int __devinit tmp121_probe(struct spi_device *spi) +{ + /* signaling is SPI_MODE_0 with only MISO connected */ + if 
(spi->mode & (SPI_CPOL | SPI_CPHA)) + return -EINVAL; + + return common_probe(spi, LM70_CHIP_TMP121); +} + static int __devexit lm70_remove(struct spi_device *spi) { struct lm70 *p_lm70 = dev_get_drvdata(&spi->dev); @@ -155,6 +201,15 @@ static int __devexit lm70_remove(struct spi_device *spi) return 0; } +static struct spi_driver tmp121_driver = { + .driver = { + .name = "tmp121", + .owner = THIS_MODULE, + }, + .probe = tmp121_probe, + .remove = __devexit_p(lm70_remove), +}; + static struct spi_driver lm70_driver = { .driver = { .name = "lm70", @@ -166,17 +221,26 @@ static struct spi_driver lm70_driver = { static int __init init_lm70(void) { - return spi_register_driver(&lm70_driver); + int ret = spi_register_driver(&lm70_driver); + if (ret) + return ret; + + ret = spi_register_driver(&tmp121_driver); + if (ret) + spi_unregister_driver(&lm70_driver); + + return ret; } static void __exit cleanup_lm70(void) { spi_unregister_driver(&lm70_driver); + spi_unregister_driver(&tmp121_driver); } module_init(init_lm70); module_exit(cleanup_lm70); MODULE_AUTHOR("Kaiwan N Billimoria"); -MODULE_DESCRIPTION("National Semiconductor LM70 Linux driver"); +MODULE_DESCRIPTION("NS LM70 / TI TMP121/TMP123 Linux driver"); MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From 77fa49d94a75b5f9702c70b4fbe27b08b21317b9 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 7 Jan 2009 16:37:35 +0100 Subject: hwmon: Fix various typos Signed-off-by: Jean Delvare Acked-by: Hans de Goede Acked-by: David Hubbard --- Documentation/hwmon/lm85 | 2 +- drivers/hwmon/it87.c | 2 +- drivers/hwmon/w83627ehf.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/hwmon/lm85 b/Documentation/hwmon/lm85 index 40062074129..a13680871bc 100644 --- a/Documentation/hwmon/lm85 +++ b/Documentation/hwmon/lm85 @@ -164,7 +164,7 @@ configured individually according to the following options. temperature. 
(PWM value from 0 to 255) * pwm#_auto_pwm_minctl - this flags selects for temp#_auto_temp_off temperature - the bahaviour of fans. Write 1 to let fans spinning at + the behaviour of fans. Write 1 to let fans spinning at pwm#_auto_pwm_min or write 0 to let them off. NOTE: It has been reported that there is a bug in the LM85 that causes the flag diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index 88e71f195ec..95a99c590da 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -1386,7 +1386,7 @@ static void __devinit it87_init_device(struct platform_device *pdev) it87_write_value(data, IT87_REG_TEMP_HIGH(i), 127); } - /* Check if temperature channnels are reset manually or by some reason */ + /* Check if temperature channels are reset manually or by some reason */ tmp = it87_read_value(data, IT87_REG_TEMP_ENABLE); if ((tmp & 0x3f) == 0) { /* Temp1,Temp3=thermistor; Temp2=thermal diode */ diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index a3a01dc35a3..cb808d01536 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -503,7 +503,7 @@ static struct w83627ehf_data *w83627ehf_update_device(struct device *dev) } for (i = 0; i < 4; i++) { - /* pwmcfg, tolarance mapped for i=0, i=1 to same reg */ + /* pwmcfg, tolerance mapped for i=0, i=1 to same reg */ if (i != 1) { pwmcfg = w83627ehf_read_value(data, W83627EHF_REG_PWM_ENABLE[i]); -- cgit v1.2.3-70-g09d2 From d4f373e57d3916814110968c5ea1155a8d972b5a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 10 Nov 2008 14:07:45 -0500 Subject: USB: usb-storage: add "quirks=" module parameter This patch (as1163b) adds a "quirks=" module parameter to usb-storage. This will allow people to make short-term changes to their unusual_devs list without rebuilding the entire driver. Testing will become much easier, and less-sophisticated users will be able to access their buggy devices after a simple config-file change instead of having to wait for a new kernel release. 
The patch also adds a documentation entry for usb-storage's "delay_use" parameter, which has been around for years but was never listed among the kernel parameters. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 29 +++++++++ drivers/usb/storage/usb.c | 113 ++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0b3f6711d2f..8eb6e35405c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -91,6 +91,7 @@ parameter is applicable: SUSPEND System suspend states are enabled. FTRACE Function tracing enabled. TS Appropriate touchscreen support is enabled. + UMS USB Mass Storage support is enabled. USB USB support is enabled. USBHID USB Human Interface Device support is enabled. V4L Video For Linux support is enabled. @@ -2383,6 +2384,34 @@ and is between 256 and 4096 characters. It is defined in the file usbhid.mousepoll= [USBHID] The interval which mice are to be polled at. + usb-storage.delay_use= + [UMS] The delay in seconds before a new device is + scanned for Logical Units (default 5). + + usb-storage.quirks= + [UMS] A list of quirks entries to supplement or + override the built-in unusual_devs list. List + entries are separated by commas.
Each entry has + the form VID:PID:Flags where VID and PID are Vendor + and Product ID values (4-digit hex numbers) and + Flags is a set of characters, each corresponding + to a common usb-storage quirk flag as follows: + c = FIX_CAPACITY (decrease the reported + device capacity by one sector); + i = IGNORE_DEVICE (don't bind to this + device); + l = NOT_LOCKABLE (don't try to lock and + unlock ejectable media); + m = MAX_SECTORS_64 (don't transfer more + than 64 sectors = 32 KB at a time); + r = IGNORE_RESIDUE (the device reports + bogus residue values); + s = SINGLE_LUN (the device has only one + Logical Unit); + w = NO_WP_DETECT (don't test whether the + medium is write-protected). + Example: quirks=0419:aaf5:rl,0421:0433:rc + add_efi_memmap [EFI; x86-32,X86-64] Include EFI memory map in kernel's map of available physical RAM. diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 27016fd2cad..eb1a53a3e5c 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -113,6 +113,16 @@ static unsigned int delay_use = 5; module_param(delay_use, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(delay_use, "seconds to delay before using a new device"); +static char *quirks; +module_param(quirks, charp, S_IRUGO); +MODULE_PARM_DESC(quirks, "supplemental list of device IDs and their quirks"); + +struct quirks_entry { + u16 vid, pid; + u32 fflags; +}; +static struct quirks_entry *quirks_list, *quirks_end; + /* * The entries in this table correspond, line for line, @@ -473,6 +483,30 @@ static int associate_dev(struct us_data *us, struct usb_interface *intf) return 0; } +/* Adjust device flags based on the "quirks=" module parameter */ +static void adjust_quirks(struct us_data *us) +{ + u16 vid, pid; + struct quirks_entry *q; + unsigned int mask = (US_FL_FIX_CAPACITY | US_FL_IGNORE_DEVICE | + US_FL_NOT_LOCKABLE | US_FL_MAX_SECTORS_64 | + US_FL_IGNORE_RESIDUE | US_FL_SINGLE_LUN | + US_FL_NO_WP_DETECT); + + vid = 
le16_to_cpu(us->pusb_dev->descriptor.idVendor); + pid = le16_to_cpu(us->pusb_dev->descriptor.idProduct); + + for (q = quirks_list; q != quirks_end; ++q) { + if (q->vid == vid && q->pid == pid) { + us->fflags = (us->fflags & ~mask) | q->fflags; + dev_info(&us->pusb_intf->dev, "Quirks match for " + "vid %04x pid %04x: %x\n", + vid, pid, q->fflags); + break; + } + } +} + /* Find an unusual_dev descriptor (always succeeds in the current code) */ static struct us_unusual_dev *find_unusual(const struct usb_device_id *id) { @@ -497,6 +531,7 @@ static int get_device_info(struct us_data *us, const struct usb_device_id *id) idesc->bInterfaceProtocol : unusual_dev->useTransport; us->fflags = USB_US_ORIG_FLAGS(id->driver_info); + adjust_quirks(us); if (us->fflags & US_FL_IGNORE_DEVICE) { printk(KERN_INFO USB_STORAGE "device ignored\n"); @@ -1061,10 +1096,88 @@ static struct usb_driver usb_storage_driver = { .soft_unbind = 1, }; +/* Works only for digits and letters, but small and fast */ +#define TOLOWER(x) ((x) | 0x20) + +static void __init parse_quirks(void) +{ + int n, i; + char *p; + + if (!quirks) + return; + + /* Count the ':' characters to get 2 * the number of entries */ + n = 0; + for (p = quirks; *p; ++p) { + if (*p == ':') + ++n; + } + n /= 2; + if (n == 0) + return; /* Don't allocate 0 bytes */ + + quirks_list = kmalloc(n * sizeof(*quirks_list), GFP_KERNEL); + if (!quirks_list) + return; + + p = quirks; + quirks_end = quirks_list; + for (i = 0; i < n && *p; ++i) { + unsigned f = 0; + + /* Each entry consists of VID:PID:flags */ + quirks_end->vid = simple_strtoul(p, &p, 16); + if (*p != ':') + goto skip_to_next; + quirks_end->pid = simple_strtoul(p+1, &p, 16); + if (*p != ':') + goto skip_to_next; + + while (*++p && *p != ',') { + switch (TOLOWER(*p)) { + case 'c': + f |= US_FL_FIX_CAPACITY; + break; + case 'i': + f |= US_FL_IGNORE_DEVICE; + break; + case 'l': + f |= US_FL_NOT_LOCKABLE; + break; + case 'm': + f |= US_FL_MAX_SECTORS_64; + break; + case 'r': + f |= 
US_FL_IGNORE_RESIDUE; + break; + case 's': + f |= US_FL_SINGLE_LUN; + break; + case 'w': + f |= US_FL_NO_WP_DETECT; + break; + /* Ignore unrecognized flag characters */ + } + } + quirks_end->fflags = f; + ++quirks_end; + + skip_to_next: + /* Entries are separated by commas */ + while (*p) { + if (*p++ == ',') + break; + } + } /* for (i = 0; ...) */ +} + static int __init usb_stor_init(void) { int retval; + printk(KERN_INFO "Initializing USB Mass Storage driver...\n"); + parse_quirks(); /* register the driver, return usb_register return code if error */ retval = usb_register(&usb_storage_driver); -- cgit v1.2.3-70-g09d2 From 9ac39f28b5237a629e41ccfc1f73d3a55723045c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 12 Nov 2008 16:19:49 -0500 Subject: USB: add asynchronous autosuspend/autoresume support This patch (as1160b) adds support routines for asynchronous autosuspend and autoresume, with accompanying documentation updates. There already are several potential users of this interface, and others are likely to arise as autosuspend support becomes more widespread. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/power-management.txt | 22 +++++++-- drivers/usb/core/driver.c | 86 ++++++++++++++++++++++++++++++++++ drivers/usb/core/hub.c | 5 +- drivers/usb/core/usb.c | 1 + drivers/usb/core/usb.h | 1 + include/linux/usb.h | 9 ++++ 6 files changed, 117 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt index e48ea1d5101..ad642615ad4 100644 --- a/Documentation/usb/power-management.txt +++ b/Documentation/usb/power-management.txt @@ -313,11 +313,13 @@ three of the methods listed above. In addition, a driver indicates that it supports autosuspend by setting the .supports_autosuspend flag in its usb_driver structure. It is then responsible for informing the USB core whenever one of its interfaces becomes busy or idle. 
The -driver does so by calling these three functions: +driver does so by calling these five functions: int usb_autopm_get_interface(struct usb_interface *intf); void usb_autopm_put_interface(struct usb_interface *intf); int usb_autopm_set_interface(struct usb_interface *intf); + int usb_autopm_get_interface_async(struct usb_interface *intf); + void usb_autopm_put_interface_async(struct usb_interface *intf); The functions work by maintaining a counter in the usb_interface structure. When intf->pm_usage_count is > 0 then the interface is @@ -330,10 +332,12 @@ associated with the device itself rather than any of its interfaces. This field is used only by the USB core.) The driver owns intf->pm_usage_count; it can modify the value however -and whenever it likes. A nice aspect of the usb_autopm_* routines is -that the changes they make are protected by the usb_device structure's -PM mutex (udev->pm_mutex); however drivers may change pm_usage_count -without holding the mutex. +and whenever it likes. A nice aspect of the non-async usb_autopm_* +routines is that the changes they make are protected by the usb_device +structure's PM mutex (udev->pm_mutex); however drivers may change +pm_usage_count without holding the mutex. Drivers using the async +routines are responsible for their own synchronization and mutual +exclusion. usb_autopm_get_interface() increments pm_usage_count and attempts an autoresume if the new value is > 0 and the @@ -348,6 +352,14 @@ without holding the mutex. is suspended, and it attempts an autosuspend if the value is <= 0 and the device isn't suspended. + usb_autopm_get_interface_async() and + usb_autopm_put_interface_async() do almost the same things as + their non-async counterparts. The differences are: they do + not acquire the PM mutex, and they use a workqueue to do their + jobs. 
As a result they can be called in an atomic context, + such as an URB's completion handler, but when they return the + device will not generally not yet be in the desired state. + There also are a couple of utility routines drivers can use: usb_autopm_enable() sets pm_usage_cnt to 0 and then calls diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 8c081308b0e..23b3c7e79d4 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1341,6 +1341,19 @@ void usb_autosuspend_work(struct work_struct *work) usb_autopm_do_device(udev, 0); } +/* usb_autoresume_work - callback routine to autoresume a USB device */ +void usb_autoresume_work(struct work_struct *work) +{ + struct usb_device *udev = + container_of(work, struct usb_device, autoresume); + + /* Wake it up, let the drivers do their thing, and then put it + * back to sleep. + */ + if (usb_autopm_do_device(udev, 1) == 0) + usb_autopm_do_device(udev, -1); +} + /** * usb_autosuspend_device - delayed autosuspend of a USB device and its interfaces * @udev: the usb_device to autosuspend @@ -1491,6 +1504,45 @@ void usb_autopm_put_interface(struct usb_interface *intf) } EXPORT_SYMBOL_GPL(usb_autopm_put_interface); +/** + * usb_autopm_put_interface_async - decrement a USB interface's PM-usage counter + * @intf: the usb_interface whose counter should be decremented + * + * This routine does essentially the same thing as + * usb_autopm_put_interface(): it decrements @intf's usage counter and + * queues a delayed autosuspend request if the counter is <= 0. The + * difference is that it does not acquire the device's pm_mutex; + * callers must handle all synchronization issues themselves. + * + * Typically a driver would call this routine during an URB's completion + * handler, if no more URBs were pending. + * + * This routine can run in atomic context. 
+ */ +void usb_autopm_put_interface_async(struct usb_interface *intf) +{ + struct usb_device *udev = interface_to_usbdev(intf); + int status = 0; + + if (intf->condition == USB_INTERFACE_UNBOUND) { + status = -ENODEV; + } else { + udev->last_busy = jiffies; + --intf->pm_usage_cnt; + if (udev->autosuspend_disabled || udev->autosuspend_delay < 0) + status = -EPERM; + else if (intf->pm_usage_cnt <= 0 && + !timer_pending(&udev->autosuspend.timer)) { + queue_delayed_work(ksuspend_usb_wq, &udev->autosuspend, + round_jiffies_relative( + udev->autosuspend_delay)); + } + } + dev_vdbg(&intf->dev, "%s: status %d cnt %d\n", + __func__, status, intf->pm_usage_cnt); +} +EXPORT_SYMBOL_GPL(usb_autopm_put_interface_async); + /** * usb_autopm_get_interface - increment a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be incremented @@ -1536,6 +1588,37 @@ int usb_autopm_get_interface(struct usb_interface *intf) } EXPORT_SYMBOL_GPL(usb_autopm_get_interface); +/** + * usb_autopm_get_interface_async - increment a USB interface's PM-usage counter + * @intf: the usb_interface whose counter should be incremented + * + * This routine does much the same thing as + * usb_autopm_get_interface(): it increments @intf's usage counter and + * queues an autoresume request if the result is > 0. The differences + * are that it does not acquire the device's pm_mutex (callers must + * handle all synchronization issues themselves), and it does not + * autoresume the device directly (it only queues a request). After a + * successful call, the device will generally not yet be resumed. + * + * This routine can run in atomic context. 
+ */ +int usb_autopm_get_interface_async(struct usb_interface *intf) +{ + struct usb_device *udev = interface_to_usbdev(intf); + int status = 0; + + if (intf->condition == USB_INTERFACE_UNBOUND) + status = -ENODEV; + else if (udev->autoresume_disabled) + status = -EPERM; + else if (++intf->pm_usage_cnt > 0 && udev->state == USB_STATE_SUSPENDED) + queue_work(ksuspend_usb_wq, &udev->autoresume); + dev_vdbg(&intf->dev, "%s: status %d cnt %d\n", + __func__, status, intf->pm_usage_cnt); + return status; +} +EXPORT_SYMBOL_GPL(usb_autopm_get_interface_async); + /** * usb_autopm_set_interface - set a USB interface's autosuspend state * @intf: the usb_interface whose state should be set @@ -1563,6 +1646,9 @@ EXPORT_SYMBOL_GPL(usb_autopm_set_interface); void usb_autosuspend_work(struct work_struct *work) {} +void usb_autoresume_work(struct work_struct *work) +{} + #endif /* CONFIG_USB_SUSPEND */ /** diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b19cbfcd51d..95fb3104ba4 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1374,8 +1374,9 @@ static void usb_stop_pm(struct usb_device *udev) usb_autosuspend_device(udev->parent); usb_pm_unlock(udev); - /* Stop any autosuspend requests already submitted */ - cancel_rearming_delayed_work(&udev->autosuspend); + /* Stop any autosuspend or autoresume requests already submitted */ + cancel_delayed_work_sync(&udev->autosuspend); + cancel_work_sync(&udev->autoresume); } #else diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 400fa4cc9a3..44f2fc750b6 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -402,6 +402,7 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, #ifdef CONFIG_PM mutex_init(&dev->pm_mutex); INIT_DELAYED_WORK(&dev->autosuspend, usb_autosuspend_work); + INIT_WORK(&dev->autoresume, usb_autoresume_work); dev->autosuspend_delay = usb_autosuspend_delay * HZ; dev->connect_time = jiffies; dev->active_duration = -jiffies; diff --git 
a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 9a1a45ac3ad..b60ebb4de1a 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -45,6 +45,7 @@ extern int usb_suspend(struct device *dev, pm_message_t msg); extern int usb_resume(struct device *dev); extern void usb_autosuspend_work(struct work_struct *work); +extern void usb_autoresume_work(struct work_struct *work); extern int usb_port_suspend(struct usb_device *dev); extern int usb_port_resume(struct usb_device *dev); extern int usb_external_suspend_device(struct usb_device *udev, diff --git a/include/linux/usb.h b/include/linux/usb.h index f72aa51f7bc..859a88e6ce9 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -398,6 +398,7 @@ struct usb_tt; * @urbnum: number of URBs submitted for the whole device * @active_duration: total time device is not suspended * @autosuspend: for delayed autosuspends + * @autoresume: for autoresumes requested while in_interrupt * @pm_mutex: protects PM operations * @last_busy: time of last use * @autosuspend_delay: in jiffies @@ -476,6 +477,7 @@ struct usb_device { #ifdef CONFIG_PM struct delayed_work autosuspend; + struct work_struct autoresume; struct mutex pm_mutex; unsigned long last_busy; @@ -513,6 +515,8 @@ extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id); extern int usb_autopm_set_interface(struct usb_interface *intf); extern int usb_autopm_get_interface(struct usb_interface *intf); extern void usb_autopm_put_interface(struct usb_interface *intf); +extern int usb_autopm_get_interface_async(struct usb_interface *intf); +extern void usb_autopm_put_interface_async(struct usb_interface *intf); static inline void usb_autopm_enable(struct usb_interface *intf) { @@ -539,8 +543,13 @@ static inline int usb_autopm_set_interface(struct usb_interface *intf) static inline int usb_autopm_get_interface(struct usb_interface *intf) { return 0; } +static inline int usb_autopm_get_interface_async(struct usb_interface *intf) +{ return 0; } + 
static inline void usb_autopm_put_interface(struct usb_interface *intf) { } +static inline void usb_autopm_put_interface_async(struct usb_interface *intf) +{ } static inline void usb_autopm_enable(struct usb_interface *intf) { } static inline void usb_autopm_disable(struct usb_interface *intf) -- cgit v1.2.3-70-g09d2 From c838ea4626d6e982489ff519f9ecf5e1649ca90b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Dec 2008 10:40:06 -0500 Subject: USB: storage: make the "quirks=" module parameter writable This patch (as1190) makes usb-storage's "quirks=" module parameter writable, so that users can add entries for their devices at runtime with no need to reboot or reload usb-storage. New codes are added for the SANE_SENSE, CAPACITY_HEURISTICS, and CAPACITY_OK flags. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 7 ++ drivers/usb/storage/usb.c | 169 +++++++++++++++--------------------- 2 files changed, 76 insertions(+), 100 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8eb6e35405c..a58fc8b7339 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2396,14 +2396,21 @@ and is between 256 and 4096 characters. 
It is defined in the file and Product ID values (4-digit hex numbers) and Flags is a set of characters, each corresponding to a common usb-storage quirk flag as follows: + a = SANE_SENSE (collect more than 18 bytes + of sense data); c = FIX_CAPACITY (decrease the reported device capacity by one sector); + h = CAPACITY_HEURISTICS (decrease the + reported device capacity by one + sector if the number is odd); i = IGNORE_DEVICE (don't bind to this device); l = NOT_LOCKABLE (don't try to lock and unlock ejectable media); m = MAX_SECTORS_64 (don't transfer more than 64 sectors = 32 KB at a time); + o = CAPACITY_OK (accept the capacity + reported by the device); r = IGNORE_RESIDUE (the device reports bogus residue values); s = SINGLE_LUN (the device has only one diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 80e234bf4e5..4becf495ca2 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -111,16 +111,10 @@ static unsigned int delay_use = 5; module_param(delay_use, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(delay_use, "seconds to delay before using a new device"); -static char *quirks; -module_param(quirks, charp, S_IRUGO); +static char quirks[128]; +module_param_string(quirks, quirks, sizeof(quirks), S_IRUGO | S_IWUSR); MODULE_PARM_DESC(quirks, "supplemental list of device IDs and their quirks"); -struct quirks_entry { - u16 vid, pid; - u32 fflags; -}; -static struct quirks_entry *quirks_list, *quirks_end; - /* * The entries in this table correspond, line for line, @@ -481,28 +475,80 @@ static int associate_dev(struct us_data *us, struct usb_interface *intf) return 0; } +/* Works only for digits and letters, but small and fast */ +#define TOLOWER(x) ((x) | 0x20) + /* Adjust device flags based on the "quirks=" module parameter */ static void adjust_quirks(struct us_data *us) { - u16 vid, pid; - struct quirks_entry *q; - unsigned int mask = (US_FL_FIX_CAPACITY | US_FL_IGNORE_DEVICE | + char *p; + u16 vid = 
le16_to_cpu(us->pusb_dev->descriptor.idVendor); + u16 pid = le16_to_cpu(us->pusb_dev->descriptor.idProduct); + unsigned f = 0; + unsigned int mask = (US_FL_SANE_SENSE | US_FL_FIX_CAPACITY | + US_FL_CAPACITY_HEURISTICS | US_FL_IGNORE_DEVICE | US_FL_NOT_LOCKABLE | US_FL_MAX_SECTORS_64 | - US_FL_IGNORE_RESIDUE | US_FL_SINGLE_LUN | - US_FL_NO_WP_DETECT); - - vid = le16_to_cpu(us->pusb_dev->descriptor.idVendor); - pid = le16_to_cpu(us->pusb_dev->descriptor.idProduct); - - for (q = quirks_list; q != quirks_end; ++q) { - if (q->vid == vid && q->pid == pid) { - us->fflags = (us->fflags & ~mask) | q->fflags; - dev_info(&us->pusb_intf->dev, "Quirks match for " - "vid %04x pid %04x: %x\n", - vid, pid, q->fflags); + US_FL_CAPACITY_OK | US_FL_IGNORE_RESIDUE | + US_FL_SINGLE_LUN | US_FL_NO_WP_DETECT); + + p = quirks; + while (*p) { + /* Each entry consists of VID:PID:flags */ + if (vid == simple_strtoul(p, &p, 16) && + *p == ':' && + pid == simple_strtoul(p+1, &p, 16) && + *p == ':') break; + + /* Move forward to the next entry */ + while (*p) { + if (*p++ == ',') + break; } } + if (!*p) /* No match */ + return; + + /* Collect the flags */ + while (*++p && *p != ',') { + switch (TOLOWER(*p)) { + case 'a': + f |= US_FL_SANE_SENSE; + break; + case 'c': + f |= US_FL_FIX_CAPACITY; + break; + case 'h': + f |= US_FL_CAPACITY_HEURISTICS; + break; + case 'i': + f |= US_FL_IGNORE_DEVICE; + break; + case 'l': + f |= US_FL_NOT_LOCKABLE; + break; + case 'm': + f |= US_FL_MAX_SECTORS_64; + break; + case 'o': + f |= US_FL_CAPACITY_OK; + break; + case 'r': + f |= US_FL_IGNORE_RESIDUE; + break; + case 's': + f |= US_FL_SINGLE_LUN; + break; + case 'w': + f |= US_FL_NO_WP_DETECT; + break; + /* Ignore unrecognized flag characters */ + } + } + us->fflags = (us->fflags & ~mask) | f; + dev_info(&us->pusb_intf->dev, "Quirks match for " + "vid %04x pid %04x: %x\n", + vid, pid, f); } /* Find an unusual_dev descriptor (always succeeds in the current code) */ @@ -1092,88 +1138,11 @@ static struct 
usb_driver usb_storage_driver = { .soft_unbind = 1, }; -/* Works only for digits and letters, but small and fast */ -#define TOLOWER(x) ((x) | 0x20) - -static void __init parse_quirks(void) -{ - int n, i; - char *p; - - if (!quirks) - return; - - /* Count the ':' characters to get 2 * the number of entries */ - n = 0; - for (p = quirks; *p; ++p) { - if (*p == ':') - ++n; - } - n /= 2; - if (n == 0) - return; /* Don't allocate 0 bytes */ - - quirks_list = kmalloc(n * sizeof(*quirks_list), GFP_KERNEL); - if (!quirks_list) - return; - - p = quirks; - quirks_end = quirks_list; - for (i = 0; i < n && *p; ++i) { - unsigned f = 0; - - /* Each entry consists of VID:PID:flags */ - quirks_end->vid = simple_strtoul(p, &p, 16); - if (*p != ':') - goto skip_to_next; - quirks_end->pid = simple_strtoul(p+1, &p, 16); - if (*p != ':') - goto skip_to_next; - - while (*++p && *p != ',') { - switch (TOLOWER(*p)) { - case 'c': - f |= US_FL_FIX_CAPACITY; - break; - case 'i': - f |= US_FL_IGNORE_DEVICE; - break; - case 'l': - f |= US_FL_NOT_LOCKABLE; - break; - case 'm': - f |= US_FL_MAX_SECTORS_64; - break; - case 'r': - f |= US_FL_IGNORE_RESIDUE; - break; - case 's': - f |= US_FL_SINGLE_LUN; - break; - case 'w': - f |= US_FL_NO_WP_DETECT; - break; - /* Ignore unrecognized flag characters */ - } - } - quirks_end->fflags = f; - ++quirks_end; - - skip_to_next: - /* Entries are separated by commas */ - while (*p) { - if (*p++ == ',') - break; - } - } /* for (i = 0; ...) 
*/ -} - static int __init usb_stor_init(void) { int retval; printk(KERN_INFO "Initializing USB Mass Storage driver...\n"); - parse_quirks(); /* register the driver, return usb_register return code if error */ retval = usb_register(&usb_storage_driver); -- cgit v1.2.3-70-g09d2 From 0d695913b0016b362a84a8bb6d6e28f8d90a70e2 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:32 -0800 Subject: wimax: documentation for the stack wimax documentation Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- Documentation/wimax/README.wimax | 81 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 Documentation/wimax/README.wimax (limited to 'Documentation') diff --git a/Documentation/wimax/README.wimax b/Documentation/wimax/README.wimax new file mode 100644 index 00000000000..b78c4378084 --- /dev/null +++ b/Documentation/wimax/README.wimax @@ -0,0 +1,81 @@ + + Linux kernel WiMAX stack + + (C) 2008 Intel Corporation < linux-wimax@intel.com > + + This provides a basic Linux kernel WiMAX stack to provide a common + control API for WiMAX devices, usable from kernel and user space. + +1. Design + + The WiMAX stack is designed to provide for common WiMAX control + services to current and future WiMAX devices from any vendor. + + Because currently there is only one and we don't know what would be the + common services, the APIs it currently provides are very minimal. + However, it is done in such a way that it is easily extensible to + accommodate future requirements. + + The stack works by embedding a struct wimax_dev in your device's + control structures. This provides a set of callbacks that the WiMAX + stack will call in order to implement control operations requested by + the user. As well, the stack provides API functions that the driver + calls to notify about changes of state in the device. 
+ + The stack exports the API calls needed to control the device to user + space using generic netlink as a marshalling mechanism. You can access + them using your own code or use the wrappers provided for your + convenience in libwimax (in the wimax-tools package). + + For detailed information on the stack, please see + include/linux/wimax.h. + +2. Usage + + For usage in a driver (registration, API, etc) please refer to the + instructions in the header file include/linux/wimax.h. + + When a device is registered with the WiMAX stack, a set of debugfs + files will appear in /sys/kernel/debug/wimax:wmxX that you can tweak for + control. + +2.1. Obtaining debug information: debugfs entries + + The WiMAX stack is compiled, by default, with debug messages that can + be used to diagnose issues. By default, said messages are disabled. + + The drivers will register debugfs entries that allow the user to tweak + debug settings. + + Each driver, when registering with the stack, will cause a debugfs + directory named wimax:DEVICENAME to be created; optionally, it might + create more subentries below it. + +2.1.1. Increasing debug output + + The files named *dl_* indicate knobs for controlling the debug output + of different submodules of the WiMAX stack: + * +# find /sys/kernel/debug/wimax\:wmx0 -name \*dl_\* +/sys/kernel/debug/wimax:wmx0/wimax_dl_stack +/sys/kernel/debug/wimax:wmx0/wimax_dl_op_rfkill +/sys/kernel/debug/wimax:wmx0/wimax_dl_op_reset +/sys/kernel/debug/wimax:wmx0/wimax_dl_op_msg +/sys/kernel/debug/wimax:wmx0/wimax_dl_id_table +/sys/kernel/debug/wimax:wmx0/wimax_dl_debugfs +/sys/kernel/debug/wimax:wmx0/.... # other driver specific files + + NOTE: Of course, if debugfs is mounted in a directory other than + /sys/kernel/debug, those paths will change. + + By reading the file you can obtain the current value of said debug + level; by writing to it, you can set it. 
+ + To increase the debug level of, for example, the id-table submodule, + just write: + +$ echo 3 > /sys/kernel/debug/wimax:wmx0/wimax_dl_id_table + + Increasing numbers yield increasing debug information; for details of + what is printed and the available levels, check the source. The code + uses 0 for disabled and increasing values until 8. -- cgit v1.2.3-70-g09d2 From b0c83ae1de01880075955c7224e751440688ec74 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Tue, 23 Dec 2008 16:18:24 -0800 Subject: wimax: Makefile, Kconfig and docbook linkage for the stack This patch provides Makefile and KConfig for the WiMAX stack, integrating them into the networking stack's Makefile, Kconfig and doc-book templates. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/networking.tmpl | 8 ++++++++ net/Kconfig | 2 ++ net/Makefile | 1 + net/wimax/Kconfig | 38 +++++++++++++++++++++++++++++++++++ net/wimax/Makefile | 13 ++++++++++++ 5 files changed, 62 insertions(+) create mode 100644 net/wimax/Kconfig create mode 100644 net/wimax/Makefile (limited to 'Documentation') diff --git a/Documentation/DocBook/networking.tmpl b/Documentation/DocBook/networking.tmpl index 627707a3cb9..59ad69a9d77 100644 --- a/Documentation/DocBook/networking.tmpl +++ b/Documentation/DocBook/networking.tmpl @@ -74,6 +74,14 @@ !Enet/sunrpc/rpcb_clnt.c !Enet/sunrpc/clnt.c + WiMAX +!Enet/wimax/op-msg.c +!Enet/wimax/op-reset.c +!Enet/wimax/op-rfkill.c +!Enet/wimax/stack.c +!Iinclude/net/wimax.h +!Iinclude/linux/wimax.h + diff --git a/net/Kconfig b/net/Kconfig index 6ec2cce7c16..bf2776018f7 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -254,6 +254,8 @@ source "net/mac80211/Kconfig" endif # WIRELESS +source "net/wimax/Kconfig" + source "net/rfkill/Kconfig" source "net/9p/Kconfig" diff --git a/net/Makefile b/net/Makefile index ba4460432b7..0fcce89d716 100644 --- a/net/Makefile +++ b/net/Makefile @@ -63,3 +63,4 @@ endif ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) 
+= sysctl_net.o endif +obj-$(CONFIG_WIMAX) += wimax/ diff --git a/net/wimax/Kconfig b/net/wimax/Kconfig new file mode 100644 index 00000000000..0bdbb692820 --- /dev/null +++ b/net/wimax/Kconfig @@ -0,0 +1,38 @@ +# +# WiMAX LAN device configuration +# + +menuconfig WIMAX + tristate "WiMAX Wireless Broadband support" + help + + Select to configure support for devices that provide + wireless broadband connectivity using the WiMAX protocol + (IEEE 802.16). + + Please note that most of these devices require signing up + for a service plan with a provider. + + The different WiMAX drivers can be enabled in the menu entry + + Device Drivers > Network device support > WiMAX Wireless + Broadband devices + + If unsure, it is safe to select M (module). + +config WIMAX_DEBUG_LEVEL + int "WiMAX debug level" + depends on WIMAX + default 8 + help + + Select the maximum debug verbosity level to be compiled into + the WiMAX stack code. + + By default, debug messages are disabled at runtime and can + be selectively enabled for different parts of the code using + the sysfs debug-levels file. + + If set at zero, this will compile out all the debug code. + + It is recommended that it is left at 8. diff --git a/net/wimax/Makefile b/net/wimax/Makefile new file mode 100644 index 00000000000..5b80b941c2c --- /dev/null +++ b/net/wimax/Makefile @@ -0,0 +1,13 @@ + +obj-$(CONFIG_WIMAX) += wimax.o + +wimax-y := \ + id-table.o \ + op-msg.o \ + op-reset.o \ + op-rfkill.o \ + stack.o + +wimax-$(CONFIG_DEBUG_FS) += debugfs.o + + -- cgit v1.2.3-70-g09d2 From 3e91029ae049852c153da3fc355ab255ea7e2e0a Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:42 -0800 Subject: i2400m: documentation and instructions for usage The driver for the i2400m is a stacked driver. There is a core driver, the bus-generic driver that has no knowledge or dependencies on how the device is connected to the system; it only knows how to speak the device protocol. 
Then there are the bus-specific drivers (for USB and SDIO) that provide backends for the generic driver to communicate with the device. The bus generic driver connects to the network and WiMAX stacks on the top side, and on the bottom to the bus-specific drivers. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- Documentation/wimax/README.i2400m | 260 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 Documentation/wimax/README.i2400m (limited to 'Documentation') diff --git a/Documentation/wimax/README.i2400m b/Documentation/wimax/README.i2400m new file mode 100644 index 00000000000..7dffd8919cb --- /dev/null +++ b/Documentation/wimax/README.i2400m @@ -0,0 +1,260 @@ + + Driver for the Intel Wireless Wimax Connection 2400m + + (C) 2008 Intel Corporation < linux-wimax@intel.com > + + This provides a driver for the Intel Wireless WiMAX Connection 2400m + and a basic Linux kernel WiMAX stack. + +1. Requirements + + * Linux installation with Linux kernel 2.6.22 or newer (if building + from a separate tree) + * Intel i2400m Echo Peak or Baxter Peak; this includes the Intel + Wireless WiMAX/WiFi Link 5x50 series. + * build tools: + + Linux kernel development package for the target kernel; to + build against your currently running kernel, you need to have + the kernel development package corresponding to the running + image installed (usually if your kernel is named + linux-VERSION, the development package is called + linux-dev-VERSION or linux-headers-VERSION). + + GNU C Compiler, make + +2. Compilation and installation + +2.1. Compilation of the drivers included in the kernel + + Configure the kernel; to enable the WiMAX drivers select Drivers > + Networking Drivers > WiMAX device support. Enable all of them as + modules (easier). + + If USB or SDIO are not enabled in the kernel configuration, the options + to build the i2400m USB or SDIO drivers will not show. 
Enable said + subsystems and go back to the WiMAX menu to enable the drivers. + + Compile and install your kernel as usual. + +2.2. Compilation of the drivers distributed as a standalone module + + To compile + +$ cd source/directory +$ make + + Once built you can load and unload using the provided load.sh script; + load.sh will load the modules, load.sh u will unload them. + + To install in the default kernel directories (and enable auto loading + when the device is plugged): + +$ make install +$ depmod -a + + If your kernel development files are located in a non standard + directory or if you want to build for a kernel that is not the + currently running one, set KDIR to the right location: + +$ make KDIR=/path/to/kernel/dev/tree + + For more information, please contact linux-wimax@intel.com. + +3. Installing the firmware + + The firmware can be obtained from http://linuxwimax.org or might have + been supplied with your hardware. + + It has to be installed in the target system: + * +$ cp FIRMWAREFILE.sbcf /lib/firmware/i2400m-fw-BUSTYPE-1.3.sbcf + + * NOTE: if your firmware came in an .rpm or .deb file, just install + it as normal, with the rpm (rpm -i FIRMWARE.rpm) or dpkg + (dpkg -i FIRMWARE.deb) commands. No further action is needed. + * BUSTYPE will be usb or sdio, depending on the hardware you have. + Each hardware type comes with its own firmware and will not work + with other types. + +4. Design + + This package contains two major parts: a WiMAX kernel stack and a + driver for the Intel i2400m. + + The WiMAX stack is designed to provide for common WiMAX control + services to current and future WiMAX devices from any vendor; please + see README.wimax for details. + + The i2400m kernel driver is broken up in two main parts: the bus + generic driver and the bus-specific drivers. The bus generic driver + forms the driver core and contains no knowledge of the actual method we + use to connect to the device. 
The bus specific drivers are just the + glue to connect the bus-generic driver and the device. Currently only + USB and SDIO are supported. See drivers/net/wimax/i2400m/i2400m.h for + more information. + + The bus generic driver is logically broken up in two parts: OS-glue and + hardware-glue. The OS-glue interfaces with Linux. The hardware-glue + interfaces with the device using an interface provided by the + bus-specific driver. The reason for this breakup is to be able to + easily reuse the hardware-glue to write drivers for other OSes; note + the hardware glue part is written as a native Linux driver; no + abstraction layers are used, so to port to another OS, the Linux kernel + API calls should be replaced with the target OS's. + +5. Usage + + To load the driver, follow the instructions in the install section; + once the driver is loaded, plug in the device (unless it is permanently + plugged in). The driver will enumerate the device, upload the firmware + and output messages in the kernel log (dmesg, /var/log/messages or + /var/log/kern.log) such as: + +... +i2400m_usb 5-4:1.0: firmware interface version 8.0.0 +i2400m_usb 5-4:1.0: WiMAX interface wmx0 (00:1d:e1:01:94:2c) ready + + At this point the device is ready to work. + + Current versions require the Intel WiMAX Network Service in userspace + to make things work. See the network service's README for instructions + on how to scan, connect and disconnect. + +5.1. 
Module parameters + + Module parameters can be set at kernel or module load time or by + echoing values: + +$ echo VALUE > /sys/module/MODULENAME/parameters/PARAMETERNAME + + To make changes permanent, for example, for the i2400m module, you can + also create a file named /etc/modprobe.d/i2400m containing: + +options i2400m idle_mode_disabled=1 + + To find which parameters are supported by a module, run: + +$ modinfo path/to/module.ko + + During kernel bootup (if the driver is linked in the kernel), specify + the following to the kernel command line: + +i2400m.PARAMETER=VALUE + +5.1.1. i2400m: idle_mode_disabled + + The i2400m module supports a parameter to disable idle mode. This + parameter, once set, will take effect only when the device is + reinitialized by the driver (e.g. following a reset or a reconnect). + +5.2. Debug operations: debugfs entries + + The driver will register debugfs entries that allow the user to tweak + debug settings. There are three main container directories where + entries are placed, which correspond to the three blocks an i2400m WiMAX + driver has: + * /sys/kernel/debug/wimax:DEVNAME/ for the generic WiMAX stack + controls + * /sys/kernel/debug/wimax:DEVNAME/i2400m for the i2400m generic + driver controls + * /sys/kernel/debug/wimax:DEVNAME/i2400m-usb (or -sdio) for the + bus-specific i2400m-usb or i2400m-sdio controls. + + Of course, if debugfs is mounted in a directory other than + /sys/kernel/debug, those paths will change. + +5.2.1. 
Increasing debug output + + The files named *dl_* indicate knobs for controlling the debug output + of different submodules: + * +# find /sys/kernel/debug/wimax\:wmx0 -name \*dl_\* +/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_tx +/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_rx +/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_notif +/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_fw +/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_usb +/sys/kernel/debug/wimax:wmx0/i2400m/dl_tx +/sys/kernel/debug/wimax:wmx0/i2400m/dl_rx +/sys/kernel/debug/wimax:wmx0/i2400m/dl_rfkill +/sys/kernel/debug/wimax:wmx0/i2400m/dl_netdev +/sys/kernel/debug/wimax:wmx0/i2400m/dl_fw +/sys/kernel/debug/wimax:wmx0/i2400m/dl_debugfs +/sys/kernel/debug/wimax:wmx0/i2400m/dl_driver +/sys/kernel/debug/wimax:wmx0/i2400m/dl_control +/sys/kernel/debug/wimax:wmx0/wimax_dl_stack +/sys/kernel/debug/wimax:wmx0/wimax_dl_op_rfkill +/sys/kernel/debug/wimax:wmx0/wimax_dl_op_reset +/sys/kernel/debug/wimax:wmx0/wimax_dl_op_msg +/sys/kernel/debug/wimax:wmx0/wimax_dl_id_table +/sys/kernel/debug/wimax:wmx0/wimax_dl_debugfs + + By reading the file you can obtain the current value of said debug + level; by writing to it, you can set it. + + To increase the debug level of, for example, the i2400m's generic TX + engine, just write: + +$ echo 3 > /sys/kernel/debug/wimax:wmx0/i2400m/dl_tx + + Increasing numbers yield increasing debug information; for details of + what is printed and the available levels, check the source. The code + uses 0 for disabled and increasing values until 8. + +5.2.2. 
RX and TX statistics + + The i2400m/rx_stats and i2400m/tx_stats provide statistics about the + data reception/delivery from the device: + +$ cat /sys/kernel/debug/wimax:wmx0/i2400m/rx_stats +45 1 3 34 3104 48 480 + + The numbers reported are + * packets/RX-buffer: total, min, max + * RX-buffers: total RX buffers received, accumulated RX buffer size + in bytes, min size received, max size received + + Thus, to find the average buffer size received, divide accumulated + RX-buffer / total RX-buffers. + + To clear the statistics back to 0, write anything to the rx_stats file: + +$ echo 1 > /sys/kernel/debug/wimax:wmx0/i2400m/rx_stats + + Likewise for TX. + + Note the packets this debug file refers to are not network packets, but + packets in the sense of the device-specific protocol for communication + to the host. See drivers/net/wimax/i2400m/tx.c. + +5.2.3. Tracing messages received from user space + + To echo messages received from user space into the trace pipe that the + i2400m driver creates, set the debug file i2400m/trace_msg_from_user to + 1: + * +$ echo 1 > /sys/kernel/debug/wimax:wmx0/i2400m/trace_msg_from_user + +5.2.4. Performing a device reset + + By writing a 0, a 1 or a 2 to the file + /sys/kernel/debug/wimax:wmx0/reset, the driver performs a warm (without + disconnecting from the bus), cold (disconnecting from the bus) or bus + (bus specific) reset on the device. + +5.2.5. Asking the device to enter power saving mode + + By writing any value to the /sys/kernel/debug/wimax:wmx0 file, the + device will attempt to enter power saving mode. + +6. Troubleshooting + +6.1. Driver complains about 'i2400m-fw-usb-1.3.sbcf: request failed' + + If upon connecting the device, the following is output in the kernel + log: + +i2400m_usb 5-4:1.0: fw i2400m-fw-usb-1.3.sbcf: request failed: -2 + + This means that the driver cannot locate the firmware file named + /lib/firmware/i2400m-fw-usb-1.3.sbcf. Check that the file is present in + the right location. 
-- cgit v1.2.3-70-g09d2 From e8de1481fd7126ee9e93d6889da6f00c05e1e019 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 22 Oct 2008 19:55:31 -0700 Subject: resource: allow MMIO exclusivity for device drivers Device drivers that use pci_request_regions() (and similar APIs) have a reasonable expectation that they are the only ones accessing their device. As part of the e1000e hunt, we were afraid that some userland (X or some bootsplash stuff) was mapping the MMIO region that the driver thought it had exclusively via /dev/mem or via various sysfs resource mappings. This patch adds the option for device drivers to cause their reserved regions to the "banned from /dev/mem use" list, so now both kernel memory and device-exclusive MMIO regions are banned. NOTE: This is only active when CONFIG_STRICT_DEVMEM is set. In addition to the config option, a kernel parameter iomem=relaxed is provided for the cases where developers want to diagnose, in the field, drivers issues from userspace. Reviewed-by: Matthew Wilcox Signed-off-by: Arjan van de Ven Signed-off-by: Jesse Barnes --- Documentation/kernel-parameters.txt | 4 ++ arch/x86/mm/init_32.c | 2 + arch/x86/mm/init_64.c | 2 + drivers/net/e1000e/netdev.c | 2 +- drivers/pci/pci-sysfs.c | 3 + drivers/pci/pci.c | 107 ++++++++++++++++++++++++++++++++---- include/linux/ioport.h | 11 +++- include/linux/pci.h | 3 + kernel/resource.c | 61 +++++++++++++++++++- 9 files changed, 176 insertions(+), 19 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0b3f6711d2f..0072fabb1dd 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -918,6 +918,10 @@ and is between 256 and 4096 characters. It is defined in the file inttest= [IA64] + iomem= Disable strict checking of access to MMIO memory + strict regions from userspace. 
+ relaxed + iommu= [x86] off force diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 544d724caee..88f1b10de3b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -328,6 +328,8 @@ int devmem_is_allowed(unsigned long pagenr) { if (pagenr <= 256) return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; if (!page_is_ram(pagenr)) return 1; return 0; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 54c437e9654..23f68e77ad1 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -888,6 +888,8 @@ int devmem_is_allowed(unsigned long pagenr) { if (pagenr <= 256) return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; if (!page_is_ram(pagenr)) return 1; return 0; diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index d4639facd1b..91817d0afca 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -4807,7 +4807,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev, } } - err = pci_request_selected_regions(pdev, + err = pci_request_selected_regions_exclusive(pdev, pci_select_bars(pdev, IORESOURCE_MEM), e1000e_driver_name); if (err) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 388440e0d22..d5cdccf27a6 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -620,6 +620,9 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, vma->vm_pgoff += start >> PAGE_SHIFT; mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io; + if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(start)) + return -EINVAL; + return pci_mmap_page_range(pdev, vma, mmap_type, write_combine); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2cfa41e367a..47663dc0daf 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1395,7 +1395,8 @@ void pci_release_region(struct pci_dev *pdev, int bar) * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. 
*/ -int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) +static int __pci_request_region(struct pci_dev *pdev, int bar, const char *res_name, + int exclusive) { struct pci_devres *dr; @@ -1408,8 +1409,9 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) goto err_out; } else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { - if (!request_mem_region(pci_resource_start(pdev, bar), - pci_resource_len(pdev, bar), res_name)) + if (!__request_mem_region(pci_resource_start(pdev, bar), + pci_resource_len(pdev, bar), res_name, + exclusive)) goto err_out; } @@ -1427,6 +1429,47 @@ err_out: return -EBUSY; } +/** + * pci_request_region - Reserved PCI I/O and memory resource + * @pdev: PCI device whose resources are to be reserved + * @bar: BAR to be reserved + * @res_name: Name to be associated with resource. + * + * Mark the PCI region associated with PCI device @pdev BR @bar as + * being reserved by owner @res_name. Do not access any + * address inside the PCI regions unless this call returns + * successfully. + * + * Returns 0 on success, or %EBUSY on error. A warning + * message is also printed on failure. + */ +int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) +{ + return __pci_request_region(pdev, bar, res_name, 0); +} + +/** + * pci_request_region_exclusive - Reserved PCI I/O and memory resource + * @pdev: PCI device whose resources are to be reserved + * @bar: BAR to be reserved + * @res_name: Name to be associated with resource. + * + * Mark the PCI region associated with PCI device @pdev BR @bar as + * being reserved by owner @res_name. Do not access any + * address inside the PCI regions unless this call returns + * successfully. + * + * Returns 0 on success, or %EBUSY on error. A warning + * message is also printed on failure. + * + * The key difference that _exclusive makes it that userspace is + * explicitly not allowed to map the resource via /dev/mem or + * sysfs. 
+ */ +int pci_request_region_exclusive(struct pci_dev *pdev, int bar, const char *res_name) +{ + return __pci_request_region(pdev, bar, res_name, IORESOURCE_EXCLUSIVE); +} /** * pci_release_selected_regions - Release selected PCI I/O and memory resources * @pdev: PCI device whose resources were previously reserved @@ -1444,20 +1487,14 @@ void pci_release_selected_regions(struct pci_dev *pdev, int bars) pci_release_region(pdev, i); } -/** - * pci_request_selected_regions - Reserve selected PCI I/O and memory resources - * @pdev: PCI device whose resources are to be reserved - * @bars: Bitmask of BARs to be requested - * @res_name: Name to be associated with resource - */ -int pci_request_selected_regions(struct pci_dev *pdev, int bars, - const char *res_name) +int __pci_request_selected_regions(struct pci_dev *pdev, int bars, + const char *res_name, int excl) { int i; for (i = 0; i < 6; i++) if (bars & (1 << i)) - if(pci_request_region(pdev, i, res_name)) + if (__pci_request_region(pdev, i, res_name, excl)) goto err_out; return 0; @@ -1469,6 +1506,26 @@ err_out: return -EBUSY; } + +/** + * pci_request_selected_regions - Reserve selected PCI I/O and memory resources + * @pdev: PCI device whose resources are to be reserved + * @bars: Bitmask of BARs to be requested + * @res_name: Name to be associated with resource + */ +int pci_request_selected_regions(struct pci_dev *pdev, int bars, + const char *res_name) +{ + return __pci_request_selected_regions(pdev, bars, res_name, 0); +} + +int pci_request_selected_regions_exclusive(struct pci_dev *pdev, + int bars, const char *res_name) +{ + return __pci_request_selected_regions(pdev, bars, res_name, + IORESOURCE_EXCLUSIVE); +} + /** * pci_release_regions - Release reserved PCI I/O and memory resources * @pdev: PCI device whose resources were previously reserved by pci_request_regions @@ -1501,6 +1558,29 @@ int pci_request_regions(struct pci_dev *pdev, const char *res_name) return pci_request_selected_regions(pdev, ((1 << 6) 
- 1), res_name); } +/** + * pci_request_regions_exclusive - Reserved PCI I/O and memory resources + * @pdev: PCI device whose resources are to be reserved + * @res_name: Name to be associated with resource. + * + * Mark all PCI regions associated with PCI device @pdev as + * being reserved by owner @res_name. Do not access any + * address inside the PCI regions unless this call returns + * successfully. + * + * pci_request_regions_exclusive() will mark the region so that + * /dev/mem and the sysfs MMIO access will not be allowed. + * + * Returns 0 on success, or %EBUSY on error. A warning + * message is also printed on failure. + */ +int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name) +{ + return pci_request_selected_regions_exclusive(pdev, + ((1 << 6) - 1), res_name); +} + + /** * pci_set_master - enables bus-mastering for device dev * @dev: the PCI device to enable @@ -2149,10 +2229,13 @@ EXPORT_SYMBOL(pci_find_capability); EXPORT_SYMBOL(pci_bus_find_capability); EXPORT_SYMBOL(pci_release_regions); EXPORT_SYMBOL(pci_request_regions); +EXPORT_SYMBOL(pci_request_regions_exclusive); EXPORT_SYMBOL(pci_release_region); EXPORT_SYMBOL(pci_request_region); +EXPORT_SYMBOL(pci_request_region_exclusive); EXPORT_SYMBOL(pci_release_selected_regions); EXPORT_SYMBOL(pci_request_selected_regions); +EXPORT_SYMBOL(pci_request_selected_regions_exclusive); EXPORT_SYMBOL(pci_set_master); EXPORT_SYMBOL(pci_set_mwi); EXPORT_SYMBOL(pci_try_set_mwi); diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 041e95aac2b..f6bb2ca8e3b 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -49,6 +49,7 @@ struct resource_list { #define IORESOURCE_SIZEALIGN 0x00020000 /* size indicates alignment */ #define IORESOURCE_STARTALIGN 0x00040000 /* start field is alignment */ +#define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 #define IORESOURCE_AUTO 
0x40000000 @@ -133,13 +134,16 @@ static inline unsigned long resource_type(struct resource *res) } /* Convenience shorthand with allocation */ -#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) -#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name)) +#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) +#define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) +#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) +#define request_mem_region_exclusive(start,n,name) \ + __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) #define rename_region(region, newname) do { (region)->name = (newname); } while (0) extern struct resource * __request_region(struct resource *, resource_size_t start, - resource_size_t n, const char *name); + resource_size_t n, const char *name, int relaxed); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) @@ -175,6 +179,7 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); +extern int iomem_is_exclusive(u64 addr); #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 59a3dc2059d..bfcb39ca887 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -686,10 +686,13 @@ void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(struct pci_dev *, u8, u8)); #define HAVE_PCI_REQ_REGIONS 2 int __must_check pci_request_regions(struct pci_dev *, const char *); +int __must_check pci_request_regions_exclusive(struct pci_dev *, const char *); void 
pci_release_regions(struct pci_dev *); int __must_check pci_request_region(struct pci_dev *, int, const char *); +int __must_check pci_request_region_exclusive(struct pci_dev *, int, const char *); void pci_release_region(struct pci_dev *, int); int pci_request_selected_regions(struct pci_dev *, int, const char *); +int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *); void pci_release_selected_regions(struct pci_dev *, int); /* drivers/pci/bus.c */ diff --git a/kernel/resource.c b/kernel/resource.c index e633106b12f..ca6a1536b20 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -623,7 +623,7 @@ resource_size_t resource_alignment(struct resource *res) */ struct resource * __request_region(struct resource *parent, resource_size_t start, resource_size_t n, - const char *name) + const char *name, int flags) { struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); @@ -634,6 +634,7 @@ struct resource * __request_region(struct resource *parent, res->start = start; res->end = start + n - 1; res->flags = IORESOURCE_BUSY; + res->flags |= flags; write_lock(&resource_lock); @@ -679,7 +680,7 @@ int __check_region(struct resource *parent, resource_size_t start, { struct resource * res; - res = __request_region(parent, start, n, "check-region"); + res = __request_region(parent, start, n, "check-region", 0); if (!res) return -EBUSY; @@ -776,7 +777,7 @@ struct resource * __devm_request_region(struct device *dev, dr->start = start; dr->n = n; - res = __request_region(parent, start, n, name); + res = __request_region(parent, start, n, name, 0); if (res) devres_add(dev, dr); else @@ -876,3 +877,57 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size) return err; } + +#ifdef CONFIG_STRICT_DEVMEM +static int strict_iomem_checks = 1; +#else +static int strict_iomem_checks; +#endif + +/* + * check if an address is reserved in the iomem resource tree + * returns 1 if reserved, 0 if not reserved. 
+ */ +int iomem_is_exclusive(u64 addr) +{ + struct resource *p = &iomem_resource; + int err = 0; + loff_t l; + int size = PAGE_SIZE; + + if (!strict_iomem_checks) + return 0; + + addr = addr & PAGE_MASK; + + read_lock(&resource_lock); + for (p = p->child; p ; p = r_next(NULL, p, &l)) { + /* + * We can probably skip the resources without + * IORESOURCE_IO attribute? + */ + if (p->start >= addr + size) + break; + if (p->end < addr) + continue; + if (p->flags & IORESOURCE_BUSY && + p->flags & IORESOURCE_EXCLUSIVE) { + err = 1; + break; + } + } + read_unlock(&resource_lock); + + return err; +} + +static int __init strict_iomem(char *str) +{ + if (strstr(str, "relaxed")) + strict_iomem_checks = 0; + if (strstr(str, "strict")) + strict_iomem_checks = 1; + return 1; +} + +__setup("iomem=", strict_iomem); -- cgit v1.2.3-70-g09d2 From 6a479079c07211bf348ac8a79754f26bea258f26 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 23 Dec 2008 03:08:29 +0000 Subject: PCI: Add pci_clear_master() as opposite of pci_set_master() During an online device reset it may be useful to disable bus-mastering. pci_disable_device() does that, and far more besides, so is not suitable for an online reset. Add pci_clear_master() which does just this. Signed-off-by: Ben Hutchings Reviewed-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- Documentation/PCI/pci.txt | 3 ++- drivers/pci/pci.c | 39 ++++++++++++++++++++++++++++----------- include/linux/pci.h | 1 + 3 files changed, 31 insertions(+), 12 deletions(-) (limited to 'Documentation') diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt index fd4907a2968..7f6de6ea5b4 100644 --- a/Documentation/PCI/pci.txt +++ b/Documentation/PCI/pci.txt @@ -294,7 +294,8 @@ NOTE: pci_enable_device() can fail! Check the return value. pci_set_master() will enable DMA by setting the bus master bit in the PCI_COMMAND register. It also fixes the latency timer value if -it's set to something bogus by the BIOS. 
+it's set to something bogus by the BIOS. pci_clear_master() will +disable DMA by clearing the bus master bit. If the PCI device can use the PCI Memory-Write-Invalidate transaction, call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c824dc8d617..f3fd55df67d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1667,6 +1667,22 @@ int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name) ((1 << 6) - 1), res_name); } +static void __pci_set_master(struct pci_dev *dev, bool enable) +{ + u16 old_cmd, cmd; + + pci_read_config_word(dev, PCI_COMMAND, &old_cmd); + if (enable) + cmd = old_cmd | PCI_COMMAND_MASTER; + else + cmd = old_cmd & ~PCI_COMMAND_MASTER; + if (cmd != old_cmd) { + dev_dbg(&dev->dev, "%s bus mastering\n", + enable ? "enabling" : "disabling"); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + dev->is_busmaster = enable; +} /** * pci_set_master - enables bus-mastering for device dev @@ -1675,21 +1691,21 @@ int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name) * Enables bus-mastering on the device and calls pcibios_set_master() * to do the needed arch specific settings. */ -void -pci_set_master(struct pci_dev *dev) +void pci_set_master(struct pci_dev *dev) { - u16 cmd; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - if (! 
(cmd & PCI_COMMAND_MASTER)) { - dev_dbg(&dev->dev, "enabling bus mastering\n"); - cmd |= PCI_COMMAND_MASTER; - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - dev->is_busmaster = 1; + __pci_set_master(dev, true); pcibios_set_master(dev); } +/** + * pci_clear_master - disables bus-mastering for device dev + * @dev: the PCI device to disable + */ +void pci_clear_master(struct pci_dev *dev) +{ + __pci_set_master(dev, false); +} + #ifdef PCI_DISABLE_MWI int pci_set_mwi(struct pci_dev *dev) { @@ -2346,6 +2362,7 @@ EXPORT_SYMBOL(pci_release_selected_regions); EXPORT_SYMBOL(pci_request_selected_regions); EXPORT_SYMBOL(pci_request_selected_regions_exclusive); EXPORT_SYMBOL(pci_set_master); +EXPORT_SYMBOL(pci_clear_master); EXPORT_SYMBOL(pci_set_mwi); EXPORT_SYMBOL(pci_try_set_mwi); EXPORT_SYMBOL(pci_clear_mwi); diff --git a/include/linux/pci.h b/include/linux/pci.h index 7cbecef19bb..0f6d2bb1df9 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -642,6 +642,7 @@ static inline int pci_is_managed(struct pci_dev *pdev) void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); +void pci_clear_master(struct pci_dev *dev); int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state); #define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); -- cgit v1.2.3-70-g09d2 From 4f628248a578585472e19e4cba2c604643af8c6c Mon Sep 17 00:00:00 2001 From: Jike Song Date: Mon, 5 Jan 2009 14:57:03 +0800 Subject: kbuild: reintroduce ALLSOURCE_ARCHS support for tags/cscope This patch reintroduce the ALLSOURCE_ARCHS support for tags/TAGS/ cscope targets. The Kbuild previously has this feature, but after moving the targets into scripts/tags.sh, ALLSOURCE_ARCHS disappears. 
It's something like this: $ make ALLSOURCE_ARCHS="x86 mips arm" tags cscope Signed-off-by: Jike Song Signed-off-by: Sam Ravnborg --- Documentation/kbuild/kbuild.txt | 7 +++++++ scripts/tags.sh | 18 +++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index 51771847e81..923f9ddee8f 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -124,3 +124,10 @@ KBUILD_EXTRA_SYMBOLS -------------------------------------------------- For modules use symbols from another modules. See more details in modules.txt. + +ALLSOURCE_ARCHS +-------------------------------------------------- +For tags/TAGS/cscope targets, you can specify more than one archs +to be included in the databases, separated by blankspace. e.g. + + $ make ALLSOURCE_ARCHS="x86 mips arm" tags diff --git a/scripts/tags.sh b/scripts/tags.sh index 9e3451d2c3a..fdbe78bb5e2 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -24,6 +24,11 @@ else tree=${srctree}/ fi +# Detect if ALLSOURCE_ARCHS is set. If not, we assume SRCARCH +if [ "${ALLSOURCE_ARCHS}" = "" ]; then + ALLSOURCE_ARCHS=${SRCARCH} +fi + # find sources in arch/$ARCH find_arch_sources() { @@ -54,26 +59,29 @@ find_other_sources() find_sources() { find_arch_sources $1 "$2" - find_include_sources "$2" - find_other_sources "$2" } all_sources() { - find_sources $SRCARCH '*.[chS]' + for arch in $ALLSOURCE_ARCHS + do + find_sources $arch '*.[chS]' + done if [ ! 
-z "$archinclude" ]; then find_arch_include_sources $archinclude '*.[chS]' fi + find_include_sources '*.[chS]' + find_other_sources '*.[chS]' } all_kconfigs() { - find_sources $SRCARCH 'Kconfig*' + find_sources $ALLSOURCE_ARCHS 'Kconfig*' } all_defconfigs() { - find_sources $SRCARCH "defconfig" + find_sources $ALLSOURCE_ARCHS "defconfig" } docscope() -- cgit v1.2.3-70-g09d2 From baa91878ab9b0f1cdb7ab03b53ee2e4389245644 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 6 Jan 2009 15:12:27 +0100 Subject: kbuild: fix typos (s/bin_shipped/bin.o_shipped/) in Documentation The text always mentions ...bin.o_shipped, just the example makefiles actually use ...bin_shipped. It was corrected in one place some time ago, these ones seem to have been forgotten. Signed-off-by: Wolfram Sang Signed-off-by: Sam Ravnborg --- Documentation/kbuild/modules.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt index 1821c077b43..b1096da953c 100644 --- a/Documentation/kbuild/modules.txt +++ b/Documentation/kbuild/modules.txt @@ -253,7 +253,7 @@ following files: # Module specific targets genbin: - echo "X" > 8123_bin_shipped + echo "X" > 8123_bin.o_shipped In example 2, we are down to two fairly simple files and for simple @@ -279,7 +279,7 @@ following files: # Module specific targets genbin: - echo "X" > 8123_bin_shipped + echo "X" > 8123_bin.o_shipped endif -- cgit v1.2.3-70-g09d2 From 8feae13110d60cc6287afabc2887366b0eb226c2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: NOMMU: Make VMAs per MM as for MMU-mode linux Make VMAs per mm_struct as for MMU-mode linux. This solves two problems: (1) In SYSV SHM where nattch for a segment does not reflect the number of shmat's (and forks) done. 
(2) In mmap() where the VMA's vm_mm is set to point to the parent mm by an exec'ing process when VM_EXECUTABLE is specified, regardless of the fact that a VMA might be shared and already have its vm_mm assigned to another process or a dead process. A new struct (vm_region) is introduced to track a mapped region and to remember the circumstances under which it may be shared and the vm_list_struct structure is discarded as it's no longer required. This patch makes the following additional changes: (1) Regions are now allocated with alloc_pages() rather than kmalloc() and with no recourse to __GFP_COMP, so the pages are not composite. Instead, each page has a reference on it held by the region. Anything else that is interested in such a page will have to get a reference on it to retain it. When the pages are released due to unmapping, each page is passed to put_page() and will be freed when the page usage count reaches zero. (2) Excess pages are trimmed after an allocation as the allocation must be made as a power-of-2 quantity of pages. (3) VMAs are added to the parent MM's R/B tree and mmap lists. As an MM may end up with overlapping VMAs within the tree, the VMA struct address is appended to the sort key. (4) Non-anonymous VMAs are now added to the backing inode's prio list. (5) Holes may be punched in anonymous VMAs with munmap(), releasing parts of the backing region. The VMA and region structs will be split if necessary. (6) sys_shmdt() only releases one attachment to a SYSV IPC shared memory segment instead of all the attachments at that address. Multiple shmat()'s return the same address under NOMMU-mode instead of different virtual addresses as under MMU-mode. (7) Core dumping for ELF-FDPIC requires fewer exceptions for NOMMU-mode. (8) /proc/maps is now the global list of mapped regions, and may list bits that aren't actually mapped anywhere.
(9) /proc/meminfo gains a line (tagged "MmapCopy") that indicates the amount of RAM currently allocated by mmap to hold mappable regions that can't be mapped directly. These are copies of the backing device or file if not anonymous. These changes make NOMMU mode more similar to MMU mode. The downside is that NOMMU mode requires some extra memory to track things over NOMMU without this patch (VMAs are no longer shared, and there are now region structs). Signed-off-by: David Howells Tested-by: Mike Frysinger Acked-by: Paul Mundt --- Documentation/nommu-mmap.txt | 18 +- arch/arm/include/asm/mmu.h | 1 - arch/blackfin/include/asm/mmu.h | 1 - arch/blackfin/kernel/ptrace.c | 6 +- arch/blackfin/kernel/traps.c | 11 +- arch/frv/kernel/ptrace.c | 11 +- arch/h8300/include/asm/mmu.h | 1 - arch/m68knommu/include/asm/mmu.h | 1 - arch/sh/include/asm/mmu.h | 1 - fs/binfmt_elf_fdpic.c | 27 +- fs/proc/internal.h | 2 - fs/proc/meminfo.c | 6 + fs/proc/nommu.c | 71 ++- fs/proc/task_nommu.c | 108 +++-- include/asm-frv/mmu.h | 1 - include/asm-m32r/mmu.h | 1 - include/linux/mm.h | 18 +- include/linux/mm_types.h | 18 +- ipc/shm.c | 12 + kernel/fork.c | 4 +- lib/Kconfig.debug | 7 + mm/mmap.c | 10 + mm/nommu.c | 960 +++++++++++++++++++++++++++------------ 23 files changed, 860 insertions(+), 436 deletions(-) (limited to 'Documentation') diff --git a/Documentation/nommu-mmap.txt b/Documentation/nommu-mmap.txt index 7714f57caad..02b89dcf38a 100644 --- a/Documentation/nommu-mmap.txt +++ b/Documentation/nommu-mmap.txt @@ -109,12 +109,18 @@ and it's also much more restricted in the latter case: FURTHER NOTES ON NO-MMU MMAP ============================ - (*) A request for a private mapping of less than a page in size may not return - a page-aligned buffer. This is because the kernel calls kmalloc() to - allocate the buffer, not get_free_page(). - - (*) A list of all the mappings on the system is visible through /proc/maps in - no-MMU mode. 
+ (*) A request for a private mapping of a file may return a buffer that is not + page-aligned. This is because XIP may take place, and the data may not be + paged aligned in the backing store. + + (*) A request for an anonymous mapping will always be page aligned. If + possible the size of the request should be a power of two otherwise some + of the space may be wasted as the kernel must allocate a power-of-2 + granule but will only discard the excess if appropriately configured as + this has an effect on fragmentation. + + (*) A list of all the private copy and anonymous mappings on the system is + visible through /proc/maps in no-MMU mode. (*) A list of all the mappings in use by a process is visible through /proc//maps in no-MMU mode. diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 53099d4ee42..b561584d04a 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -24,7 +24,6 @@ typedef struct { * modified for 2.6 by Hyok S. Choi */ typedef struct { - struct vm_list_struct *vmlist; unsigned long end_brk; } mm_context_t; diff --git a/arch/blackfin/include/asm/mmu.h b/arch/blackfin/include/asm/mmu.h index 757e43906ed..dbfd686360e 100644 --- a/arch/blackfin/include/asm/mmu.h +++ b/arch/blackfin/include/asm/mmu.h @@ -10,7 +10,6 @@ struct sram_list_struct { }; typedef struct { - struct vm_list_struct *vmlist; unsigned long end_brk; unsigned long stack_start; diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index d2d38853663..594e325b40e 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -160,15 +160,15 @@ put_reg(struct task_struct *task, int regno, unsigned long data) static inline int is_user_addr_valid(struct task_struct *child, unsigned long start, unsigned long len) { - struct vm_list_struct *vml; + struct vm_area_struct *vma; struct sram_list_struct *sraml; /* overflow */ if (start + len < start) return -EIO; - for (vml = child->mm->context.vmlist; vml; vml = 
vml->next) - if (start >= vml->vma->vm_start && start + len < vml->vma->vm_end) + vma = find_vma(child->mm, start); + if (vma && start >= vma->vm_start && start + len <= vma->vm_end) return 0; for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next) diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 17d8e417289..5b0667da8d0 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -83,6 +84,7 @@ static void decode_address(char *buf, unsigned long address) struct mm_struct *mm; unsigned long flags, offset; unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic(); + struct rb_node *n; #ifdef CONFIG_KALLSYMS unsigned long symsize; @@ -128,9 +130,10 @@ static void decode_address(char *buf, unsigned long address) if (!mm) continue; - vml = mm->context.vmlist; - while (vml) { - struct vm_area_struct *vma = vml->vma; + for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { + struct vm_area_struct *vma; + + vma = rb_entry(n, struct vm_area_struct, vm_rb); if (address >= vma->vm_start && address < vma->vm_end) { char _tmpbuf[256]; @@ -176,8 +179,6 @@ static void decode_address(char *buf, unsigned long address) goto done; } - - vml = vml->next; } if (!in_atomic) mmput(mm); diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c index 709e9bdc612..5e7d401d21e 100644 --- a/arch/frv/kernel/ptrace.c +++ b/arch/frv/kernel/ptrace.c @@ -69,7 +69,8 @@ static inline int put_reg(struct task_struct *task, int regno, } /* - * check that an address falls within the bounds of the target process's memory mappings + * check that an address falls within the bounds of the target process's memory + * mappings */ static inline int is_user_addr_valid(struct task_struct *child, unsigned long start, unsigned long len) @@ -79,11 +80,11 @@ static inline int is_user_addr_valid(struct task_struct *child, return -EIO; return 0; #else - struct 
vm_list_struct *vml; + struct vm_area_struct *vma; - for (vml = child->mm->context.vmlist; vml; vml = vml->next) - if (start >= vml->vma->vm_start && start + len <= vml->vma->vm_end) - return 0; + vma = find_vma(child->mm, start); + if (vma && start >= vma->vm_start && start + len <= vma->vm_end) + return 0; return -EIO; #endif diff --git a/arch/h8300/include/asm/mmu.h b/arch/h8300/include/asm/mmu.h index 2ce06ea4610..31309969df7 100644 --- a/arch/h8300/include/asm/mmu.h +++ b/arch/h8300/include/asm/mmu.h @@ -4,7 +4,6 @@ /* Copyright (C) 2002, David McCullough */ typedef struct { - struct vm_list_struct *vmlist; unsigned long end_brk; } mm_context_t; diff --git a/arch/m68knommu/include/asm/mmu.h b/arch/m68knommu/include/asm/mmu.h index 5fa6b68353b..e2da1e6f09f 100644 --- a/arch/m68knommu/include/asm/mmu.h +++ b/arch/m68knommu/include/asm/mmu.h @@ -4,7 +4,6 @@ /* Copyright (C) 2002, David McCullough */ typedef struct { - struct vm_list_struct *vmlist; unsigned long end_brk; } mm_context_t; diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h index fdcb93bc6d1..6c43625bb1a 100644 --- a/arch/sh/include/asm/mmu.h +++ b/arch/sh/include/asm/mmu.h @@ -9,7 +9,6 @@ typedef struct { mm_context_id_t id; void *vdso; #else - struct vm_list_struct *vmlist; unsigned long end_brk; #endif #ifdef CONFIG_BINFMT_ELF_FDPIC diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index aa5b43205e3..22baf1b1349 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1567,11 +1567,9 @@ end_coredump: static int elf_fdpic_dump_segments(struct file *file, size_t *size, unsigned long *limit, unsigned long mm_flags) { - struct vm_list_struct *vml; - - for (vml = current->mm->context.vmlist; vml; vml = vml->next) { - struct vm_area_struct *vma = vml->vma; + struct vm_area_struct *vma; + for (vma = current->mm->mmap; vma; vma = vma->vm_next) { if (!maydump(vma, mm_flags)) continue; @@ -1617,9 +1615,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, 
elf_fpxregset_t *xfpu = NULL; #endif int thread_status_size = 0; -#ifndef CONFIG_MMU - struct vm_list_struct *vml; -#endif elf_addr_t *auxv; unsigned long mm_flags; @@ -1685,13 +1680,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, fill_prstatus(prstatus, current, signr); elf_core_copy_regs(&prstatus->pr_reg, regs); -#ifdef CONFIG_MMU segs = current->mm->map_count; -#else - segs = 0; - for (vml = current->mm->context.vmlist; vml; vml = vml->next) - segs++; -#endif #ifdef ELF_CORE_EXTRA_PHDRS segs += ELF_CORE_EXTRA_PHDRS; #endif @@ -1766,20 +1755,10 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, mm_flags = current->mm->flags; /* write program headers for segments dump */ - for ( -#ifdef CONFIG_MMU - vma = current->mm->mmap; vma; vma = vma->vm_next -#else - vml = current->mm->context.vmlist; vml; vml = vml->next -#endif - ) { + for (vma = current->mm->mmap; vma; vma = vma->vm_next) { struct elf_phdr phdr; size_t sz; -#ifndef CONFIG_MMU - vma = vml->vma; -#endif - sz = vma->vm_end - vma->vm_start; phdr.p_type = PT_LOAD; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 3e8aeb8b61c..cd53ff83849 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -41,8 +41,6 @@ do { \ (vmi)->used = 0; \ (vmi)->largest_chunk = 0; \ } while(0) - -extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); #endif extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index b1675c4e66d..43d23948384 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -73,6 +73,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "HighFree: %8lu kB\n" "LowTotal: %8lu kB\n" "LowFree: %8lu kB\n" +#endif +#ifndef CONFIG_MMU + "MmapCopy: %8lu kB\n" #endif "SwapTotal: %8lu kB\n" "SwapFree: %8lu kB\n" @@ -115,6 +118,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(i.freehigh), K(i.totalram-i.totalhigh), K(i.freeram-i.freehigh), +#endif +#ifndef 
CONFIG_MMU + K((unsigned long) atomic_read(&mmap_pages_allocated)), #endif K(i.totalswap), K(i.freeswap), diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 3f87d263294..b446d7ad0b0 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -33,33 +33,33 @@ #include "internal.h" /* - * display a single VMA to a sequenced file + * display a single region to a sequenced file */ -int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) +static int nommu_region_show(struct seq_file *m, struct vm_region *region) { unsigned long ino = 0; struct file *file; dev_t dev = 0; int flags, len; - flags = vma->vm_flags; - file = vma->vm_file; + flags = region->vm_flags; + file = region->vm_file; if (file) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = region->vm_file->f_path.dentry->d_inode; dev = inode->i_sb->s_dev; ino = inode->i_ino; } seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", - vma->vm_start, - vma->vm_end, + region->vm_start, + region->vm_end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 
'S' : 's' : 'p', - ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, + ((loff_t)region->vm_pgoff) << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); if (file) { @@ -75,61 +75,54 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) } /* - * display a list of all the VMAs the kernel knows about + * display a list of all the REGIONs the kernel knows about * - nommu kernals have a single flat list */ -static int nommu_vma_list_show(struct seq_file *m, void *v) +static int nommu_region_list_show(struct seq_file *m, void *_p) { - struct vm_area_struct *vma; + struct rb_node *p = _p; - vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb); - return nommu_vma_show(m, vma); + return nommu_region_show(m, rb_entry(p, struct vm_region, vm_rb)); } -static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) +static void *nommu_region_list_start(struct seq_file *m, loff_t *_pos) { - struct rb_node *_rb; + struct rb_node *p; loff_t pos = *_pos; - void *next = NULL; - down_read(&nommu_vma_sem); + down_read(&nommu_region_sem); - for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) { - if (pos == 0) { - next = _rb; - break; - } - pos--; - } - - return next; + for (p = rb_first(&nommu_region_tree); p; p = rb_next(p)) + if (pos-- == 0) + return p; + return NULL; } -static void nommu_vma_list_stop(struct seq_file *m, void *v) +static void nommu_region_list_stop(struct seq_file *m, void *v) { - up_read(&nommu_vma_sem); + up_read(&nommu_region_sem); } -static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos) +static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos) { (*pos)++; return rb_next((struct rb_node *) v); } -static const struct seq_operations proc_nommu_vma_list_seqop = { - .start = nommu_vma_list_start, - .next = nommu_vma_list_next, - .stop = nommu_vma_list_stop, - .show = nommu_vma_list_show +static struct seq_operations proc_nommu_region_list_seqop = { + .start = nommu_region_list_start, + .next = 
nommu_region_list_next, + .stop = nommu_region_list_stop, + .show = nommu_region_list_show }; -static int proc_nommu_vma_list_open(struct inode *inode, struct file *file) +static int proc_nommu_region_list_open(struct inode *inode, struct file *file) { - return seq_open(file, &proc_nommu_vma_list_seqop); + return seq_open(file, &proc_nommu_region_list_seqop); } -static const struct file_operations proc_nommu_vma_list_operations = { - .open = proc_nommu_vma_list_open, +static const struct file_operations proc_nommu_region_list_operations = { + .open = proc_nommu_region_list_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -137,7 +130,7 @@ static const struct file_operations proc_nommu_vma_list_operations = { static int __init proc_nommu_init(void) { - proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations); + proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations); return 0; } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index d4a8be32b90..ca4a48d0d31 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -15,25 +15,25 @@ */ void task_mem(struct seq_file *m, struct mm_struct *mm) { - struct vm_list_struct *vml; + struct vm_area_struct *vma; + struct rb_node *p; unsigned long bytes = 0, sbytes = 0, slack = 0; down_read(&mm->mmap_sem); - for (vml = mm->context.vmlist; vml; vml = vml->next) { - if (!vml->vma) - continue; + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { + vma = rb_entry(p, struct vm_area_struct, vm_rb); - bytes += kobjsize(vml); + bytes += kobjsize(vma); if (atomic_read(&mm->mm_count) > 1 || - atomic_read(&vml->vma->vm_usage) > 1 - ) { - sbytes += kobjsize((void *) vml->vma->vm_start); - sbytes += kobjsize(vml->vma); + vma->vm_region || + vma->vm_flags & VM_MAYSHARE) { + sbytes += kobjsize((void *) vma->vm_start); + if (vma->vm_region) + sbytes += kobjsize(vma->vm_region); } else { - bytes += kobjsize((void *) vml->vma->vm_start); - bytes += kobjsize(vml->vma); - slack += 
kobjsize((void *) vml->vma->vm_start) - - (vml->vma->vm_end - vml->vma->vm_start); + bytes += kobjsize((void *) vma->vm_start); + slack += kobjsize((void *) vma->vm_start) - + (vma->vm_end - vma->vm_start); } } @@ -70,13 +70,14 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) unsigned long task_vsize(struct mm_struct *mm) { - struct vm_list_struct *tbp; + struct vm_area_struct *vma; + struct rb_node *p; unsigned long vsize = 0; down_read(&mm->mmap_sem); - for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { - if (tbp->vma) - vsize += kobjsize((void *) tbp->vma->vm_start); + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { + vma = rb_entry(p, struct vm_area_struct, vm_rb); + vsize += vma->vm_region->vm_end - vma->vm_region->vm_start; } up_read(&mm->mmap_sem); return vsize; @@ -85,16 +86,15 @@ unsigned long task_vsize(struct mm_struct *mm) int task_statm(struct mm_struct *mm, int *shared, int *text, int *data, int *resident) { - struct vm_list_struct *tbp; + struct vm_area_struct *vma; + struct rb_node *p; int size = kobjsize(mm); down_read(&mm->mmap_sem); - for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { - size += kobjsize(tbp); - if (tbp->vma) { - size += kobjsize(tbp->vma); - size += kobjsize((void *) tbp->vma->vm_start); - } + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { + vma = rb_entry(p, struct vm_area_struct, vm_rb); + size += kobjsize(vma); + size += kobjsize((void *) vma->vm_start); } size += (*text = mm->end_code - mm->start_code); @@ -104,21 +104,63 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return size; } +/* + * display a single VMA to a sequenced file + */ +static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) +{ + unsigned long ino = 0; + struct file *file; + dev_t dev = 0; + int flags, len; + + flags = vma->vm_flags; + file = vma->vm_file; + + if (file) { + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + } + + 
seq_printf(m, + "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", + vma->vm_start, + vma->vm_end, + flags & VM_READ ? 'r' : '-', + flags & VM_WRITE ? 'w' : '-', + flags & VM_EXEC ? 'x' : '-', + flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', + vma->vm_pgoff << PAGE_SHIFT, + MAJOR(dev), MINOR(dev), ino, &len); + + if (file) { + len = 25 + sizeof(void *) * 6 - len; + if (len < 1) + len = 1; + seq_printf(m, "%*c", len, ' '); + seq_path(m, &file->f_path, ""); + } + + seq_putc(m, '\n'); + return 0; +} + /* * display mapping lines for a particular process's /proc/pid/maps */ -static int show_map(struct seq_file *m, void *_vml) +static int show_map(struct seq_file *m, void *_p) { - struct vm_list_struct *vml = _vml; + struct rb_node *p = _p; - return nommu_vma_show(m, vml->vma); + return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb)); } static void *m_start(struct seq_file *m, loff_t *pos) { struct proc_maps_private *priv = m->private; - struct vm_list_struct *vml; struct mm_struct *mm; + struct rb_node *p; loff_t n = *pos; /* pin the task and mm whilst we play with them */ @@ -134,9 +176,9 @@ static void *m_start(struct seq_file *m, loff_t *pos) } /* start from the Nth VMA */ - for (vml = mm->context.vmlist; vml; vml = vml->next) + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) if (n-- == 0) - return vml; + return p; return NULL; } @@ -152,12 +194,12 @@ static void m_stop(struct seq_file *m, void *_vml) } } -static void *m_next(struct seq_file *m, void *_vml, loff_t *pos) +static void *m_next(struct seq_file *m, void *_p, loff_t *pos) { - struct vm_list_struct *vml = _vml; + struct rb_node *p = _p; (*pos)++; - return vml ? vml->next : NULL; + return p ? 
rb_next(p) : NULL; } static const struct seq_operations proc_pid_maps_ops = { diff --git a/include/asm-frv/mmu.h b/include/asm-frv/mmu.h index 22c03714fb1..86ca0e86e7d 100644 --- a/include/asm-frv/mmu.h +++ b/include/asm-frv/mmu.h @@ -22,7 +22,6 @@ typedef struct { unsigned long dtlb_ptd_mapping; /* [DAMR5] PTD mapping for dtlb cached PGE */ #else - struct vm_list_struct *vmlist; unsigned long end_brk; #endif diff --git a/include/asm-m32r/mmu.h b/include/asm-m32r/mmu.h index d9bd724479c..150cb92bb66 100644 --- a/include/asm-m32r/mmu.h +++ b/include/asm-m32r/mmu.h @@ -4,7 +4,6 @@ #if !defined(CONFIG_MMU) typedef struct { - struct vm_list_struct *vmlist; unsigned long end_brk; } mm_context_t; diff --git a/include/linux/mm.h b/include/linux/mm.h index 4a3d28c8644..b91a73fd1bc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -56,19 +56,9 @@ extern unsigned long mmap_min_addr; extern struct kmem_cache *vm_area_cachep; -/* - * This struct defines the per-mm list of VMAs for uClinux. 
If CONFIG_MMU is - * disabled, then there's a single shared list of VMAs maintained by the - * system, and mm's subscribe to these individually - */ -struct vm_list_struct { - struct vm_list_struct *next; - struct vm_area_struct *vma; -}; - #ifndef CONFIG_MMU -extern struct rb_root nommu_vma_tree; -extern struct rw_semaphore nommu_vma_sem; +extern struct rb_root nommu_region_tree; +extern struct rw_semaphore nommu_region_sem; extern unsigned int kobjsize(const void *objp); #endif @@ -1061,6 +1051,7 @@ extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, enum memmap_context); extern void setup_per_zone_pages_min(void); extern void mem_init(void); +extern void __init mmap_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); @@ -1072,6 +1063,9 @@ extern void setup_per_cpu_pageset(void); static inline void setup_per_cpu_pageset(void) {} #endif +/* nommu.c */ +extern atomic_t mmap_pages_allocated; + /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9cfc9b627fd..1c1e0d3a171 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -96,6 +96,22 @@ struct page { #endif /* WANT_PAGE_VIRTUAL */ }; +/* + * A region containing a mapping of a non-memory backed file under NOMMU + * conditions. These are held in a global tree and are pinned by the VMAs that + * map parts of them. 
+ */ +struct vm_region { + struct rb_node vm_rb; /* link in global region tree */ + unsigned long vm_flags; /* VMA vm_flags */ + unsigned long vm_start; /* start address of region */ + unsigned long vm_end; /* region initialised to here */ + unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ + struct file *vm_file; /* the backing file or NULL */ + + atomic_t vm_usage; /* region usage count */ +}; + /* * This struct defines a memory VMM memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -152,7 +168,7 @@ struct vm_area_struct { unsigned long vm_truncate_count;/* truncate_count or restart_addr */ #ifndef CONFIG_MMU - atomic_t vm_usage; /* refcount (VMAs shared if !MMU) */ + struct vm_region *vm_region; /* NOMMU mapping region */ #endif #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ diff --git a/ipc/shm.c b/ipc/shm.c index b125b560240..d0ab5527bf4 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -990,6 +990,7 @@ asmlinkage long sys_shmdt(char __user *shmaddr) */ vma = find_vma(mm, addr); +#ifdef CONFIG_MMU while (vma) { next = vma->vm_next; @@ -1034,6 +1035,17 @@ asmlinkage long sys_shmdt(char __user *shmaddr) vma = next; } +#else /* CONFIG_MMU */ + /* under NOMMU conditions, the exact address to be destroyed must be + * given */ + retval = -EINVAL; + if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + retval = 0; + } + +#endif + up_write(&mm->mmap_sem); return retval; } diff --git a/kernel/fork.c b/kernel/fork.c index 7b8f2a78be3..0bce4a43bb3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1481,12 +1481,10 @@ void __init proc_caches_init(void) fs_cachep = kmem_cache_create("fs_cache", sizeof(struct fs_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - vm_area_cachep = kmem_cache_create("vm_area_struct", - sizeof(struct vm_area_struct), 0, - SLAB_PANIC, NULL); mm_cachep = 
kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + mmap_init(); } /* diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2e75478e9c6..d0a32aab03f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -512,6 +512,13 @@ config DEBUG_VIRTUAL If unsure, say N. +config DEBUG_NOMMU_REGIONS + bool "Debug the global anon/private NOMMU mapping region tree" + depends on DEBUG_KERNEL && !MMU + help + This option causes the global tree of anonymous and private mapping + regions to be regularly checked for invalid topology. + config DEBUG_WRITECOUNT bool "Debug filesystem writers count" depends on DEBUG_KERNEL diff --git a/mm/mmap.c b/mm/mmap.c index a910c045cfd..749623196cb 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2472,3 +2472,13 @@ void mm_drop_all_locks(struct mm_struct *mm) mutex_unlock(&mm_all_locks_mutex); } + +/* + * initialise the VMA slab + */ +void __init mmap_init(void) +{ + vm_area_cachep = kmem_cache_create("vm_area_struct", + sizeof(struct vm_area_struct), 0, + SLAB_PANIC, NULL); +} diff --git a/mm/nommu.c b/mm/nommu.c index 23f355bbe26..0d363dfcf10 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -6,7 +6,7 @@ * * See Documentation/nommu-mmap.txt * - * Copyright (c) 2004-2005 David Howells + * Copyright (c) 2004-2008 David Howells * Copyright (c) 2000-2003 David McCullough * Copyright (c) 2000-2001 D Jeff Dionne * Copyright (c) 2002 Greg Ungerer @@ -33,6 +33,28 @@ #include #include #include +#include "internal.h" + +static inline __attribute__((format(printf, 1, 2))) +void no_printk(const char *fmt, ...) +{ +} + +#if 0 +#define kenter(FMT, ...) \ + printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) \ + printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) +#define kdebug(FMT, ...) \ + printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__) +#else +#define kenter(FMT, ...) 
\ + no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) \ + no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) +#define kdebug(FMT, ...) \ + no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__) +#endif #include "internal.h" @@ -46,12 +68,15 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int heap_stack_gap = 0; +atomic_t mmap_pages_allocated; + EXPORT_SYMBOL(mem_map); EXPORT_SYMBOL(num_physpages); -/* list of shareable VMAs */ -struct rb_root nommu_vma_tree = RB_ROOT; -DECLARE_RWSEM(nommu_vma_sem); +/* list of mapped, potentially shareable regions */ +static struct kmem_cache *vm_region_jar; +struct rb_root nommu_region_tree = RB_ROOT; +DECLARE_RWSEM(nommu_region_sem); struct vm_operations_struct generic_file_vm_ops = { }; @@ -400,129 +425,174 @@ asmlinkage unsigned long sys_brk(unsigned long brk) return mm->brk = brk; } -#ifdef DEBUG -static void show_process_blocks(void) +/* + * initialise the VMA and region record slabs + */ +void __init mmap_init(void) { - struct vm_list_struct *vml; - - printk("Process blocks %d:", current->pid); - - for (vml = &current->mm->context.vmlist; vml; vml = vml->next) { - printk(" %p: %p", vml, vml->vma); - if (vml->vma) - printk(" (%d @%lx #%d)", - kobjsize((void *) vml->vma->vm_start), - vml->vma->vm_start, - atomic_read(&vml->vma->vm_usage)); - printk(vml->next ?
" ->" : ".\n"); - } + vm_region_jar = kmem_cache_create("vm_region_jar", + sizeof(struct vm_region), 0, + SLAB_PANIC, NULL); + vm_area_cachep = kmem_cache_create("vm_area_struct", + sizeof(struct vm_area_struct), 0, + SLAB_PANIC, NULL); } -#endif /* DEBUG */ /* - * add a VMA into a process's mm_struct in the appropriate place in the list - * - should be called with mm->mmap_sem held writelocked + * validate the region tree + * - the caller must hold the region lock */ -static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml) +#ifdef CONFIG_DEBUG_NOMMU_REGIONS +static noinline void validate_nommu_regions(void) { - struct vm_list_struct **ppv; + struct vm_region *region, *last; + struct rb_node *p, *lastp; - for (ppv = &current->mm->context.vmlist; *ppv; ppv = &(*ppv)->next) - if ((*ppv)->vma->vm_start > vml->vma->vm_start) - break; + lastp = rb_first(&nommu_region_tree); + if (!lastp) + return; + + last = rb_entry(lastp, struct vm_region, vm_rb); + if (unlikely(last->vm_end <= last->vm_start)) + BUG(); + + while ((p = rb_next(lastp))) { + region = rb_entry(p, struct vm_region, vm_rb); + last = rb_entry(lastp, struct vm_region, vm_rb); + + if (unlikely(region->vm_end <= region->vm_start)) + BUG(); + if (unlikely(region->vm_start < last->vm_end)) + BUG(); - vml->next = *ppv; - *ppv = vml; + lastp = p; + } } +#else +#define validate_nommu_regions() do {} while(0) +#endif /* - * look up the first VMA in which addr resides, NULL if none - * - should be called with mm->mmap_sem at least held readlocked + * add a region into the global tree */ -struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) +static void add_nommu_region(struct vm_region *region) { - struct vm_list_struct *loop, *vml; + struct vm_region *pregion; + struct rb_node **p, *parent; - /* search the vm_start ordered list */ - vml = NULL; - for (loop = mm->context.vmlist; loop; loop = loop->next) { - if (loop->vma->vm_start > addr) - break; - vml = loop; +
validate_nommu_regions(); + + BUG_ON(region->vm_start & ~PAGE_MASK); + + parent = NULL; + p = &nommu_region_tree.rb_node; + while (*p) { + parent = *p; + pregion = rb_entry(parent, struct vm_region, vm_rb); + if (region->vm_start < pregion->vm_start) + p = &(*p)->rb_left; + else if (region->vm_start > pregion->vm_start) + p = &(*p)->rb_right; + else if (pregion == region) + return; + else + BUG(); } - if (vml && vml->vma->vm_end > addr) - return vml->vma; + rb_link_node(&region->vm_rb, parent, p); + rb_insert_color(&region->vm_rb, &nommu_region_tree); - return NULL; + validate_nommu_regions(); } -EXPORT_SYMBOL(find_vma); /* - * find a VMA - * - we don't extend stack VMAs under NOMMU conditions + * delete a region from the global tree */ -struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) +static void delete_nommu_region(struct vm_region *region) { - return find_vma(mm, addr); -} + BUG_ON(!nommu_region_tree.rb_node); -int expand_stack(struct vm_area_struct *vma, unsigned long address) -{ - return -ENOMEM; + validate_nommu_regions(); + rb_erase(&region->vm_rb, &nommu_region_tree); + validate_nommu_regions(); } /* - * look up the first VMA exactly that exactly matches addr - * - should be called with mm->mmap_sem at least held readlocked + * free a contiguous series of pages */ -static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm, - unsigned long addr) +static void free_page_series(unsigned long from, unsigned long to) { - struct vm_list_struct *vml; - - /* search the vm_start ordered list */ - for (vml = mm->context.vmlist; vml; vml = vml->next) { - if (vml->vma->vm_start == addr) - return vml->vma; - if (vml->vma->vm_start > addr) - break; + for (; from < to; from += PAGE_SIZE) { + struct page *page = virt_to_page(from); + + kdebug("- free %lx", from); + atomic_dec(&mmap_pages_allocated); + if (page_count(page) != 1) + kdebug("free page %p [%d]", page, page_count(page)); + put_page(page); } - - return NULL; } /* - * find a
VMA in the global tree + * release a reference to a region + * - the caller must hold the region semaphore, which this releases + * - the region may not have been added to the tree yet, in which case vm_end + * will equal vm_start */ -static inline struct vm_area_struct *find_nommu_vma(unsigned long start) +static void __put_nommu_region(struct vm_region *region) + __releases(nommu_region_sem) { - struct vm_area_struct *vma; - struct rb_node *n = nommu_vma_tree.rb_node; + kenter("%p{%d}", region, atomic_read(&region->vm_usage)); - while (n) { - vma = rb_entry(n, struct vm_area_struct, vm_rb); + BUG_ON(!nommu_region_tree.rb_node); - if (start < vma->vm_start) - n = n->rb_left; - else if (start > vma->vm_start) - n = n->rb_right; - else - return vma; + if (atomic_dec_and_test(&region->vm_usage)) { + if (region->vm_end > region->vm_start) + delete_nommu_region(region); + up_write(&nommu_region_sem); + + if (region->vm_file) + fput(region->vm_file); + + /* IO memory and memory shared directly out of the pagecache + * from ramfs/tmpfs mustn't be released here */ + if (region->vm_flags & VM_MAPPED_COPY) { + kdebug("free series"); + free_page_series(region->vm_start, region->vm_end); + } + kmem_cache_free(vm_region_jar, region); + } else { + up_write(&nommu_region_sem); } +} - return NULL; +/* + * release a reference to a region + */ +static void put_nommu_region(struct vm_region *region) +{ + down_write(&nommu_region_sem); + __put_nommu_region(region); } /* - * add a VMA in the global tree + * add a VMA into a process's mm_struct in the appropriate place in the list + * and tree and add to the address space's page tree also if not an anonymous + * page + * - should be called with mm->mmap_sem held writelocked */ -static void add_nommu_vma(struct vm_area_struct *vma) +static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) { - struct vm_area_struct *pvma; + struct vm_area_struct *pvma, **pp; struct address_space *mapping; - struct rb_node **p =
&nommu_vma_tree.rb_node; - struct rb_node *parent = NULL; + struct rb_node **p, *parent; + + kenter(",%p", vma); + + BUG_ON(!vma->vm_region); + + mm->map_count++; + vma->vm_mm = mm; /* add the VMA to the mapping */ if (vma->vm_file) { @@ -533,42 +603,62 @@ static void add_nommu_vma(struct vm_area_struct *vma) flush_dcache_mmap_unlock(mapping); } - /* add the VMA to the master list */ + /* add the VMA to the tree */ + parent = NULL; + p = &mm->mm_rb.rb_node; while (*p) { parent = *p; pvma = rb_entry(parent, struct vm_area_struct, vm_rb); - if (vma->vm_start < pvma->vm_start) { + /* sort by: start addr, end addr, VMA struct addr in that order + * (the latter is necessary as we may get identical VMAs) */ + if (vma->vm_start < pvma->vm_start) p = &(*p)->rb_left; - } - else if (vma->vm_start > pvma->vm_start) { + else if (vma->vm_start > pvma->vm_start) p = &(*p)->rb_right; - } - else { - /* mappings are at the same address - this can only - * happen for shared-mem chardevs and shared file - * mappings backed by ramfs/tmpfs */ - BUG_ON(!(pvma->vm_flags & VM_SHARED)); - - if (vma < pvma) - p = &(*p)->rb_left; - else if (vma > pvma) - p = &(*p)->rb_right; - else - BUG(); - } + else if (vma->vm_end < pvma->vm_end) + p = &(*p)->rb_left; + else if (vma->vm_end > pvma->vm_end) + p = &(*p)->rb_right; + else if (vma < pvma) + p = &(*p)->rb_left; + else if (vma > pvma) + p = &(*p)->rb_right; + else + BUG(); } rb_link_node(&vma->vm_rb, parent, p); - rb_insert_color(&vma->vm_rb, &nommu_vma_tree); + rb_insert_color(&vma->vm_rb, &mm->mm_rb); + + /* add VMA to the VMA list also */ + for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) { + if (pvma->vm_start > vma->vm_start) + break; + if (pvma->vm_start < vma->vm_start) + continue; + if (pvma->vm_end < vma->vm_end) + break; + } + + vma->vm_next = *pp; + *pp = vma; } /* - * delete a VMA from the global list + * delete a VMA from its owning mm_struct and address space */ -static void delete_nommu_vma(struct vm_area_struct *vma) 
+static void delete_vma_from_mm(struct vm_area_struct *vma) { + struct vm_area_struct **pp; struct address_space *mapping; + struct mm_struct *mm = vma->vm_mm; + + kenter("%p", vma); + + mm->map_count--; + if (mm->mmap_cache == vma) + mm->mmap_cache = NULL; /* remove the VMA from the mapping */ if (vma->vm_file) { @@ -579,8 +669,115 @@ static void delete_nommu_vma(struct vm_area_struct *vma) flush_dcache_mmap_unlock(mapping); } - /* remove from the master list */ - rb_erase(&vma->vm_rb, &nommu_vma_tree); + /* remove from the MM's tree and list */ + rb_erase(&vma->vm_rb, &mm->mm_rb); + for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) { + if (*pp == vma) { + *pp = vma->vm_next; + break; + } + } + + vma->vm_mm = NULL; +} + +/* + * destroy a VMA record + */ +static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) +{ + kenter("%p", vma); + if (vma->vm_ops && vma->vm_ops->close) + vma->vm_ops->close(vma); + if (vma->vm_file) { + fput(vma->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); + } + put_nommu_region(vma->vm_region); + kmem_cache_free(vm_area_cachep, vma); +} + +/* + * look up the first VMA in which addr resides, NULL if none + * - should be called with mm->mmap_sem at least held readlocked + */ +struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + struct rb_node *n = mm->mm_rb.rb_node; + + /* check the cache first */ + vma = mm->mmap_cache; + if (vma && vma->vm_start <= addr && vma->vm_end > addr) + return vma; + + /* trawl the tree (there may be multiple mappings in which addr + * resides) */ + for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { + vma = rb_entry(n, struct vm_area_struct, vm_rb); + if (vma->vm_start > addr) + return NULL; + if (vma->vm_end > addr) { + mm->mmap_cache = vma; + return vma; + } + } + + return NULL; +} +EXPORT_SYMBOL(find_vma); + +/* + * find a VMA + * - we don't extend stack VMAs under NOMMU conditions + */ +struct vm_area_struct 
*find_extend_vma(struct mm_struct *mm, unsigned long addr) +{ + return find_vma(mm, addr); +} + +/* + * expand a stack to a given address + * - not supported under NOMMU conditions + */ +int expand_stack(struct vm_area_struct *vma, unsigned long address) +{ + return -ENOMEM; +} + +/* + * look up the first VMA exactly that exactly matches addr + * - should be called with mm->mmap_sem at least held readlocked + */ +static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, + unsigned long addr, + unsigned long len) +{ + struct vm_area_struct *vma; + struct rb_node *n = mm->mm_rb.rb_node; + unsigned long end = addr + len; + + /* check the cache first */ + vma = mm->mmap_cache; + if (vma && vma->vm_start == addr && vma->vm_end == end) + return vma; + + /* trawl the tree (there may be multiple mappings in which addr + * resides) */ + for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { + vma = rb_entry(n, struct vm_area_struct, vm_rb); + if (vma->vm_start < addr) + continue; + if (vma->vm_start > addr) + return NULL; + if (vma->vm_end == end) { + mm->mmap_cache = vma; + return vma; + } + } + + return NULL; } /* @@ -595,7 +792,7 @@ static int validate_mmap_request(struct file *file, unsigned long pgoff, unsigned long *_capabilities) { - unsigned long capabilities; + unsigned long capabilities, rlen; unsigned long reqprot = prot; int ret; @@ -615,12 +812,12 @@ static int validate_mmap_request(struct file *file, return -EINVAL; /* Careful about overflows.. */ - len = PAGE_ALIGN(len); - if (!len || len > TASK_SIZE) + rlen = PAGE_ALIGN(len); + if (!rlen || rlen > TASK_SIZE) return -ENOMEM; /* offset overflow? 
*/ - if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) + if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff) return -EOVERFLOW; if (file) { @@ -794,9 +991,10 @@ static unsigned long determine_vm_flags(struct file *file, } /* - * set up a shared mapping on a file + * set up a shared mapping on a file (the driver or filesystem provides and + * pins the storage) */ -static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len) +static int do_mmap_shared_file(struct vm_area_struct *vma) { int ret; @@ -814,10 +1012,14 @@ static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len) /* * set up a private mapping or an anonymous shared mapping */ -static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) +static int do_mmap_private(struct vm_area_struct *vma, + struct vm_region *region, + unsigned long len) { + struct page *pages; + unsigned long total, point, n, rlen; void *base; - int ret; + int ret, order; /* invoke the file's mapping function so that it can keep track of * shared mappings on devices or memory @@ -836,23 +1038,46 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) * make a private copy of the data and map that instead */ } + rlen = PAGE_ALIGN(len); + /* allocate some memory to hold the mapping * - note that this may not return a page-aligned address if the object * we're allocating is smaller than a page */ - base = kmalloc(len, GFP_KERNEL|__GFP_COMP); - if (!base) + order = get_order(rlen); + kdebug("alloc order %d for %lx", order, len); + + pages = alloc_pages(GFP_KERNEL, order); + if (!pages) goto enomem; - vma->vm_start = (unsigned long) base; - vma->vm_end = vma->vm_start + len; - vma->vm_flags |= VM_MAPPED_COPY; + /* we allocated a power-of-2 sized page set, so we need to trim off the + * excess */ + total = 1 << order; + atomic_add(total, &mmap_pages_allocated); + + point = rlen >> PAGE_SHIFT; + while (total > point) { + order = ilog2(total - point); + n = 1 << order; + kdebug("shave 
%lu/%lu @%lu", n, total - point, total); + atomic_sub(n, &mmap_pages_allocated); + total -= n; + set_page_refcounted(pages + total); + __free_pages(pages + total, order); + } + + total = rlen >> PAGE_SHIFT; + for (point = 1; point < total; point++) + set_page_refcounted(&pages[point]); -#ifdef WARN_ON_SLACK - if (len + WARN_ON_SLACK <= kobjsize(result)) - printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n", - len, current->pid, kobjsize(result) - len); -#endif + base = page_address(pages); + region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY; + region->vm_start = (unsigned long) base; + region->vm_end = region->vm_start + rlen; + + vma->vm_start = region->vm_start; + vma->vm_end = region->vm_start + len; if (vma->vm_file) { /* read the contents of a file into the copy */ @@ -864,26 +1089,27 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) old_fs = get_fs(); set_fs(KERNEL_DS); - ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos); + ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos); set_fs(old_fs); if (ret < 0) goto error_free; /* clear the last little bit */ - if (ret < len) - memset(base + ret, 0, len - ret); + if (ret < rlen) + memset(base + ret, 0, rlen - ret); } else { /* if it's an anonymous mapping, then just clear it */ - memset(base, 0, len); + memset(base, 0, rlen); } return 0; error_free: - kfree(base); - vma->vm_start = 0; + free_page_series(region->vm_start, region->vm_end); + region->vm_start = vma->vm_start = 0; + region->vm_end = vma->vm_end = 0; return ret; enomem: @@ -903,13 +1129,14 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long flags, unsigned long pgoff) { - struct vm_list_struct *vml = NULL; - struct vm_area_struct *vma = NULL; + struct vm_area_struct *vma; + struct vm_region *region; struct rb_node *rb; - unsigned long capabilities, vm_flags; - void *result; + unsigned long capabilities, vm_flags, result; int ret; + kenter(",%lx,%lx,%lx,%lx,%lx", addr, 
len, prot, flags, pgoff); + if (!(flags & MAP_FIXED)) addr = round_hint_to_min(addr); @@ -917,73 +1144,120 @@ unsigned long do_mmap_pgoff(struct file *file, * mapping */ ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, &capabilities); - if (ret < 0) + if (ret < 0) { + kleave(" = %d [val]", ret); return ret; + } /* we've determined that we can make the mapping, now translate what we * now know into VMA flags */ vm_flags = determine_vm_flags(file, prot, flags, capabilities); - /* we're going to need to record the mapping if it works */ - vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL); - if (!vml) - goto error_getting_vml; + /* we're going to need to record the mapping */ + region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); + if (!region) + goto error_getting_region; + + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + if (!vma) + goto error_getting_vma; + + atomic_set(&region->vm_usage, 1); + region->vm_flags = vm_flags; + region->vm_pgoff = pgoff; - down_write(&nommu_vma_sem); + INIT_LIST_HEAD(&vma->anon_vma_node); + vma->vm_flags = vm_flags; + vma->vm_pgoff = pgoff; - /* if we want to share, we need to check for VMAs created by other + if (file) { + region->vm_file = file; + get_file(file); + vma->vm_file = file; + get_file(file); + if (vm_flags & VM_EXECUTABLE) { + added_exe_file_vma(current->mm); + vma->vm_mm = current->mm; + } + } + + down_write(&nommu_region_sem); + + /* if we want to share, we need to check for regions created by other * mmap() calls that overlap with our proposed mapping - * - we can only share with an exact match on most regular files + * - we can only share with a superset match on most regular files * - shared mappings on character devices and memory backed files are * permitted to overlap inexactly as far as we are concerned for in * these cases, sharing is handled in the driver or filesystem rather * than here */ if (vm_flags & VM_MAYSHARE) { - unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; -
unsigned long vmpglen; + struct vm_region *pregion; + unsigned long pglen, rpglen, pgend, rpgend, start; - /* suppress VMA sharing for shared regions */ - if (vm_flags & VM_SHARED && - capabilities & BDI_CAP_MAP_DIRECT) - goto dont_share_VMAs; + pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + pgend = pgoff + pglen; - for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) { - vma = rb_entry(rb, struct vm_area_struct, vm_rb); + for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { + pregion = rb_entry(rb, struct vm_region, vm_rb); - if (!(vma->vm_flags & VM_MAYSHARE)) + if (!(pregion->vm_flags & VM_MAYSHARE)) continue; /* search for overlapping mappings on the same file */ - if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode) + if (pregion->vm_file->f_path.dentry->d_inode != + file->f_path.dentry->d_inode) continue; - if (vma->vm_pgoff >= pgoff + pglen) + if (pregion->vm_pgoff >= pgend) continue; - vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1; - vmpglen >>= PAGE_SHIFT; - if (pgoff >= vma->vm_pgoff + vmpglen) + rpglen = pregion->vm_end - pregion->vm_start; + rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT; + rpgend = pregion->vm_pgoff + rpglen; + if (pgoff >= rpgend) continue; - /* handle inexactly overlapping matches between mappings */ - if (vma->vm_pgoff != pgoff || vmpglen != pglen) { + /* handle inexactly overlapping matches between + * mappings */ + if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && + !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { + /* new mapping is not a subset of the region */ if (!(capabilities & BDI_CAP_MAP_DIRECT)) goto sharing_violation; continue; } - /* we've found a VMA we can share */ - atomic_inc(&vma->vm_usage); - - vml->vma = vma; - result = (void *) vma->vm_start; - goto shared; + /* we've found a region we can share */ + atomic_inc(&pregion->vm_usage); + vma->vm_region = pregion; + start = pregion->vm_start; + start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; + vma->vm_start = 
start; + vma->vm_end = start + len; + + if (pregion->vm_flags & VM_MAPPED_COPY) { + kdebug("share copy"); + vma->vm_flags |= VM_MAPPED_COPY; + } else { + kdebug("share mmap"); + ret = do_mmap_shared_file(vma); + if (ret < 0) { + vma->vm_region = NULL; + vma->vm_start = 0; + vma->vm_end = 0; + atomic_dec(&pregion->vm_usage); + pregion = NULL; + goto error_just_free; + } + } + fput(region->vm_file); + kmem_cache_free(vm_region_jar, region); + region = pregion; + result = start; + goto share; } - dont_share_VMAs: - vma = NULL; - /* obtain the address at which to make a shared mapping * - this is the hook for quasi-memory character devices to * tell us the location of a shared mapping @@ -994,102 +1268,93 @@ unsigned long do_mmap_pgoff(struct file *file, if (IS_ERR((void *) addr)) { ret = addr; if (ret != (unsigned long) -ENOSYS) - goto error; + goto error_just_free; /* the driver refused to tell us where to site * the mapping so we'll have to attempt to copy * it */ ret = (unsigned long) -ENODEV; if (!(capabilities & BDI_CAP_MAP_COPY)) - goto error; + goto error_just_free; capabilities &= ~BDI_CAP_MAP_DIRECT; + } else { + vma->vm_start = region->vm_start = addr; + vma->vm_end = region->vm_end = addr + len; } } } - /* we're going to need a VMA struct as well */ - vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL); - if (!vma) - goto error_getting_vma; - - INIT_LIST_HEAD(&vma->anon_vma_node); - atomic_set(&vma->vm_usage, 1); - if (file) { - get_file(file); - if (vm_flags & VM_EXECUTABLE) { - added_exe_file_vma(current->mm); - vma->vm_mm = current->mm; - } - } - vma->vm_file = file; - vma->vm_flags = vm_flags; - vma->vm_start = addr; - vma->vm_end = addr + len; - vma->vm_pgoff = pgoff; - - vml->vma = vma; + vma->vm_region = region; /* set up the mapping */ if (file && vma->vm_flags & VM_SHARED) - ret = do_mmap_shared_file(vma, len); + ret = do_mmap_shared_file(vma); else - ret = do_mmap_private(vma, len); + ret = do_mmap_private(vma, region, len); if (ret < 0) - 
goto error; + goto error_put_region; + + add_nommu_region(region); /* okay... we have a mapping; now we have to register it */ - result = (void *) vma->vm_start; + result = vma->vm_start; current->mm->total_vm += len >> PAGE_SHIFT; - add_nommu_vma(vma); +share: + add_vma_to_mm(current->mm, vma); - shared: - add_vma_to_mm(current->mm, vml); - - up_write(&nommu_vma_sem); + up_write(&nommu_region_sem); if (prot & PROT_EXEC) - flush_icache_range((unsigned long) result, - (unsigned long) result + len); + flush_icache_range(result, result + len); -#ifdef DEBUG - printk("do_mmap:\n"); - show_process_blocks(); -#endif + kleave(" = %lx", result); + return result; - return (unsigned long) result; - - error: - up_write(&nommu_vma_sem); - kfree(vml); +error_put_region: + __put_nommu_region(region); if (vma) { if (vma->vm_file) { fput(vma->vm_file); if (vma->vm_flags & VM_EXECUTABLE) removed_exe_file_vma(vma->vm_mm); } - kfree(vma); + kmem_cache_free(vm_area_cachep, vma); } + kleave(" = %d [pr]", ret); return ret; - sharing_violation: - up_write(&nommu_vma_sem); - printk("Attempt to share mismatched mappings\n"); - kfree(vml); - return -EINVAL; +error_just_free: + up_write(&nommu_region_sem); +error: + fput(region->vm_file); + kmem_cache_free(vm_region_jar, region); + fput(vma->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(vma->vm_mm); + kmem_cache_free(vm_area_cachep, vma); + kleave(" = %d", ret); + return ret; + +sharing_violation: + up_write(&nommu_region_sem); + printk(KERN_WARNING "Attempt to share mismatched mappings\n"); + ret = -EINVAL; + goto error; - error_getting_vma: - up_write(&nommu_vma_sem); - kfree(vml); - printk("Allocation of vma for %lu byte allocation from process %d failed\n", +error_getting_vma: + kmem_cache_free(vm_region_jar, region); + printk(KERN_WARNING "Allocation of vma for %lu byte allocation" + " from process %d failed\n", len, current->pid); show_free_areas(); return -ENOMEM; - error_getting_vml: - printk("Allocation of 
vml for %lu byte allocation from process %d failed\n", +error_getting_region: + printk(KERN_WARNING "Allocation of vm region for %lu byte allocation" + " from process %d failed\n", len, current->pid); show_free_areas(); return -ENOMEM; @@ -1097,77 +1362,180 @@ unsigned long do_mmap_pgoff(struct file *file, EXPORT_SYMBOL(do_mmap_pgoff); /* - * handle mapping disposal for uClinux + * split a vma into two pieces at address 'addr', a new vma is allocated either + * for the first part or the tail. */ -static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma) +int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, int new_below) { - if (vma) { - down_write(&nommu_vma_sem); + struct vm_area_struct *new; + struct vm_region *region; + unsigned long npages; - if (atomic_dec_and_test(&vma->vm_usage)) { - delete_nommu_vma(vma); + kenter(""); - if (vma->vm_ops && vma->vm_ops->close) - vma->vm_ops->close(vma); + /* we're only permitted to split anonymous regions that have a single + * owner */ + if (vma->vm_file || + atomic_read(&vma->vm_region->vm_usage) != 1) + return -ENOMEM; - /* IO memory and memory shared directly out of the pagecache from - * ramfs/tmpfs mustn't be released here */ - if (vma->vm_flags & VM_MAPPED_COPY) - kfree((void *) vma->vm_start); + if (mm->map_count >= sysctl_max_map_count) + return -ENOMEM; - if (vma->vm_file) { - fput(vma->vm_file); - if (vma->vm_flags & VM_EXECUTABLE) - removed_exe_file_vma(mm); - } - kfree(vma); - } + region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); + if (!region) + return -ENOMEM; + + new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + if (!new) { + kmem_cache_free(vm_region_jar, region); + return -ENOMEM; + } + + /* most fields are the same, copy all, and then fixup */ + *new = *vma; + *region = *vma->vm_region; + new->vm_region = region; + + npages = (addr - vma->vm_start) >> PAGE_SHIFT; + + if (new_below) { + region->vm_end = new->vm_end = addr; + } else { + region->vm_start = 
new->vm_start = addr; + region->vm_pgoff = new->vm_pgoff += npages; + } - up_write(&nommu_vma_sem); + if (new->vm_ops && new->vm_ops->open) + new->vm_ops->open(new); + + delete_vma_from_mm(vma); + down_write(&nommu_region_sem); + delete_nommu_region(vma->vm_region); + if (new_below) { + vma->vm_region->vm_start = vma->vm_start = addr; + vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; + } else { + vma->vm_region->vm_end = vma->vm_end = addr; } + add_nommu_region(vma->vm_region); + add_nommu_region(new->vm_region); + up_write(&nommu_region_sem); + add_vma_to_mm(mm, vma); + add_vma_to_mm(mm, new); + return 0; } /* - * release a mapping - * - under NOMMU conditions the parameters must match exactly to the mapping to - * be removed + * shrink a VMA by removing the specified chunk from either the beginning or + * the end */ -int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) +static int shrink_vma(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long from, unsigned long to) { - struct vm_list_struct *vml, **parent; - unsigned long end = addr + len; + struct vm_region *region; -#ifdef DEBUG - printk("do_munmap:\n"); -#endif + kenter(""); - for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) { - if ((*parent)->vma->vm_start > addr) - break; - if ((*parent)->vma->vm_start == addr && - ((len == 0) || ((*parent)->vma->vm_end == end))) - goto found; - } + /* adjust the VMA's pointers, which may reposition it in the MM's tree + * and list */ + delete_vma_from_mm(vma); + if (from > vma->vm_start) + vma->vm_end = from; + else + vma->vm_start = to; + add_vma_to_mm(mm, vma); - printk("munmap of non-mmaped memory by process %d (%s): %p\n", - current->pid, current->comm, (void *) addr); - return -EINVAL; + /* cut the backing region down to size */ + region = vma->vm_region; + BUG_ON(atomic_read(&region->vm_usage) != 1); - found: - vml = *parent; + down_write(&nommu_region_sem); + delete_nommu_region(region); + if (from >
region->vm_start) + region->vm_end = from; + else + region->vm_start = to; + add_nommu_region(region); + up_write(&nommu_region_sem); - put_vma(mm, vml->vma); + free_page_series(from, to); + return 0; +} - *parent = vml->next; - kfree(vml); +/* + * release a mapping + * - under NOMMU conditions the chunk to be unmapped must be backed by a single + * VMA, though it need not cover the whole VMA + */ +int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) +{ + struct vm_area_struct *vma; + struct rb_node *rb; + unsigned long end = start + len; + int ret; - update_hiwater_vm(mm); - mm->total_vm -= len >> PAGE_SHIFT; + kenter(",%lx,%zx", start, len); -#ifdef DEBUG - show_process_blocks(); -#endif + if (len == 0) + return -EINVAL; + + /* find the first potentially overlapping VMA */ + vma = find_vma(mm, start); + if (!vma) { + printk(KERN_WARNING + "munmap of memory not mmapped by process %d (%s):" + " 0x%lx-0x%lx\n", + current->pid, current->comm, start, start + len - 1); + return -EINVAL; + } + /* we're allowed to split an anonymous VMA but not a file-backed one */ + if (vma->vm_file) { + do { + if (start > vma->vm_start) { + kleave(" = -EINVAL [miss]"); + return -EINVAL; + } + if (end == vma->vm_end) + goto erase_whole_vma; + rb = rb_next(&vma->vm_rb); + vma = rb_entry(rb, struct vm_area_struct, vm_rb); + } while (rb); + kleave(" = -EINVAL [split file]"); + return -EINVAL; + } else { + /* the chunk must be a subset of the VMA found */ + if (start == vma->vm_start && end == vma->vm_end) + goto erase_whole_vma; + if (start < vma->vm_start || end > vma->vm_end) { + kleave(" = -EINVAL [superset]"); + return -EINVAL; + } + if (start & ~PAGE_MASK) { + kleave(" = -EINVAL [unaligned start]"); + return -EINVAL; + } + if (end != vma->vm_end && end & ~PAGE_MASK) { + kleave(" = -EINVAL [unaligned split]"); + return -EINVAL; + } + if (start != vma->vm_start && end != vma->vm_end) { + ret = split_vma(mm, vma, start, 1); + if (ret < 0) { + kleave(" = %d [split]", ret); 
+ return ret; + } + } + return shrink_vma(mm, vma, start, end); + } + +erase_whole_vma: + delete_vma_from_mm(vma); + delete_vma(mm, vma); + kleave(" = 0"); return 0; } EXPORT_SYMBOL(do_munmap); @@ -1184,29 +1552,26 @@ asmlinkage long sys_munmap(unsigned long addr, size_t len) } /* - * Release all mappings + * release all the mappings made in a process's VM space */ -void exit_mmap(struct mm_struct * mm) +void exit_mmap(struct mm_struct *mm) { - struct vm_list_struct *tmp; + struct vm_area_struct *vma; - if (mm) { -#ifdef DEBUG - printk("Exit_mmap:\n"); -#endif + if (!mm) + return; - mm->total_vm = 0; + kenter(""); - while ((tmp = mm->context.vmlist)) { - mm->context.vmlist = tmp->next; - put_vma(mm, tmp->vma); - kfree(tmp); - } + mm->total_vm = 0; -#ifdef DEBUG - show_process_blocks(); -#endif + while ((vma = mm->mmap)) { + mm->mmap = vma->vm_next; + delete_vma_from_mm(vma); + delete_vma(mm, vma); } + + kleave(""); } unsigned long do_brk(unsigned long addr, unsigned long len) @@ -1219,8 +1584,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len) * time (controlled by the MREMAP_MAYMOVE flag and available VM space) * * under NOMMU conditions, we only permit changing a mapping's size, and only - * as long as it stays within the hole allocated by the kmalloc() call in - * do_mmap_pgoff() and the block is not shareable + * as long as it stays within the region allocated by do_mmap_private() and the + * block is not shareable * * MREMAP_FIXED is not supported under NOMMU conditions */ @@ -1231,13 +1596,16 @@ unsigned long do_mremap(unsigned long addr, struct vm_area_struct *vma; /* insanity checks first */ - if (new_len == 0) + if (old_len == 0 || new_len == 0) return (unsigned long) -EINVAL; + if (addr & ~PAGE_MASK) + return -EINVAL; + if (flags & MREMAP_FIXED && new_addr != addr) return (unsigned long) -EINVAL; - vma = find_vma_exact(current->mm, addr); + vma = find_vma_exact(current->mm, addr, old_len); if (!vma) return (unsigned long) -EINVAL; @@ -1247,19 
+1615,19 @@ unsigned long do_mremap(unsigned long addr, if (vma->vm_flags & VM_MAYSHARE) return (unsigned long) -EPERM; - if (new_len > kobjsize((void *) addr)) + if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) return (unsigned long) -ENOMEM; /* all checks complete - do it */ vma->vm_end = vma->vm_start + new_len; - return vma->vm_start; } EXPORT_SYMBOL(do_mremap); -asmlinkage unsigned long sys_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr) +asmlinkage +unsigned long sys_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr) { unsigned long ret; -- cgit v1.2.3-70-g09d2 From dd8632a12e500a684478fea0951f380478d56fed Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: NOMMU: Make mmap allocation page trimming behaviour configurable. NOMMU mmap allocates a piece of memory for an mmap that's rounded up in size to the nearest power-of-2 number of pages. Currently it then discards the excess pages back to the page allocator, making that memory available for use by other things. This can, however, cause a greater amount of fragmentation. To counter this, a sysctl is added in order to fine-tune the trimming behaviour. The default behaviour remains to trim pages aggressively, while this can either be disabled completely or set to a higher page-granular watermark in order to have finer-grained control. vm region vm_top bits taken from an earlier patch by David Howells.
Signed-off-by: Paul Mundt Signed-off-by: David Howells Tested-by: Mike Frysinger --- Documentation/nommu-mmap.txt | 15 ++++++++++ Documentation/sysctl/vm.txt | 18 ++++++++++++ include/linux/mm_types.h | 1 + kernel/sysctl.c | 14 ++++++++++ mm/nommu.c | 65 ++++++++++++++++++++++++++++---------------- 5 files changed, 90 insertions(+), 23 deletions(-) (limited to 'Documentation') diff --git a/Documentation/nommu-mmap.txt b/Documentation/nommu-mmap.txt index 02b89dcf38a..b565e8279d1 100644 --- a/Documentation/nommu-mmap.txt +++ b/Documentation/nommu-mmap.txt @@ -248,3 +248,18 @@ PROVIDING SHAREABLE BLOCK DEVICE SUPPORT Provision of shared mappings on block device files is exactly the same as for character devices. If there isn't a real device underneath, then the driver should allocate sufficient contiguous memory to honour any supported mapping. + + +================================= +ADJUSTING PAGE TRIMMING BEHAVIOUR +================================= + +NOMMU mmap automatically rounds up to the nearest power-of-2 number of pages +when performing an allocation. This can have adverse effects on memory +fragmentation, and as such, is left configurable. The default behaviour is to +aggressively trim allocations and discard any excess pages back in to the page +allocator. In order to retain finer-grained control over fragmentation, this +behaviour can either be disabled completely, or bumped up to a higher page +watermark where trimming begins. + +Page trimming behaviour is configurable via the sysctl `vm.nr_trim_pages'. 
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index cd05994a49e..a3415070bca 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -38,6 +38,7 @@ Currently, these files are in /proc/sys/vm: - numa_zonelist_order - nr_hugepages - nr_overcommit_hugepages +- nr_trim_pages (only if CONFIG_MMU=n) ============================================================== @@ -348,3 +349,20 @@ Change the maximum size of the hugepage pool. The maximum is nr_hugepages + nr_overcommit_hugepages. See Documentation/vm/hugetlbpage.txt + +============================================================== + +nr_trim_pages + +This is available only on NOMMU kernels. + +This value adjusts the excess page trimming behaviour of power-of-2 aligned +NOMMU mmap allocations. + +A value of 0 disables trimming of allocations entirely, while a value of 1 +trims excess pages aggressively. Any value >= 1 acts as the watermark where +trimming of allocations is initiated. + +The default value is 1. + +See Documentation/nommu-mmap.txt for more information. 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1c1e0d3a171..92915e81443 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -106,6 +106,7 @@ struct vm_region { unsigned long vm_flags; /* VMA vm_flags */ unsigned long vm_start; /* start address of region */ unsigned long vm_end; /* region initialised to here */ + unsigned long vm_top; /* region allocated to here */ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 92f6e5bc3c2..89d74436318 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -82,6 +82,9 @@ extern int percpu_pagelist_fraction; extern int compat_log; extern int latencytop_enabled; extern int sysctl_nr_open_min, sysctl_nr_open_max; +#ifndef CONFIG_MMU +extern int sysctl_nr_trim_pages; +#endif #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ @@ -1102,6 +1105,17 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, +#else + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nr_trim_pages", + .data = &sysctl_nr_trim_pages, + .maxlen = sizeof(sysctl_nr_trim_pages), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, #endif { .ctl_name = VM_LAPTOP_MODE, diff --git a/mm/nommu.c b/mm/nommu.c index 0d363dfcf10..a6e8ccfbd40 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -10,7 +10,7 @@ * Copyright (c) 2000-2003 David McCullough * Copyright (c) 2000-2001 D Jeff Dionne * Copyright (c) 2002 Greg Ungerer - * Copyright (c) 2007 Paul Mundt + * Copyright (c) 2007-2008 Paul Mundt */ #include @@ -66,6 +66,7 @@ atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; +int 
sysctl_nr_trim_pages = 1; /* page trimming behaviour */ int heap_stack_gap = 0; atomic_t mmap_pages_allocated; @@ -455,6 +456,8 @@ static noinline void validate_nommu_regions(void) last = rb_entry(lastp, struct vm_region, vm_rb); if (unlikely(last->vm_end <= last->vm_start)) BUG(); + if (unlikely(last->vm_top < last->vm_end)) + BUG(); while ((p = rb_next(lastp))) { region = rb_entry(p, struct vm_region, vm_rb); @@ -462,7 +465,9 @@ static noinline void validate_nommu_regions(void) if (unlikely(region->vm_end <= region->vm_start)) BUG(); - if (unlikely(region->vm_start < last->vm_end)) + if (unlikely(region->vm_top < region->vm_end)) + BUG(); + if (unlikely(region->vm_start < last->vm_top)) BUG(); lastp = p; @@ -536,7 +541,7 @@ static void free_page_series(unsigned long from, unsigned long to) /* * release a reference to a region * - the caller must hold the region semaphore, which this releases - * - the region may not have been added to the tree yet, in which case vm_end + * - the region may not have been added to the tree yet, in which case vm_top * will equal vm_start */ static void __put_nommu_region(struct vm_region *region) @@ -547,7 +552,7 @@ static void __put_nommu_region(struct vm_region *region) BUG_ON(!nommu_region_tree.rb_node); if (atomic_dec_and_test(&region->vm_usage)) { - if (region->vm_end > region->vm_start) + if (region->vm_top > region->vm_start) delete_nommu_region(region); up_write(&nommu_region_sem); @@ -558,7 +563,7 @@ static void __put_nommu_region(struct vm_region *region) * from ramfs/tmpfs mustn't be released here */ if (region->vm_flags & VM_MAPPED_COPY) { kdebug("free series"); - free_page_series(region->vm_start, region->vm_end); + free_page_series(region->vm_start, region->vm_top); } kmem_cache_free(vm_region_jar, region); } else { @@ -999,6 +1004,10 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) int ret; ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); + if (ret == 0) { + vma->vm_region->vm_top = vma->vm_region->vm_end;
+ return ret; + } if (ret != -ENOSYS) return ret; @@ -1027,11 +1036,14 @@ static int do_mmap_private(struct vm_area_struct *vma, */ if (vma->vm_file) { ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); - if (ret != -ENOSYS) { + if (ret == 0) { /* shouldn't return success if we're not sharing */ - BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE)); - return ret; /* success or a real error */ + BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); + vma->vm_region->vm_top = vma->vm_region->vm_end; + return ret; } + if (ret != -ENOSYS) + return ret; /* getting an ENOSYS error indicates that direct mmap isn't * possible (as opposed to tried but failed) so we'll try to @@ -1051,23 +1063,25 @@ static int do_mmap_private(struct vm_area_struct *vma, if (!pages) goto enomem; - /* we allocated a power-of-2 sized page set, so we need to trim off the - * excess */ total = 1 << order; atomic_add(total, &mmap_pages_allocated); point = rlen >> PAGE_SHIFT; - while (total > point) { - order = ilog2(total - point); - n = 1 << order; - kdebug("shave %lu/%lu @%lu", n, total - point, total); - atomic_sub(n, &mmap_pages_allocated); - total -= n; - set_page_refcounted(pages + total); - __free_pages(pages + total, order); + + /* we allocated a power-of-2 sized page set, so we may want to trim off + * the excess */ + if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) { + while (total > point) { + order = ilog2(total - point); + n = 1 << order; + kdebug("shave %lu/%lu @%lu", n, total - point, total); + atomic_sub(n, &mmap_pages_allocated); + total -= n; + set_page_refcounted(pages + total); + __free_pages(pages + total, order); + } } - total = rlen >> PAGE_SHIFT; for (point = 1; point < total; point++) set_page_refcounted(&pages[point]); @@ -1075,6 +1089,7 @@ static int do_mmap_private(struct vm_area_struct *vma, region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY; region->vm_start = (unsigned long) base; region->vm_end = region->vm_start + rlen; + region->vm_top = region->vm_start + 
(total << PAGE_SHIFT); vma->vm_start = region->vm_start; vma->vm_end = region->vm_start + len; @@ -1110,6 +1125,7 @@ error_free: free_page_series(region->vm_start, region->vm_end); region->vm_start = vma->vm_start = 0; region->vm_end = vma->vm_end = 0; + region->vm_top = 0; return ret; enomem: @@ -1401,7 +1417,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, npages = (addr - vma->vm_start) >> PAGE_SHIFT; if (new_below) { - region->vm_end = new->vm_end = addr; + region->vm_top = region->vm_end = new->vm_end = addr; } else { region->vm_start = new->vm_start = addr; region->vm_pgoff = new->vm_pgoff += npages; @@ -1418,6 +1434,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; } else { vma->vm_region->vm_end = vma->vm_end = addr; + vma->vm_region->vm_top = addr; } add_nommu_region(vma->vm_region); add_nommu_region(new->vm_region); @@ -1454,10 +1471,12 @@ static int shrink_vma(struct mm_struct *mm, down_write(&nommu_region_sem); delete_nommu_region(region); - if (from > region->vm_start) - region->vm_end = from; - else + if (from > region->vm_start) { + to = region->vm_top; + region->vm_top = region->vm_end = from; + } else { region->vm_start = to; + } add_nommu_region(region); up_write(&nommu_region_sem); -- cgit v1.2.3-70-g09d2 From 18e7f1f0d34be4a39f7f47324a3e26b43fddb714 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 7 Jan 2009 18:07:32 -0800 Subject: cgroups: documentation updates - remove 'releasable' since it has been moved to the debug subsys. - update lock requirements of subsys callbacks. 
Signed-off-by: Li Zefan Cc: Paul Menage Cc: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cgroups.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index d9014aa0eb6..60287e9e9d2 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -227,7 +227,6 @@ Each cgroup is represented by a directory in the cgroup file system containing the following files describing that cgroup: - tasks: list of tasks (by pid) attached to that cgroup - - releasable flag: cgroup currently removeable? - notify_on_release flag: run the release agent on exit? - release_agent: the path to use for release notifications (this file exists in the top cgroup only) @@ -360,7 +359,7 @@ Now you want to do something with this cgroup. In this directory you can find several files: # ls -notify_on_release releasable tasks +notify_on_release tasks (plus whatever files added by the attached subsystems) Now attach your shell to this cgroup: @@ -479,7 +478,6 @@ newly-created cgroup if an error occurs after this subsystem's create() method has been called for the new cgroup). void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); -(cgroup_mutex held by caller) Called before checking the reference count on each subsystem. This may be useful for subsystems which have some extra references even if @@ -498,6 +496,7 @@ remain valid while the caller holds cgroup_mutex. void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, struct cgroup *old_cgrp, struct task_struct *task) +(cgroup_mutex held by caller) Called after the task has been attached to the cgroup, to allow any post-attachment activity that requires memory allocations or blocking. @@ -511,6 +510,7 @@ void exit(struct cgroup_subsys *ss, struct task_struct *task) Called during task exit. 
int populate(struct cgroup_subsys *ss, struct cgroup *cgrp) +(cgroup_mutex held by caller) Called after creation of a cgroup to allow a subsystem to populate the cgroup directory with file entries. The subsystem should make @@ -520,6 +520,7 @@ method can return an error code, the error code is currently not always handled well. void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp) +(cgroup_mutex held by caller) Called at the end of cgroup_clone() to do any paramater initialization which might be required before a task could attach. For -- cgit v1.2.3-70-g09d2 From f817ed48535ac6510ebae7c4116f24a5f9268834 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:53 -0800 Subject: memcg: move all acccounting to parent at rmdir() This patch provides a function to move account information of a page between mem_cgroups and rewrite force_empty to make use of this. This moving of page_cgroup is done under - lru_lock of source/destination mem_cgroup is held. - lock_page_cgroup() is held. Then, a routine which touches pc->mem_cgroup without lock_page_cgroup() should confirm pc->mem_cgroup is still valid or not. Typical code can be following. (while page is not under lock_page()) mem = pc->mem_cgroup; mz = page_cgroup_zoneinfo(pc) spin_lock_irqsave(&mz->lru_lock); if (pc->mem_cgroup == mem) ...../* some list handling */ spin_unlock_irqrestore(&mz->lru_lock); Of course, better way is lock_page_cgroup(pc); .... unlock_page_cgroup(pc); But you should confirm the nest of lock and avoid deadlock. If you treats page_cgroup from mem_cgroup's LRU under mz->lru_lock, you don't have to worry about what pc->mem_cgroup points to. moved pages are added to head of lru, not to tail. Expected users of this routine is: - force_empty (rmdir) - moving tasks between cgroup (for moving account information.) - hierarchy (maybe useful.) force_empty(rmdir) uses this move_account and move pages to its parent. 
This "move" will not cause OOM (I added "oom" parameter to try_charge().) If the parent is busy (not enough memory), force_empty calls try_to_free_page() and reduces usage. The purpose of this behavior is - Fix "forget all" behavior of force_empty and avoid leak of accounting. - By "moving first, free if necessary", keep pages in memory as much as possible. Adding a switch to change behavior of force_empty to - free first, move if necessary - free all, if there are mlocked/busy pages, return -EBUSY. is under consideration. (I'll add it if someone requests it.) This patch also removes memory.force_empty file, a brutal debug-only interface. Reviewed-by: Daisuke Nishimura Tested-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/memory.txt | 12 +- mm/memcontrol.c | 277 ++++++++++++++++++++++++++--------- 2 files changed, 214 insertions(+), 75 deletions(-) (limited to 'Documentation') diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index 1c07547d3f8..58f32c166fa 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -207,12 +207,6 @@ exceeded. The memory.stat file gives accounting information. Now, the number of caches, RSS and Active pages/Inactive pages are shown. -The memory.force_empty gives an interface to drop *all* charges by force. - -# echo 1 > memory.force_empty - -will drop all charges in cgroup. Currently, this is maintained for test. - 4. Testing Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11]. @@ -242,8 +236,10 @@ reclaimed. A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a cgroup might have some charge associated with it, even though all -tasks have migrated away from it. Such charges are automatically dropped at -rmdir() if there are no tasks. +tasks have migrated away from it.
+Such charges are moved to its parent as much as possible and freed if parent +is full. Both of RSS and CACHES are moved to parent. +If both of them are busy, rmdir() returns -EBUSY. 5. TODO diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b71195e8198..49234d93988 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -257,7 +257,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, } static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, - struct page_cgroup *pc) + struct page_cgroup *pc, bool hot) { int lru = LRU_BASE; @@ -271,7 +271,10 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, } MEM_CGROUP_ZSTAT(mz, lru) += 1; - list_add(&pc->lru, &mz->lists[lru]); + if (hot) + list_add(&pc->lru, &mz->lists[lru]); + else + list_add_tail(&pc->lru, &mz->lists[lru]); mem_cgroup_charge_statistics(pc->mem_cgroup, pc, true); } @@ -467,21 +470,12 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, return nr_taken; } - -/** - * mem_cgroup_try_charge - get charge of PAGE_SIZE. - * @mm: an mm_struct which is charged against. (when *memcg is NULL) - * @gfp_mask: gfp_mask for reclaim. - * @memcg: a pointer to memory cgroup which is charged against. - * - * charge against memory cgroup pointed by *memcg. if *memcg == NULL, estimated - * memory cgroup from @mm is got and stored in *memcg. - * - * Returns 0 if success. -ENOMEM at failure. +/* + * Unlike exported interface, "oom" parameter is added. if oom==true, + * oom-killer can be invoked. 
*/ - -int mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **memcg) +static int __mem_cgroup_try_charge(struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **memcg, bool oom) { struct mem_cgroup *mem; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; @@ -528,7 +522,8 @@ int mem_cgroup_try_charge(struct mm_struct *mm, continue; if (!nr_retries--) { - mem_cgroup_out_of_memory(mem, gfp_mask); + if (oom) + mem_cgroup_out_of_memory(mem, gfp_mask); goto nomem; } } @@ -538,6 +533,25 @@ nomem: return -ENOMEM; } +/** + * mem_cgroup_try_charge - get charge of PAGE_SIZE. + * @mm: an mm_struct which is charged against. (when *memcg is NULL) + * @gfp_mask: gfp_mask for reclaim. + * @memcg: a pointer to memory cgroup which is charged against. + * + * charge against memory cgroup pointed by *memcg. if *memcg == NULL, estimated + * memory cgroup from @mm is got and stored in *memcg. + * + * Returns 0 if success. -ENOMEM at failure. + * This call can invoke OOM-Killer. + */ + +int mem_cgroup_try_charge(struct mm_struct *mm, + gfp_t mask, struct mem_cgroup **memcg) +{ + return __mem_cgroup_try_charge(mm, mask, memcg, true); +} + /* * commit a charge got by mem_cgroup_try_charge() and makes page_cgroup to be * USED state. If already USED, uncharge and return. @@ -571,11 +585,109 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_add_list(mz, pc); + __mem_cgroup_add_list(mz, pc, true); spin_unlock_irqrestore(&mz->lru_lock, flags); unlock_page_cgroup(pc); } +/** + * mem_cgroup_move_account - move account of the page + * @pc: page_cgroup of the page. + * @from: mem_cgroup which the page is moved from. + * @to: mem_cgroup which the page is moved to. @from != @to. + * + * The caller must confirm following. + * 1. disable irq. + * 2. lru_lock of old mem_cgroup(@from) should be held. 
+ * + * returns 0 at success, + * returns -EBUSY when lock is busy or "pc" is unstable. + * + * This function does "uncharge" from old cgroup but doesn't do "charge" to + * new cgroup. It should be done by a caller. + */ + +static int mem_cgroup_move_account(struct page_cgroup *pc, + struct mem_cgroup *from, struct mem_cgroup *to) +{ + struct mem_cgroup_per_zone *from_mz, *to_mz; + int nid, zid; + int ret = -EBUSY; + + VM_BUG_ON(!irqs_disabled()); + VM_BUG_ON(from == to); + + nid = page_cgroup_nid(pc); + zid = page_cgroup_zid(pc); + from_mz = mem_cgroup_zoneinfo(from, nid, zid); + to_mz = mem_cgroup_zoneinfo(to, nid, zid); + + + if (!trylock_page_cgroup(pc)) + return ret; + + if (!PageCgroupUsed(pc)) + goto out; + + if (pc->mem_cgroup != from) + goto out; + + if (spin_trylock(&to_mz->lru_lock)) { + __mem_cgroup_remove_list(from_mz, pc); + css_put(&from->css); + res_counter_uncharge(&from->res, PAGE_SIZE); + pc->mem_cgroup = to; + css_get(&to->css); + __mem_cgroup_add_list(to_mz, pc, false); + ret = 0; + spin_unlock(&to_mz->lru_lock); + } +out: + unlock_page_cgroup(pc); + return ret; +} + +/* + * move charges to its parent. + */ + +static int mem_cgroup_move_parent(struct page_cgroup *pc, + struct mem_cgroup *child, + gfp_t gfp_mask) +{ + struct cgroup *cg = child->css.cgroup; + struct cgroup *pcg = cg->parent; + struct mem_cgroup *parent; + struct mem_cgroup_per_zone *mz; + unsigned long flags; + int ret; + + /* Is ROOT ? 
*/ + if (!pcg) + return -EINVAL; + + parent = mem_cgroup_from_cont(pcg); + + ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false); + if (ret) + return ret; + + mz = mem_cgroup_zoneinfo(child, + page_cgroup_nid(pc), page_cgroup_zid(pc)); + + spin_lock_irqsave(&mz->lru_lock, flags); + ret = mem_cgroup_move_account(pc, child, parent); + spin_unlock_irqrestore(&mz->lru_lock, flags); + + /* drop extra refcnt */ + css_put(&parent->css); + /* uncharge if move fails */ + if (ret) + res_counter_uncharge(&parent->res, PAGE_SIZE); + + return ret; +} + /* * Charge the memory controller for page usage. * Return @@ -597,7 +709,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, prefetchw(pc); mem = memcg; - ret = mem_cgroup_try_charge(mm, gfp_mask, &mem); + ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true); if (ret) return ret; @@ -899,46 +1011,52 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, * This routine traverse page_cgroup in given list and drop them all. * *And* this routine doesn't reclaim page itself, just removes page_cgroup. 
*/ -#define FORCE_UNCHARGE_BATCH (128) -static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, +static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, struct mem_cgroup_per_zone *mz, enum lru_list lru) { - struct page_cgroup *pc; - struct page *page; - int count = FORCE_UNCHARGE_BATCH; + struct page_cgroup *pc, *busy; unsigned long flags; + unsigned long loop; struct list_head *list; + int ret = 0; list = &mz->lists[lru]; - spin_lock_irqsave(&mz->lru_lock, flags); - while (!list_empty(list)) { - pc = list_entry(list->prev, struct page_cgroup, lru); - page = pc->page; - if (!PageCgroupUsed(pc)) + loop = MEM_CGROUP_ZSTAT(mz, lru); + /* give some margin against EBUSY etc...*/ + loop += 256; + busy = NULL; + while (loop--) { + ret = 0; + spin_lock_irqsave(&mz->lru_lock, flags); + if (list_empty(list)) { + spin_unlock_irqrestore(&mz->lru_lock, flags); break; - get_page(page); + } + pc = list_entry(list->prev, struct page_cgroup, lru); + if (busy == pc) { + list_move(&pc->lru, list); + busy = 0; + spin_unlock_irqrestore(&mz->lru_lock, flags); + continue; + } spin_unlock_irqrestore(&mz->lru_lock, flags); - /* - * Check if this page is on LRU. !LRU page can be found - * if it's under page migration. - */ - if (PageLRU(page)) { - __mem_cgroup_uncharge_common(page, - MEM_CGROUP_CHARGE_TYPE_FORCE); - put_page(page); - if (--count <= 0) { - count = FORCE_UNCHARGE_BATCH; - cond_resched(); - } - } else { - spin_lock_irqsave(&mz->lru_lock, flags); + + ret = mem_cgroup_move_parent(pc, mem, GFP_HIGHUSER_MOVABLE); + if (ret == -ENOMEM) break; - } - spin_lock_irqsave(&mz->lru_lock, flags); + + if (ret == -EBUSY || ret == -EINVAL) { + /* found lock contention or "pc" is obsolete. 
*/ + busy = pc; + cond_resched(); + } else + busy = NULL; } - spin_unlock_irqrestore(&mz->lru_lock, flags); + if (!ret && !list_empty(list)) + return -EBUSY; + return ret; } /* @@ -947,34 +1065,68 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, */ static int mem_cgroup_force_empty(struct mem_cgroup *mem) { - int ret = -EBUSY; - int node, zid; + int ret; + int node, zid, shrink; + int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; css_get(&mem->css); - /* - * page reclaim code (kswapd etc..) will move pages between - * active_list <-> inactive_list while we don't take a lock. - * So, we have to do loop here until all lists are empty. - */ + + shrink = 0; +move_account: while (mem->res.usage > 0) { + ret = -EBUSY; if (atomic_read(&mem->css.cgroup->count) > 0) goto out; + /* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); - for_each_node_state(node, N_POSSIBLE) - for (zid = 0; zid < MAX_NR_ZONES; zid++) { + ret = 0; + for_each_node_state(node, N_POSSIBLE) { + for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { struct mem_cgroup_per_zone *mz; enum lru_list l; mz = mem_cgroup_zoneinfo(mem, node, zid); - for_each_lru(l) - mem_cgroup_force_empty_list(mem, mz, l); + for_each_lru(l) { + ret = mem_cgroup_force_empty_list(mem, + mz, l); + if (ret) + break; + } } + if (ret) + break; + } + /* it seems parent cgroup doesn't have enough mem */ + if (ret == -ENOMEM) + goto try_to_free; cond_resched(); } ret = 0; out: css_put(&mem->css); return ret; + +try_to_free: + /* returns EBUSY if we come here twice. */ + if (shrink) { + ret = -EBUSY; + goto out; + } + /* try to free all pages in this cgroup */ + shrink = 1; + while (nr_retries && mem->res.usage > 0) { + int progress; + progress = try_to_free_mem_cgroup_pages(mem, + GFP_HIGHUSER_MOVABLE); + if (!progress) + nr_retries--; + + } + /* try move_account...there may be some *locked* pages. 
*/ + if (mem->res.usage) + goto move_account; + ret = 0; + goto out; } static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) @@ -1023,11 +1175,6 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) return 0; } -static int mem_force_empty_write(struct cgroup *cont, unsigned int event) -{ - return mem_cgroup_force_empty(mem_cgroup_from_cont(cont)); -} - static const struct mem_cgroup_stat_desc { const char *msg; u64 unit; @@ -1103,10 +1250,6 @@ static struct cftype mem_cgroup_files[] = { .trigger = mem_cgroup_reset, .read_u64 = mem_cgroup_read, }, - { - .name = "force_empty", - .trigger = mem_force_empty_write, - }, { .name = "stat", .read_map = mem_control_stat_show, -- cgit v1.2.3-70-g09d2 From c1e862c1f5ad34771b6d0a528cf681e0dcad7c86 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:55 -0800 Subject: memcg: new force_empty to free pages under group By memcg-move-all-accounts-to-parent-at-rmdir.patch, there is no leak of memory usage and force_empty is removed. This patch adds "force_empty" again, in reasonable manner. memory.force_empty file works when #echo 0 (or some) > memory.force_empty and have following function. 1. only works when there are no task in this cgroup. 2. free all page under this cgroup as much as possible. 3. page which cannot be freed will be moved up to parent. 4. Then, memcg will be empty after above echo returns. This is much better behavior than old "force_empty" which just forget all accounts. This patch also check signal_pending() and above "echo" can be stopped by "Ctrl-C". 
[akpm@linux-foundation.org: cleanup] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/memory.txt | 27 ++++++++++++++++++++---- mm/memcontrol.c | 41 ++++++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 11 deletions(-) (limited to 'Documentation') diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index 58f32c166fa..54253b7a8db 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -237,11 +237,30 @@ reclaimed. A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a cgroup might have some charge associated with it, even though all tasks have migrated away from it. -Such charges are moved to its parent as much as possible and freed if parent -is full. Both of RSS and CACHES are moved to parent. -If both of them are busy, rmdir() returns -EBUSY. +Such charges are freed(at default) or moved to its parent. When moved, +both of RSS and CACHES are moved to parent. +If both of them are busy, rmdir() returns -EBUSY. See 5.1 Also. -5. TODO +5. Misc. interfaces. + +5.1 force_empty + memory.force_empty interface is provided to make cgroup's memory usage empty. + You can use this interface only when the cgroup has no tasks. + When writing anything to this + + # echo 0 > memory.force_empty + + Almost all pages tracked by this memcg will be unmapped and freed. Some of + pages cannot be freed because it's locked or in-use. Such pages are moved + to parent and this cgroup will be empty. But this may return -EBUSY in + some too busy case. + + Typical use case of this interface is that calling this before rmdir(). + Because rmdir() moves all pages to parent, some out-of-use page caches can be + moved to the parent. If you want to avoid that, force_empty will be useful. + + +6. TODO 1. 
Add support for accounting huge pages (as a separate controller) 2. Make per-cgroup scanner reclaim not-shared pages first diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e00f25e6545..decace3bb57 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1062,21 +1062,27 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, * make mem_cgroup's charge to be 0 if there is no task. * This enables deleting this mem_cgroup. */ -static int mem_cgroup_force_empty(struct mem_cgroup *mem) +static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) { int ret; int node, zid, shrink; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; + struct cgroup *cgrp = mem->css.cgroup; css_get(&mem->css); shrink = 0; + /* should free all ? */ + if (free_all) + goto try_to_free; move_account: while (mem->res.usage > 0) { ret = -EBUSY; - if (atomic_read(&mem->css.cgroup->count) > 0) + if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) + goto out; + ret = -EINTR; + if (signal_pending(current)) goto out; - /* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); ret = 0; @@ -1106,19 +1112,29 @@ out: return ret; try_to_free: - /* returns EBUSY if we come here twice. */ - if (shrink) { + /* returns EBUSY if there is a task or if we come here twice. */ + if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) { ret = -EBUSY; goto out; } + /* we call try-to-free pages for make this cgroup empty */ + lru_add_drain_all(); /* try to free all pages in this cgroup */ shrink = 1; while (nr_retries && mem->res.usage > 0) { int progress; + + if (signal_pending(current)) { + ret = -EINTR; + goto out; + } progress = try_to_free_mem_cgroup_pages(mem, GFP_HIGHUSER_MOVABLE); - if (!progress) + if (!progress) { nr_retries--; + /* maybe some writeback is necessary */ + congestion_wait(WRITE, HZ/10); + } } /* try move_account...there may be some *locked* pages. 
*/ @@ -1128,6 +1144,12 @@ try_to_free: goto out; } +int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) +{ + return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true); +} + + static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) { return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, @@ -1225,6 +1247,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, return 0; } + static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", @@ -1253,6 +1276,10 @@ static struct cftype mem_cgroup_files[] = { .name = "stat", .read_map = mem_control_stat_show, }, + { + .name = "force_empty", + .trigger = mem_cgroup_force_empty_write, + }, }; static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) @@ -1350,7 +1377,7 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, struct cgroup *cont) { struct mem_cgroup *mem = mem_cgroup_from_cont(cont); - mem_cgroup_force_empty(mem); + mem_cgroup_force_empty(mem, false); } static void mem_cgroup_destroy(struct cgroup_subsys *ss, -- cgit v1.2.3-70-g09d2 From d13d144309d2e5a3e6ad978b16c1d0226ddc9231 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:56 -0800 Subject: memcg: handle swap caches SwapCache support for memory resource controller (memcg) Before mem+swap controller, memcg itself should handle SwapCache in proper way. This is cut-out from it. In current memcg, SwapCache is just leaked and the user can create tons of SwapCache. This is a leak of account and should be handled. SwapCache accounting is done as following. charge (anon) - charged when it's mapped. (because of readahead, charge at add_to_swap_cache() is not sane) uncharge (anon) - uncharged when it's dropped from swapcache and fully unmapped. means it's not uncharged at unmap. Note: delete from swap cache at swap-in is done after rmap information is established. charge (shmem) - charged at swap-in. 
this prevents charge at add_to_page_cache(). uncharge (shmem) - uncharged when it's dropped from swapcache and not on shmem's radix-tree. at migration, check against 'old page' is modified to handle shmem. Comparing to the old version discussed (and caused troubles), we have advantages of - PCG_USED bit. - simple migrating handling. So, situation is much easier than several months ago, maybe. [hugh@veritas.com: memcg: handle swap caches build fix] Reviewed-by: Daisuke Nishimura Tested-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/memory.txt | 5 +++ include/linux/swap.h | 22 ++++++++++++ mm/memcontrol.c | 67 ++++++++++++++++++++++++++++++++---- mm/shmem.c | 18 ++++++++-- mm/swap_state.c | 1 + 5 files changed, 105 insertions(+), 8 deletions(-) (limited to 'Documentation') diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index 54253b7a8db..9fe2d0eabe0 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -137,6 +137,11 @@ behind this approach is that a cgroup that aggressively uses a shared page will eventually get charged for it (once it is uncharged from the cgroup that brought it in -- this will happen on memory pressure). +Exception: When you do swapoff and make swapped-out pages of shmem(tmpfs) to +be backed into memory in force, charges for pages are accounted against the +caller of swapoff rather than the users of shmem. 
+ + 2.4 Reclaim Each cgroup maintains a per cgroup LRU that consists of an active diff --git a/include/linux/swap.h b/include/linux/swap.h index 91dee50fe26..f8f3907533f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -333,6 +333,22 @@ static inline void disable_swap_token(void) put_swap_token(swap_token_mm); } +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +extern int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked); +extern void mem_cgroup_uncharge_swapcache(struct page *page); +#else +static inline +int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked) +{ + return 0; +} +static inline void mem_cgroup_uncharge_swapcache(struct page *page) +{ +} +#endif + #else /* CONFIG_SWAP */ #define nr_swap_pages 0L @@ -409,6 +425,12 @@ static inline swp_entry_t get_swap_page(void) #define has_swap_token(x) 0 #define disable_swap_token() do { } while(0) +static inline int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked) +{ + return 0; +} + #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index decace3bb57..7288e9d85ca 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -139,6 +140,7 @@ enum charge_type { MEM_CGROUP_CHARGE_TYPE_MAPPED, MEM_CGROUP_CHARGE_TYPE_SHMEM, /* used by page migration of shmem */ MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ + MEM_CGROUP_CHARGE_TYPE_SWAPOUT, /* for accounting swapcache */ NR_CHARGE_TYPE, }; @@ -780,6 +782,33 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); } +#ifdef CONFIG_SWAP +int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked) +{ + int ret = 0; + + if (mem_cgroup_subsys.disabled) + return 0; + if (unlikely(!mm)) 
+ mm = &init_mm; + if (!locked) + lock_page(page); + /* + * If not locked, the page can be dropped from SwapCache until + * we reach here. + */ + if (PageSwapCache(page)) { + ret = mem_cgroup_charge_common(page, mm, mask, + MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); + } + if (!locked) + unlock_page(page); + + return ret; +} +#endif + void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) { struct page_cgroup *pc; @@ -817,6 +846,9 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) if (mem_cgroup_subsys.disabled) return; + if (PageSwapCache(page)) + return; + /* * Check if our page_cgroup is valid */ @@ -825,12 +857,26 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) return; lock_page_cgroup(pc); - if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED && page_mapped(page)) - || !PageCgroupUsed(pc)) { - /* This happens at race in zap_pte_range() and do_swap_page()*/ - unlock_page_cgroup(pc); - return; + + if (!PageCgroupUsed(pc)) + goto unlock_out; + + switch (ctype) { + case MEM_CGROUP_CHARGE_TYPE_MAPPED: + if (page_mapped(page)) + goto unlock_out; + break; + case MEM_CGROUP_CHARGE_TYPE_SWAPOUT: + if (!PageAnon(page)) { /* Shared memory */ + if (page->mapping && !page_is_file_cache(page)) + goto unlock_out; + } else if (page_mapped(page)) /* Anon */ + goto unlock_out; + break; + default: + break; } + ClearPageCgroupUsed(pc); mem = pc->mem_cgroup; @@ -844,6 +890,10 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) css_put(&mem->css); return; + +unlock_out: + unlock_page_cgroup(pc); + return; } void mem_cgroup_uncharge_page(struct page *page) @@ -863,6 +913,11 @@ void mem_cgroup_uncharge_cache_page(struct page *page) __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); } +void mem_cgroup_uncharge_swapcache(struct page *page) +{ + __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_SWAPOUT); +} + /* * Before starting migration, account PAGE_SIZE to mem_cgroup that the 
old * page belongs to. @@ -920,7 +975,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; /* unused page is not on radix-tree now. */ - if (unused && ctype != MEM_CGROUP_CHARGE_TYPE_MAPPED) + if (unused) __mem_cgroup_uncharge_common(unused, ctype); pc = lookup_page_cgroup(target); diff --git a/mm/shmem.c b/mm/shmem.c index bd9b4ea307b..adf5c3eedbc 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -928,8 +928,12 @@ found: error = 1; if (!inode) goto out; - /* Charge page using GFP_HIGHUSER_MOVABLE while we can wait */ - error = mem_cgroup_cache_charge(page, current->mm, GFP_HIGHUSER_MOVABLE); + /* + * Charge page using GFP_HIGHUSER_MOVABLE while we can wait. + * charged back to the user(not to caller) when swap account is used. + */ + error = mem_cgroup_cache_charge_swapin(page, + current->mm, GFP_HIGHUSER_MOVABLE, true); if (error) goto out; error = radix_tree_preload(GFP_KERNEL); @@ -1266,6 +1270,16 @@ repeat: goto repeat; } wait_on_page_locked(swappage); + /* + * We want to avoid charge at add_to_page_cache(). + * charge against this swap cache here. + */ + if (mem_cgroup_cache_charge_swapin(swappage, + current->mm, gfp, false)) { + page_cache_release(swappage); + error = -ENOMEM; + goto failed; + } page_cache_release(swappage); goto repeat; } diff --git a/mm/swap_state.c b/mm/swap_state.c index 81c825f67a7..09291ca11f5 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -118,6 +118,7 @@ void __delete_from_swap_cache(struct page *page) total_swapcache_pages--; __dec_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(del_total); + mem_cgroup_uncharge_swapcache(page); } /** -- cgit v1.2.3-70-g09d2 From c077719be8e9e6b55702117513d1b5f41d80404a Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:57 -0800 Subject: memcg: mem+swap controller Kconfig Config and control variable for mem+swap controller. This patch adds CONFIG_CGROUP_MEM_RES_CTLR_SWAP (memory resource controller swap extension.) 
For accounting swap, it's obvious that we have to use additional memory to remember "who uses swap". This adds more overhead. So, it's better to offer "choice" to users. This patch adds 2 ways to enable or disable the swap extension: - CONFIG - boot option Reviewed-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 +++ include/linux/memcontrol.h | 3 +++ init/Kconfig | 17 +++++++++++++++++ mm/memcontrol.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 532eacbbed6..fb849020aea 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1562,6 +1562,9 @@ and is between 256 and 4096 characters. It is defined in the file nosoftlockup [KNL] Disable the soft-lockup detector. + noswapaccount [KNL] Disable accounting of swap in memory resource + controller. (See Documentation/controllers/memory.txt) + nosync [HW,M68K] Disables sync negotiation for all devices. 
notsc [BUGS=X86-32] Disable Time Stamp Counter diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b095f5f6ecf..41b46cc9d1f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -77,6 +77,9 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +extern int do_swap_account; +#endif #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; diff --git a/init/Kconfig b/init/Kconfig index 7cbe1f43ca2..a724a149bf3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -428,6 +428,23 @@ config CGROUP_MEM_RES_CTLR config MM_OWNER bool +config CGROUP_MEM_RES_CTLR_SWAP + bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)" + depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL + help + Add swap management feature to memory resource controller. When you + enable this, you can limit mem+swap usage per cgroup. In other words, + when you disable this, memory resource controller has no cares to + usage of swap...a process can exhaust all of the swap. This extension + is useful when you want to avoid exhaustion swap but this itself + adds more overheads and consumes memory for remembering information. + Especially if you use 32bit system or small memory system, please + be careful about enabling this. When memory resource controller + is disabled by boot option, this will be automatically disabled and + there will be no overhead from this. Even when you set this config=y, + if boot option "noswapaccount" is set, swap will not be accounted. 
+ + endmenu config SYSFS_DEPRECATED diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7288e9d85ca..59dd8c11637 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -41,6 +41,15 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly; #define MEM_CGROUP_RECLAIM_RETRIES 5 +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ +int do_swap_account __read_mostly; +static int really_do_swap_account __initdata = 1; /* for remember boot option*/ +#else +#define do_swap_account (0) +#endif + + /* * Statistics for memory cgroup. */ @@ -1404,6 +1413,18 @@ static void mem_cgroup_free(struct mem_cgroup *mem) } +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +static void __init enable_swap_cgroup(void) +{ + if (!mem_cgroup_subsys.disabled && really_do_swap_account) + do_swap_account = 1; +} +#else +static void __init enable_swap_cgroup(void) +{ +} +#endif + static struct cgroup_subsys_state * mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) { @@ -1419,6 +1440,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) for_each_node_state(node, N_POSSIBLE) if (alloc_mem_cgroup_per_zone_info(mem, node)) goto free_out; + /* root ? */ + if (cont->parent == NULL) + enable_swap_cgroup(); return &mem->css; free_out: @@ -1490,3 +1514,13 @@ struct cgroup_subsys mem_cgroup_subsys = { .attach = mem_cgroup_move_task, .early_init = 0, }; + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP + +static int __init disable_swap_account(char *s) +{ + really_do_swap_account = 0; + return 1; +} +__setup("noswapaccount", disable_swap_account); +#endif -- cgit v1.2.3-70-g09d2 From 8c7c6e34a1256a5082d38c8e9bd1474476912715 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:00 -0800 Subject: memcg: mem+swap controller core This patch implements per cgroup limit for usage of memory+swap. Although SwapCache exists, double counting of swap-cache and swap-entry is avoided. 
Mem+Swap controller works as following. - memory usage is limited by memory.limit_in_bytes. - memory + swap usage is limited by memory.memsw_limit_in_bytes. This has following benefits. - A user can limit total resource usage of mem+swap. Without this, because memory resource controller doesn't take care of usage of swap, a process can exhaust all the swap (by memory leak.) We can avoid this case. And Swap is shared resource but it cannot be reclaimed (goes back to memory) until it's used. This characteristic can be trouble when the memory is divided into some parts by cpuset or memcg. Assume group A and group B. After some application executes, the system can be.. Group A -- very large free memory space but occupy 99% of swap. Group B -- under memory shortage but cannot use swap...it's nearly full. Ability to set appropriate swap limit for each group is required. Maybe someone wonder "why not swap but mem+swap ?" - The global LRU(kswapd) can swap out arbitrary pages. Swap-out means to move account from memory to swap...there is no change in usage of mem+swap. In other words, when we want to limit the usage of swap without affecting global LRU, mem+swap limit is better than just limiting swap. Accounting target information is stored in swap_cgroup which is per swap entry record. Charge is done as following. map - charge page and memsw. unmap - uncharge page/memsw if not SwapCache. swap-out (__delete_from_swap_cache) - uncharge page - record mem_cgroup information to swap_cgroup. swap-in (do_swap_page) - charged as page and memsw. record in swap_cgroup is cleared. memsw accounting is decremented. swap-free (swap_free()) - if swap entry is freed, memsw is uncharged by PAGE_SIZE. There are people work under never-swap environments and consider swap as something bad. For such people, this mem+swap controller extension is just an overhead. This overhead is avoided by config or boot option. (see Kconfig. detail is not in this patch.) 
TODO: - maybe more optimization can be done in swap-in path. (but not very safe.) But we just do simple accounting at this stage. [nishimura@mxp.nes.nec.co.jp: make resize limit hold mutex] [hugh@veritas.com: memswap controller core swapcache fixes] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Daisuke Nishimura Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/memory.txt | 29 ++- include/linux/memcontrol.h | 11 +- include/linux/swap.h | 14 +- mm/memcontrol.c | 400 +++++++++++++++++++++++++++++++---- mm/memory.c | 18 +- mm/swap_state.c | 5 +- mm/swapfile.c | 11 +- mm/vmscan.c | 6 +- 8 files changed, 440 insertions(+), 54 deletions(-) (limited to 'Documentation') diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index 9fe2d0eabe0..05fe29ab1e5 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -137,12 +137,32 @@ behind this approach is that a cgroup that aggressively uses a shared page will eventually get charged for it (once it is uncharged from the cgroup that brought it in -- this will happen on memory pressure). -Exception: When you do swapoff and make swapped-out pages of shmem(tmpfs) to +Exception: If CONFIG_CGROUP_MEM_RES_CTLR_SWAP is not used.. +When you do swapoff and make swapped-out pages of shmem(tmpfs) to be backed into memory in force, charges for pages are accounted against the caller of swapoff rather than the users of shmem. -2.4 Reclaim +2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP) +Swap Extension allows you to record charge for swap. A swapped-in page is +charged back to original page allocator if possible. + +When swap is accounted, following files are added. + - memory.memsw.usage_in_bytes. + - memory.memsw.limit_in_bytes. + +usage of mem+swap is limited by memsw.limit_in_bytes. + +Note: why 'mem+swap' rather than swap. 
+The global LRU(kswapd) can swap out arbitrary pages. Swap-out means +to move account from memory to swap...there is no change in usage of +mem+swap. + +In other words, when we want to limit the usage of swap without affecting +global LRU, mem+swap limit is better than just limiting swap from OS point +of view. + +2.5 Reclaim Each cgroup maintains a per cgroup LRU that consists of an active and inactive list. When a cgroup goes over its limit, we first try @@ -246,6 +266,11 @@ Such charges are freed(at default) or moved to its parent. When moved, both of RSS and CACHES are moved to parent. If both of them are busy, rmdir() returns -EBUSY. See 5.1 Also. +Charges recorded in swap information is not updated at removal of cgroup. +Recorded information is discarded and a cgroup which uses swap (swapcache) +will be charged as a new owner of it. + + 5. Misc. interfaces. 5.1 force_empty diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 41b46cc9d1f..ca51ac72d6c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -32,6 +32,8 @@ extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, /* for swap handling */ extern int mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **ptr); +extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, + struct page *page, gfp_t mask, struct mem_cgroup **ptr); extern void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr); extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); @@ -80,7 +82,6 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; #endif - #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -97,7 +98,13 @@ static inline int mem_cgroup_cache_charge(struct page *page, } static inline int mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **ptr) + gfp_t gfp_mask, 
struct mem_cgroup **ptr) +{ + return 0; +} + +static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm, + struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr) { return 0; } diff --git a/include/linux/swap.h b/include/linux/swap.h index f8f3907533f..be938ce4895 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -214,7 +214,7 @@ static inline void lru_cache_add_active_file(struct page *page) extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, - gfp_t gfp_mask); + gfp_t gfp_mask, bool noswap); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; @@ -336,7 +336,7 @@ static inline void disable_swap_token(void) #ifdef CONFIG_CGROUP_MEM_RES_CTLR extern int mem_cgroup_cache_charge_swapin(struct page *page, struct mm_struct *mm, gfp_t mask, bool locked); -extern void mem_cgroup_uncharge_swapcache(struct page *page); +extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent); #else static inline int mem_cgroup_cache_charge_swapin(struct page *page, @@ -344,7 +344,15 @@ int mem_cgroup_cache_charge_swapin(struct page *page, { return 0; } -static inline void mem_cgroup_uncharge_swapcache(struct page *page) +static inline void +mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) +{ +} +#endif +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +extern void mem_cgroup_uncharge_swap(swp_entry_t ent); +#else +static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) { } #endif diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 59dd8c11637..2efcf38f3b7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -131,6 +132,10 @@ struct mem_cgroup { * the counter to account for memory usage */ struct res_counter res; + /* + * the 
counter to account for mem+swap usage. + */ + struct res_counter memsw; /* * Per cgroup active and inactive list, similar to the * per zone LRU lists. @@ -138,6 +143,8 @@ struct mem_cgroup { struct mem_cgroup_lru_info info; int prev_priority; /* for recording reclaim priority */ + int obsolete; + atomic_t refcnt; /* * statistics. This must be placed at the end of memcg. */ @@ -167,6 +174,17 @@ pcg_default_flags[NR_CHARGE_TYPE] = { 0, /* FORCE */ }; + +/* for encoding cft->private value on file */ +#define _MEM (0) +#define _MEMSWAP (1) +#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) +#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff) +#define MEMFILE_ATTR(val) ((val) & 0xffff) + +static void mem_cgroup_get(struct mem_cgroup *mem); +static void mem_cgroup_put(struct mem_cgroup *mem); + /* * Always modified under lru lock. Then, not necessary to preempt_disable() */ @@ -485,7 +503,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, * oom-killer can be invoked. */ static int __mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **memcg, bool oom) + gfp_t gfp_mask, struct mem_cgroup **memcg, + bool oom) { struct mem_cgroup *mem; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; @@ -513,12 +532,25 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, css_get(&mem->css); } + while (1) { + int ret; + bool noswap = false; - while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) { + ret = res_counter_charge(&mem->res, PAGE_SIZE); + if (likely(!ret)) { + if (!do_swap_account) + break; + ret = res_counter_charge(&mem->memsw, PAGE_SIZE); + if (likely(!ret)) + break; + /* mem+swap counter fails */ + res_counter_uncharge(&mem->res, PAGE_SIZE); + noswap = true; + } if (!(gfp_mask & __GFP_WAIT)) goto nomem; - if (try_to_free_mem_cgroup_pages(mem, gfp_mask)) + if (try_to_free_mem_cgroup_pages(mem, gfp_mask, noswap)) continue; /* @@ -527,8 +559,13 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, * moved to swap 
cache or just unmapped from the cgroup. * Check the limit again to see if the reclaim reduced the * current usage of the cgroup before giving up + * */ - if (res_counter_check_under_limit(&mem->res)) + if (!do_swap_account && + res_counter_check_under_limit(&mem->res)) + continue; + if (do_swap_account && + res_counter_check_under_limit(&mem->memsw)) continue; if (!nr_retries--) { @@ -582,6 +619,8 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, if (unlikely(PageCgroupUsed(pc))) { unlock_page_cgroup(pc); res_counter_uncharge(&mem->res, PAGE_SIZE); + if (do_swap_account) + res_counter_uncharge(&mem->memsw, PAGE_SIZE); css_put(&mem->css); return; } @@ -646,6 +685,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, __mem_cgroup_remove_list(from_mz, pc); css_put(&from->css); res_counter_uncharge(&from->res, PAGE_SIZE); + if (do_swap_account) + res_counter_uncharge(&from->memsw, PAGE_SIZE); pc->mem_cgroup = to; css_get(&to->css); __mem_cgroup_add_list(to_mz, pc, false); @@ -692,8 +733,11 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, /* drop extra refcnt */ css_put(&parent->css); /* uncharge if move fails */ - if (ret) + if (ret) { res_counter_uncharge(&parent->res, PAGE_SIZE); + if (do_swap_account) + res_counter_uncharge(&parent->memsw, PAGE_SIZE); + } return ret; } @@ -791,7 +835,42 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); } +int mem_cgroup_try_charge_swapin(struct mm_struct *mm, + struct page *page, + gfp_t mask, struct mem_cgroup **ptr) +{ + struct mem_cgroup *mem; + swp_entry_t ent; + + if (mem_cgroup_subsys.disabled) + return 0; + + if (!do_swap_account) + goto charge_cur_mm; + + /* + * A racing thread's fault, or swapoff, may have already updated + * the pte, and even removed page from swap cache: return success + * to go on to do_swap_page()'s pte_same() test, which should fail. 
+ */ + if (!PageSwapCache(page)) + return 0; + + ent.val = page_private(page); + + mem = lookup_swap_cgroup(ent); + if (!mem || mem->obsolete) + goto charge_cur_mm; + *ptr = mem; + return __mem_cgroup_try_charge(NULL, mask, ptr, true); +charge_cur_mm: + if (unlikely(!mm)) + mm = &init_mm; + return __mem_cgroup_try_charge(mm, mask, ptr, true); +} + #ifdef CONFIG_SWAP + int mem_cgroup_cache_charge_swapin(struct page *page, struct mm_struct *mm, gfp_t mask, bool locked) { @@ -808,8 +887,28 @@ int mem_cgroup_cache_charge_swapin(struct page *page, * we reach here. */ if (PageSwapCache(page)) { + struct mem_cgroup *mem = NULL; + swp_entry_t ent; + + ent.val = page_private(page); + if (do_swap_account) { + mem = lookup_swap_cgroup(ent); + if (mem && mem->obsolete) + mem = NULL; + if (mem) + mm = NULL; + } ret = mem_cgroup_charge_common(page, mm, mask, - MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); + MEM_CGROUP_CHARGE_TYPE_SHMEM, mem); + + if (!ret && do_swap_account) { + /* avoid double counting */ + mem = swap_cgroup_record(ent, NULL); + if (mem) { + res_counter_uncharge(&mem->memsw, PAGE_SIZE); + mem_cgroup_put(mem); + } + } } if (!locked) unlock_page(page); @@ -828,6 +927,23 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) return; pc = lookup_page_cgroup(page); __mem_cgroup_commit_charge(ptr, pc, MEM_CGROUP_CHARGE_TYPE_MAPPED); + /* + * Now swap is on-memory. This means this page may be + * counted both as mem and swap....double count. + * Fix it by uncharging from memsw. This SwapCache is stable + * because we're still under lock_page(). 
+ */ + if (do_swap_account) { + swp_entry_t ent = {.val = page_private(page)}; + struct mem_cgroup *memcg; + memcg = swap_cgroup_record(ent, NULL); + if (memcg) { + /* If memcg is obsolete, memcg can be != ptr */ + res_counter_uncharge(&memcg->memsw, PAGE_SIZE); + mem_cgroup_put(memcg); + } + + } } void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) @@ -837,6 +953,8 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) if (!mem) return; res_counter_uncharge(&mem->res, PAGE_SIZE); + if (do_swap_account) + res_counter_uncharge(&mem->memsw, PAGE_SIZE); css_put(&mem->css); } @@ -844,29 +962,31 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) /* * uncharge if !page_mapped(page) */ -static void +static struct mem_cgroup * __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) { struct page_cgroup *pc; - struct mem_cgroup *mem; + struct mem_cgroup *mem = NULL; struct mem_cgroup_per_zone *mz; unsigned long flags; if (mem_cgroup_subsys.disabled) - return; + return NULL; if (PageSwapCache(page)) - return; + return NULL; /* * Check if our page_cgroup is valid */ pc = lookup_page_cgroup(page); if (unlikely(!pc || !PageCgroupUsed(pc))) - return; + return NULL; lock_page_cgroup(pc); + mem = pc->mem_cgroup; + if (!PageCgroupUsed(pc)) goto unlock_out; @@ -886,8 +1006,11 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) break; } + res_counter_uncharge(&mem->res, PAGE_SIZE); + if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) + res_counter_uncharge(&mem->memsw, PAGE_SIZE); + ClearPageCgroupUsed(pc); - mem = pc->mem_cgroup; mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); @@ -895,14 +1018,13 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) spin_unlock_irqrestore(&mz->lru_lock, flags); unlock_page_cgroup(pc); - res_counter_uncharge(&mem->res, PAGE_SIZE); css_put(&mem->css); - return; + return mem; unlock_out: unlock_page_cgroup(pc); - return; + 
return NULL; } void mem_cgroup_uncharge_page(struct page *page) @@ -922,10 +1044,42 @@ void mem_cgroup_uncharge_cache_page(struct page *page) __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); } -void mem_cgroup_uncharge_swapcache(struct page *page) +/* + * called from __delete_from_swap_cache() and drop "page" account. + * memcg information is recorded to swap_cgroup of "ent" + */ +void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) +{ + struct mem_cgroup *memcg; + + memcg = __mem_cgroup_uncharge_common(page, + MEM_CGROUP_CHARGE_TYPE_SWAPOUT); + /* record memcg information */ + if (do_swap_account && memcg) { + swap_cgroup_record(ent, memcg); + mem_cgroup_get(memcg); + } +} + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +/* + * called from swap_entry_free(). remove record in swap_cgroup and + * uncharge "memsw" account. + */ +void mem_cgroup_uncharge_swap(swp_entry_t ent) { - __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_SWAPOUT); + struct mem_cgroup *memcg; + + if (!do_swap_account) + return; + + memcg = swap_cgroup_record(ent, NULL); + if (memcg) { + res_counter_uncharge(&memcg->memsw, PAGE_SIZE); + mem_cgroup_put(memcg); + } } +#endif /* * Before starting migration, account PAGE_SIZE to mem_cgroup that the old @@ -1034,7 +1188,7 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) rcu_read_unlock(); do { - progress = try_to_free_mem_cgroup_pages(mem, gfp_mask); + progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true); progress += res_counter_check_under_limit(&mem->res); } while (!progress && --retry); @@ -1044,26 +1198,84 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) return 0; } +static DEFINE_MUTEX(set_limit_mutex); + static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, - unsigned long long val) + unsigned long long val) { int retry_count = MEM_CGROUP_RECLAIM_RETRIES; int progress; + u64 memswlimit; int ret = 0; - while (res_counter_set_limit(&memcg->res, val)) { + 
while (retry_count) { if (signal_pending(current)) { ret = -EINTR; break; } - if (!retry_count) { - ret = -EBUSY; + /* + * Rather than hide all in some function, I do this in + * open coded manner. You see what this really does. + * We have to guarantee mem->res.limit < mem->memsw.limit. + */ + mutex_lock(&set_limit_mutex); + memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); + if (memswlimit < val) { + ret = -EINVAL; + mutex_unlock(&set_limit_mutex); break; } + ret = res_counter_set_limit(&memcg->res, val); + mutex_unlock(&set_limit_mutex); + + if (!ret) + break; + progress = try_to_free_mem_cgroup_pages(memcg, - GFP_HIGHUSER_MOVABLE); - if (!progress) + GFP_HIGHUSER_MOVABLE, false); + if (!progress) retry_count--; + } + return ret; +} + +int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, + unsigned long long val) +{ + int retry_count = MEM_CGROUP_RECLAIM_RETRIES; + u64 memlimit, oldusage, curusage; + int ret; + + if (!do_swap_account) + return -EINVAL; + + while (retry_count) { + if (signal_pending(current)) { + ret = -EINTR; + break; + } + /* + * Rather than hide all in some function, I do this in + * open coded manner. You see what this really does. + * We have to guarantee mem->res.limit < mem->memsw.limit. 
+ */ + mutex_lock(&set_limit_mutex); + memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT); + if (memlimit > val) { + ret = -EINVAL; + mutex_unlock(&set_limit_mutex); + break; + } + ret = res_counter_set_limit(&memcg->memsw, val); + mutex_unlock(&set_limit_mutex); + + if (!ret) + break; + + oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); + try_to_free_mem_cgroup_pages(memcg, GFP_HIGHUSER_MOVABLE, true); + curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); + if (curusage >= oldusage) retry_count--; } return ret; @@ -1193,7 +1405,7 @@ try_to_free: goto out; } progress = try_to_free_mem_cgroup_pages(mem, - GFP_HIGHUSER_MOVABLE); + GFP_HIGHUSER_MOVABLE, false); if (!progress) { nr_retries--; /* maybe some writeback is necessary */ @@ -1216,8 +1428,25 @@ int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) { - return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, - cft->private); + struct mem_cgroup *mem = mem_cgroup_from_cont(cont); + u64 val = 0; + int type, name; + + type = MEMFILE_TYPE(cft->private); + name = MEMFILE_ATTR(cft->private); + switch (type) { + case _MEM: + val = res_counter_read_u64(&mem->res, name); + break; + case _MEMSWAP: + if (do_swap_account) + val = res_counter_read_u64(&mem->memsw, name); + break; + default: + BUG(); + break; + } + return val; } /* * The user of this function is... 
@@ -1227,15 +1456,22 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, const char *buffer) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + int type, name; unsigned long long val; int ret; - switch (cft->private) { + type = MEMFILE_TYPE(cft->private); + name = MEMFILE_ATTR(cft->private); + switch (name) { case RES_LIMIT: /* This function does all necessary parse...reuse it */ ret = res_counter_memparse_write_strategy(buffer, &val); - if (!ret) + if (ret) + break; + if (type == _MEM) ret = mem_cgroup_resize_limit(memcg, val); + else + ret = mem_cgroup_resize_memsw_limit(memcg, val); break; default: ret = -EINVAL; /* should be BUG() ? */ @@ -1247,14 +1483,23 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) { struct mem_cgroup *mem; + int type, name; mem = mem_cgroup_from_cont(cont); - switch (event) { + type = MEMFILE_TYPE(event); + name = MEMFILE_ATTR(event); + switch (name) { case RES_MAX_USAGE: - res_counter_reset_max(&mem->res); + if (type == _MEM) + res_counter_reset_max(&mem->res); + else + res_counter_reset_max(&mem->memsw); break; case RES_FAILCNT: - res_counter_reset_failcnt(&mem->res); + if (type == _MEM) + res_counter_reset_failcnt(&mem->res); + else + res_counter_reset_failcnt(&mem->memsw); break; } return 0; @@ -1315,24 +1560,24 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", - .private = RES_USAGE, + .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), .read_u64 = mem_cgroup_read, }, { .name = "max_usage_in_bytes", - .private = RES_MAX_USAGE, + .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), .trigger = mem_cgroup_reset, .read_u64 = mem_cgroup_read, }, { .name = "limit_in_bytes", - .private = RES_LIMIT, + .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), .write_string = mem_cgroup_write, .read_u64 = mem_cgroup_read, }, { .name = "failcnt", - 
.private = RES_FAILCNT, + .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), .trigger = mem_cgroup_reset, .read_u64 = mem_cgroup_read, }, @@ -1346,6 +1591,47 @@ static struct cftype mem_cgroup_files[] = { }, }; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +static struct cftype memsw_cgroup_files[] = { + { + .name = "memsw.usage_in_bytes", + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), + .read_u64 = mem_cgroup_read, + }, + { + .name = "memsw.max_usage_in_bytes", + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), + .trigger = mem_cgroup_reset, + .read_u64 = mem_cgroup_read, + }, + { + .name = "memsw.limit_in_bytes", + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), + .write_string = mem_cgroup_write, + .read_u64 = mem_cgroup_read, + }, + { + .name = "memsw.failcnt", + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), + .trigger = mem_cgroup_reset, + .read_u64 = mem_cgroup_read, + }, +}; + +static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) +{ + if (!do_swap_account) + return 0; + return cgroup_add_files(cont, ss, memsw_cgroup_files, + ARRAY_SIZE(memsw_cgroup_files)); +}; +#else +static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) +{ + return 0; +} +#endif + static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) { struct mem_cgroup_per_node *pn; @@ -1404,14 +1690,44 @@ static struct mem_cgroup *mem_cgroup_alloc(void) return mem; } +/* + * At destroying mem_cgroup, references from swap_cgroup can remain. + * (scanning all at force_empty is too costly...) + * + * Instead of clearing all references at force_empty, we remember + * the number of reference from swap_cgroup and free mem_cgroup when + * it goes down to 0. + * + * When mem_cgroup is destroyed, mem->obsolete will be set to 0 and + * entry which points to this memcg will be ignore at swapin. + * + * Removal of cgroup itself succeeds regardless of refs from swap. 
+ */ + static void mem_cgroup_free(struct mem_cgroup *mem) { + if (atomic_read(&mem->refcnt) > 0) + return; if (mem_cgroup_size() < PAGE_SIZE) kfree(mem); else vfree(mem); } +static void mem_cgroup_get(struct mem_cgroup *mem) +{ + atomic_inc(&mem->refcnt); +} + +static void mem_cgroup_put(struct mem_cgroup *mem) +{ + if (atomic_dec_and_test(&mem->refcnt)) { + if (!mem->obsolete) + return; + mem_cgroup_free(mem); + } +} + #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP static void __init enable_swap_cgroup(void) @@ -1436,6 +1752,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) return ERR_PTR(-ENOMEM); res_counter_init(&mem->res); + res_counter_init(&mem->memsw); for_each_node_state(node, N_POSSIBLE) if (alloc_mem_cgroup_per_zone_info(mem, node)) @@ -1456,6 +1773,7 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, struct cgroup *cont) { struct mem_cgroup *mem = mem_cgroup_from_cont(cont); + mem->obsolete = 1; mem_cgroup_force_empty(mem, false); } @@ -1474,8 +1792,14 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss, static int mem_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) { - return cgroup_add_files(cont, ss, mem_cgroup_files, - ARRAY_SIZE(mem_cgroup_files)); + int ret; + + ret = cgroup_add_files(cont, ss, mem_cgroup_files, + ARRAY_SIZE(mem_cgroup_files)); + + if (!ret) + ret = register_memsw_files(cont, ss); + return ret; } static void mem_cgroup_move_task(struct cgroup_subsys *ss, diff --git a/mm/memory.c b/mm/memory.c index ba5189e322e..1358012ffa7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2431,7 +2431,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, lock_page(page); delayacct_clear_flag(DELAYACCT_PF_SWAPIN); - if (mem_cgroup_try_charge(mm, GFP_HIGHUSER_MOVABLE, &ptr) == -ENOMEM) { + if (mem_cgroup_try_charge_swapin(mm, page, + GFP_HIGHUSER_MOVABLE, &ptr) == -ENOMEM) { ret = VM_FAULT_OOM; unlock_page(page); goto out; @@ -2449,8 +2450,20 @@ static int do_swap_page(struct 
mm_struct *mm, struct vm_area_struct *vma, goto out_nomap; } - /* The page isn't present yet, go ahead with the fault. */ + /* + * The page isn't present yet, go ahead with the fault. + * + * Be careful about the sequence of operations here. + * To get its accounting right, reuse_swap_page() must be called + * while the page is counted on swap but not yet in mapcount i.e. + * before page_add_anon_rmap() and swap_free(); try_to_free_swap() + * must be called after the swap_free(), or it will never succeed. + * And mem_cgroup_commit_charge_swapin(), which uses the swp_entry + * in page->private, must be called before reuse_swap_page(), + * which may delete_from_swap_cache(). + */ + mem_cgroup_commit_charge_swapin(page, ptr); inc_mm_counter(mm, anon_rss); pte = mk_pte(page, vma->vm_page_prot); if (write_access && reuse_swap_page(page)) { @@ -2461,7 +2474,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, flush_icache_page(vma, page); set_pte_at(mm, address, page_table, pte); page_add_anon_rmap(page, vma, address); - mem_cgroup_commit_charge_swapin(page, ptr); swap_free(entry); if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) diff --git a/mm/swap_state.c b/mm/swap_state.c index 09291ca11f5..3ecea98ecb4 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -108,6 +109,8 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) */ void __delete_from_swap_cache(struct page *page) { + swp_entry_t ent = {.val = page_private(page)}; + VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(!PageSwapCache(page)); VM_BUG_ON(PageWriteback(page)); @@ -118,7 +121,7 @@ void __delete_from_swap_cache(struct page *page) total_swapcache_pages--; __dec_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(del_total); - mem_cgroup_uncharge_swapcache(page); + mem_cgroup_uncharge_swapcache(page, ent); } /** diff --git a/mm/swapfile.c b/mm/swapfile.c index 
1e7a715a386..0579d9069b6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -471,8 +471,9 @@ out: return NULL; } -static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) +static int swap_entry_free(struct swap_info_struct *p, swp_entry_t ent) { + unsigned long offset = swp_offset(ent); int count = p->swap_map[offset]; if (count < SWAP_MAP_MAX) { @@ -487,6 +488,7 @@ static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) swap_list.next = p - swap_info; nr_swap_pages++; p->inuse_pages--; + mem_cgroup_uncharge_swap(ent); } } return count; @@ -502,7 +504,7 @@ void swap_free(swp_entry_t entry) p = swap_info_get(entry); if (p) { - swap_entry_free(p, swp_offset(entry)); + swap_entry_free(p, entry); spin_unlock(&swap_lock); } } @@ -582,7 +584,7 @@ int free_swap_and_cache(swp_entry_t entry) p = swap_info_get(entry); if (p) { - if (swap_entry_free(p, swp_offset(entry)) == 1) { + if (swap_entry_free(p, entry) == 1) { page = find_get_page(&swapper_space, entry.val); if (page && !trylock_page(page)) { page_cache_release(page); @@ -696,7 +698,8 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, pte_t *pte; int ret = 1; - if (mem_cgroup_try_charge(vma->vm_mm, GFP_HIGHUSER_MOVABLE, &ptr)) + if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, + GFP_HIGHUSER_MOVABLE, &ptr)) ret = -ENOMEM; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); diff --git a/mm/vmscan.c b/mm/vmscan.c index b07c48b09a9..f63b20dd771 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1661,7 +1661,8 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, - gfp_t gfp_mask) + gfp_t gfp_mask, + bool noswap) { struct scan_control sc = { .may_writepage = !laptop_mode, @@ -1674,6 +1675,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, }; struct zonelist *zonelist; + if (noswap) + sc.may_swap = 0; + sc.gfp_mask = 
(gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); zonelist = NODE_DATA(numa_node_id())->node_zonelists; -- cgit v1.2.3-70-g09d2 From 52bc0d82100cd896213a9a25ec01c1ba87b939db Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 7 Jan 2009 18:08:03 -0800 Subject: memcg: memory cgroup hierarchy documentation Documentation updates for hierarchy support Signed-off-by: Balbir Singh Cc: YAMAMOTO Takashi Cc: Paul Menage Cc: Li Zefan Cc: David Rientjes Cc: Pavel Emelianov Cc: Dhaval Giani Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/memory.txt | 38 +++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index 05fe29ab1e5..09e1c737d28 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -289,8 +289,44 @@ will be charged as a new owner of it. Because rmdir() moves all pages to parent, some out-of-use page caches can be moved to the parent. If you want to avoid that, force_empty will be useful. +6. Hierarchy support -6. TODO +The memory controller supports a deep hierarchy and hierarchical accounting. +The hierarchy is created by creating the appropriate cgroups in the +cgroup filesystem. Consider for example, the following cgroup filesystem +hierarchy + + root + / | \ + / | \ + a b c + | \ + | \ + d e + +In the diagram above, with hierarchical accounting enabled, all memory +usage of e, is accounted to its ancestors up until the root (i.e, c and root), +that has memory.use_hierarchy enabled. If one of the ancestors goes over its +limit, the reclaim algorithm reclaims from the tasks in the ancestor and the +children of the ancestor. + +6.1 Enabling hierarchical accounting and reclaim + +The memory controller by default disables the hierarchy feature. 
Support +can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup + +# echo 1 > memory.use_hierarchy + +The feature can be disabled by + +# echo 0 > memory.use_hierarchy + +NOTE1: Enabling/disabling will fail if the cgroup already has other +cgroups created below it. + +NOTE2: This feature can be enabled/disabled per subtree. + +7. TODO 1. Add support for accounting huge pages (as a separate controller) 2. Make per-cgroup scanner reclaim not-shared pages first -- cgit v1.2.3-70-g09d2 From 7f016ee8b6a9a43f768e6252021f169abec4fa1f Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:22 -0800 Subject: memcg: show reclaim stat Add the following five fields to memory.stat file: - inactive_ratio - recent_rotated_anon - recent_rotated_file - recent_scanned_anon - recent_scanned_file Acked-by: Rik van Riel Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/memory.txt | 25 +++++++++++++++++++++++++ mm/memcontrol.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'Documentation') diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index 09e1c737d28..d71745cc2f0 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -289,6 +289,31 @@ will be charged as a new owner of it. Because rmdir() moves all pages to parent, some out-of-use page caches can be moved to the parent. If you want to avoid that, force_empty will be useful. +5.2 stat file + memory.stat file includes following statistics (now) + cache - # of pages from page-cache and shmem. + rss - # of pages from anonymous memory. + pgpgin - # of event of charging + pgpgout - # of event of uncharging + active_anon - # of pages on active lru of anon, shmem.
+ inactive_anon - # of pages on inactive lru of anon, shmem + active_file - # of pages on active lru of file-cache + inactive_file - # of pages on inactive lru of file cache + unevictable - # of pages cannot be reclaimed.(mlocked etc) + + The fields below depend on CONFIG_DEBUG_VM. + inactive_ratio - VM internal parameter. (see mm/page_alloc.c) + recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) + recent_rotated_file - VM internal parameter. (see mm/vmscan.c) + recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) + recent_scanned_file - VM internal parameter. (see mm/vmscan.c) + + Memo: + recent_rotated means recent frequency of lru rotation. + recent_scanned means recent # of scans to lru. + showing for better debug please see the code for meanings. + + 6. Hierarchy support The memory controller supports a deep hierarchy and hierarchical accounting. diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b8c1e5acc25..af28e128b74 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1816,6 +1816,36 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, cb->fill(cb, "unevictable", unevictable * PAGE_SIZE); } + +#ifdef CONFIG_DEBUG_VM + cb->fill(cb, "inactive_ratio", mem_cont->inactive_ratio); + + { + int nid, zid; + struct mem_cgroup_per_zone *mz; + unsigned long recent_rotated[2] = {0, 0}; + unsigned long recent_scanned[2] = {0, 0}; + + for_each_online_node(nid) + for (zid = 0; zid < MAX_NR_ZONES; zid++) { + mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); + + recent_rotated[0] += + mz->reclaim_stat.recent_rotated[0]; + recent_rotated[1] += + mz->reclaim_stat.recent_rotated[1]; + recent_scanned[0] += + mz->reclaim_stat.recent_scanned[0]; + recent_scanned[1] += + mz->reclaim_stat.recent_scanned[1]; + } + cb->fill(cb, "recent_rotated_anon", recent_rotated[0]); + cb->fill(cb, "recent_rotated_file", recent_rotated[1]); + cb->fill(cb, "recent_scanned_anon", recent_scanned[0]); + cb->fill(cb, "recent_scanned_file", recent_scanned[1]); + }
+#endif + return 0; } -- cgit v1.2.3-70-g09d2 From a7885eb8ad465ec9db99ac5b5e6680f0ca8e11c8 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:24 -0800 Subject: memcg: swappiness Currently, /proc/sys/vm/swappiness can change swappiness ratio for global reclaim. However, memcg reclaim doesn't have tuning parameter for itself. In general, the optimal swappiness depends on the workload. (e.g. HPC workloads need lower swappiness than others.) Thus, per-cgroup swappiness improves administrator tunability. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/memory.txt | 9 +++++ include/linux/swap.h | 3 +- mm/memcontrol.c | 78 ++++++++++++++++++++++++++++++++---- mm/vmscan.c | 7 ++-- 4 files changed, 86 insertions(+), 11 deletions(-) (limited to 'Documentation') diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index d71745cc2f0..e1501964df1 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -314,6 +314,15 @@ will be charged as a new owner of it. showing for better debug please see the code for meanings. +5.3 swappiness + Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. + + Following cgroup's swappiness can't be changed. + - root cgroup (uses /proc/sys/vm/swappiness). + - a cgroup which uses hierarchy and it has child cgroup. + - a cgroup which uses hierarchy and not the root of hierarchy. + + 6. Hierarchy support The memory controller supports a deep hierarchy and hierarchical accounting.
diff --git a/include/linux/swap.h b/include/linux/swap.h index be938ce4895..4ccca25d0f0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -214,7 +214,8 @@ static inline void lru_cache_add_active_file(struct page *page) extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, - gfp_t gfp_mask, bool noswap); + gfp_t gfp_mask, bool noswap, + unsigned int swappiness); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 027c0dd7a83..ab2ecbb95b8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -164,6 +164,9 @@ struct mem_cgroup { int obsolete; atomic_t refcnt; + unsigned int swappiness; + + unsigned int inactive_ratio; /* @@ -636,6 +639,22 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) return false; } +static unsigned int get_swappiness(struct mem_cgroup *memcg) +{ + struct cgroup *cgrp = memcg->css.cgroup; + unsigned int swappiness; + + /* root ? */ + if (cgrp->parent == NULL) + return vm_swappiness; + + spin_lock(&memcg->reclaim_param_lock); + swappiness = memcg->swappiness; + spin_unlock(&memcg->reclaim_param_lock); + + return swappiness; +} + /* * Dance down the hierarchy if needed to reclaim memory. We remember the * last child we reclaimed from, so that we don't end up penalizing @@ -656,7 +675,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, * but there might be left over accounting, even after children * have left. 
*/ - ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap); + ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap, + get_swappiness(root_mem)); if (mem_cgroup_check_under_limit(root_mem)) return 0; if (!root_mem->use_hierarchy) @@ -672,7 +692,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, cgroup_unlock(); continue; } - ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap); + ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, + get_swappiness(next_mem)); if (mem_cgroup_check_under_limit(root_mem)) return 0; cgroup_lock(); @@ -1400,7 +1421,8 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) rcu_read_unlock(); do { - progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true); + progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true, + get_swappiness(mem)); progress += mem_cgroup_check_under_limit(mem); } while (!progress && --retry); @@ -1468,7 +1490,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, break; progress = try_to_free_mem_cgroup_pages(memcg, - GFP_KERNEL, false); + GFP_KERNEL, + false, + get_swappiness(memcg)); if (!progress) retry_count--; } @@ -1512,7 +1536,8 @@ int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, break; oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); - try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true); + try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true, + get_swappiness(memcg)); curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); if (curusage >= oldusage) retry_count--; @@ -1643,8 +1668,8 @@ try_to_free: ret = -EINTR; goto out; } - progress = try_to_free_mem_cgroup_pages(mem, - GFP_KERNEL, false); + progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, + false, get_swappiness(mem)); if (!progress) { nr_retries--; /* maybe some writeback is necessary */ @@ -1864,6 +1889,37 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, return 0; } +static u64 
mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); + + return get_swappiness(memcg); +} + +static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, + u64 val) +{ + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); + struct mem_cgroup *parent; + if (val > 100) + return -EINVAL; + + if (cgrp->parent == NULL) + return -EINVAL; + + parent = mem_cgroup_from_cont(cgrp->parent); + /* If under hierarchy, only empty-root can set this value */ + if ((parent->use_hierarchy) || + (memcg->use_hierarchy && !list_empty(&cgrp->children))) + return -EINVAL; + + spin_lock(&memcg->reclaim_param_lock); + memcg->swappiness = val; + spin_unlock(&memcg->reclaim_param_lock); + + return 0; +} + static struct cftype mem_cgroup_files[] = { { @@ -1902,6 +1958,11 @@ static struct cftype mem_cgroup_files[] = { .write_u64 = mem_cgroup_hierarchy_write, .read_u64 = mem_cgroup_hierarchy_read, }, + { + .name = "swappiness", + .read_u64 = mem_cgroup_swappiness_read, + .write_u64 = mem_cgroup_swappiness_write, + }, }; #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP @@ -2093,6 +2154,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) mem->last_scanned_child = NULL; spin_lock_init(&mem->reclaim_param_lock); + if (parent) + mem->swappiness = get_swappiness(parent); + return &mem->css; free_out: for_each_node_state(node, N_POSSIBLE) diff --git a/mm/vmscan.c b/mm/vmscan.c index f03c239440a..ece2f405187 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1707,14 +1707,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, - gfp_t gfp_mask, - bool noswap) + gfp_t gfp_mask, + bool noswap, + unsigned int swappiness) { struct scan_control sc = { .may_writepage = !laptop_mode, .may_swap = 1, .swap_cluster_max = SWAP_CLUSTER_MAX, - .swappiness = vm_swappiness, + .swappiness = 
swappiness, .order = 0, .mem_cgroup = mem_cont, .isolate_pages = mem_cgroup_isolate_pages, -- cgit v1.2.3-70-g09d2 From 9836d89191edd4887ed026a9ce53d9dfac62ec1c Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:27 -0800 Subject: memcg: explain details and test document Documentation for implementation details and how to test. Just an example. feel free to modify, add, remove lines. Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/memcg_test.txt | 311 +++++++++++++++++++++++++++++++ 1 file changed, 311 insertions(+) create mode 100644 Documentation/controllers/memcg_test.txt (limited to 'Documentation') diff --git a/Documentation/controllers/memcg_test.txt b/Documentation/controllers/memcg_test.txt new file mode 100644 index 00000000000..c91f69b0b54 --- /dev/null +++ b/Documentation/controllers/memcg_test.txt @@ -0,0 +1,311 @@ +Memory Resource Controller(Memcg) Implementation Memo. +Last Updated: 2008/12/10 +Base Kernel Version: based on 2.6.28-rc7-mm. + +Because VM is getting complex (one of reasons is memcg...), memcg's behavior +is complex. This is a document for memcg's internal behavior. +Please note that implementation details can be changed. + +(*) Topics on API should be in Documentation/controllers/memory.txt) + +0. How to record usage ? + 2 objects are used. + + page_cgroup ....an object per page. + Allocated at boot or memory hotplug. Freed at memory hot removal. + + swap_cgroup ... an entry per swp_entry. + Allocated at swapon(). Freed at swapoff(). + + The page_cgroup has USED bit and double count against a page_cgroup never + occurs. swap_cgroup is used only when a charged page is swapped-out. + +1. Charge + + a page/swp_entry may be charged (usage += PAGE_SIZE) at + + mem_cgroup_newpage_charge() + Called at new page fault and Copy-On-Write. 
+ + mem_cgroup_try_charge_swapin() + Called at do_swap_page() (page fault on swap entry) and swapoff. + Followed by charge-commit-cancel protocol. (With swap accounting) + At commit, a charge recorded in swap_cgroup is removed. + + mem_cgroup_cache_charge() + Called at add_to_page_cache() + + mem_cgroup_cache_charge_swapin() + Called at shmem's swapin. + + mem_cgroup_prepare_migration() + Called before migration. "extra" charge is done and followed by + charge-commit-cancel protocol. + At commit, charge against oldpage or newpage will be committed. + +2. Uncharge + a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by + + mem_cgroup_uncharge_page() + Called when an anonymous page is fully unmapped. I.e., mapcount goes + to 0. If the page is SwapCache, uncharge is delayed until + mem_cgroup_uncharge_swapcache(). + + mem_cgroup_uncharge_cache_page() + Called when a page-cache is deleted from radix-tree. If the page is + SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache(). + + mem_cgroup_uncharge_swapcache() + Called when SwapCache is removed from radix-tree. The charge itself + is moved to swap_cgroup. (If mem+swap controller is disabled, no + charge to swap occurs.) + + mem_cgroup_uncharge_swap() + Called when swp_entry's refcnt goes down to 0. A charge against swap + disappears. + + mem_cgroup_end_migration(old, new) + At success of migration old is uncharged (if necessary), a charge + to new page is committed. At failure, charge to old page is committed. + +3. charge-commit-cancel + In some case, we can't know this "charge" is valid or not at charging + (because of races). + To handle such case, there are charge-commit-cancel functions. + mem_cgroup_try_charge_XXX + mem_cgroup_commit_charge_XXX + mem_cgroup_cancel_charge_XXX + these are used in swap-in and migration. + + At try_charge(), there are no flags to say "this page is charged". + at this point, usage += PAGE_SIZE. 
+ + At commit(), the function checks the page should be charged or not + and set flags or avoid charging.(usage -= PAGE_SIZE) + + At cancel(), simply usage -= PAGE_SIZE. + +In the explanation below, we assume CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y. + +4. Anonymous + Anonymous page is newly allocated at + - page fault into MAP_ANONYMOUS mapping. + - Copy-On-Write. + It is charged right after it's allocated before doing any page table + related operations. Of course, it's uncharged when another page is used + for the fault address. + + At freeing anonymous page (by exit() or munmap()), zap_pte() is called + and pages for ptes are freed one by one.(see mm/memory.c). Uncharges + are done at page_remove_rmap() when page_mapcount() goes down to 0. + + Another page freeing is by page-reclaim (vmscan.c) and anonymous + pages are swapped out. In this case, the page is marked as + PageSwapCache(). uncharge() routine doesn't uncharge the page marked + as SwapCache(). It's delayed until __delete_from_swap_cache(). + + 4.1 Swap-in. + At swap-in, the page is taken from swap-cache. There are 2 cases. + + (a) If the SwapCache is newly allocated and read, it has no charges. + (b) If the SwapCache has been mapped by processes, it has been + charged already. + + In case (a), we charge it. In case (b), we don't charge it. + (But racy state between (a) and (b) exists. We do check it.) + At charging, a charge recorded in swap_cgroup is moved to page_cgroup. + + 4.2 Swap-out. + At swap-out, typical state transition is below. + + (a) add to swap cache. (marked as SwapCache) + swp_entry's refcnt += 1. + (b) fully unmapped. + swp_entry's refcnt += # of ptes. + (c) write back to swap. + (d) delete from swap cache. (remove from SwapCache) + swp_entry's refcnt -= 1. + + + At (b), the page is marked as SwapCache and not uncharged. + At (d), the page is removed from SwapCache and a charge in page_cgroup + is moved to swap_cgroup.
+ + Finally, at task exit, + (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0. + Here, a charge in swap_cgroup disappears. + +5. Page Cache + Page Cache is charged at + - add_to_page_cache_locked(). + + uncharged at + - __remove_from_page_cache(). + + The logic is very clear. (About migration, see below) + Note: __remove_from_page_cache() is called by remove_from_page_cache() + and __remove_mapping(). + +6. Shmem(tmpfs) Page Cache + Memcg's charge/uncharge have special handlers of shmem. The best way + to understand shmem's page state transition is to read mm/shmem.c. + But brief explanation of the behavior of memcg around shmem will be + helpful to understand the logic. + + Shmem's page (just leaf page, not direct/indirect block) can be on + - radix-tree of shmem's inode. + - SwapCache. + - Both on radix-tree and SwapCache. This happens at swap-in + and swap-out, + + It's charged when... + - A new page is added to shmem's radix-tree. + - A swp page is read. (move a charge from swap_cgroup to page_cgroup) + It's uncharged when + - A page is removed from radix-tree and not SwapCache. + - When SwapCache is removed, a charge is moved to swap_cgroup. + - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup + disappears. + +7. Page Migration + One of the most complicated functions is page-migration-handler. + Memcg has 2 routines. Assume that we are migrating a page's contents + from OLDPAGE to NEWPAGE. + + Usual migration logic is.. + (a) remove the page from LRU. + (b) allocate NEWPAGE (migration target) + (c) lock by lock_page(). + (d) unmap all mappings. + (e-1) If necessary, replace entry in radix-tree. + (e-2) move contents of a page. + (f) map all mappings again. + (g) pushback the page to LRU. + (-) OLDPAGE will be freed. + + Before (g), memcg should complete all necessary charge/uncharge to + NEWPAGE/OLDPAGE. + + The point is.... 
+ - If OLDPAGE is anonymous, all charges will be dropped at (d) because + try_to_unmap() drops all mapcount and the page will not be + SwapCache. + + - If OLDPAGE is SwapCache, charges will be kept at (g) because + __delete_from_swap_cache() isn't called at (e-1) + + - If OLDPAGE is page-cache, charges will be kept at (g) because + remove_from_page_cache() isn't called at (e-1) + + memcg provides following hooks. + + - mem_cgroup_prepare_migration(OLDPAGE) + Called after (b) to account a charge (usage += PAGE_SIZE) against + memcg which OLDPAGE belongs to. + + - mem_cgroup_end_migration(OLDPAGE, NEWPAGE) + Called after (f) before (g). + If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already + charged, a charge by prepare_migration() is automatically canceled. + If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE. + + But zap_pte() (by exit or munmap) can be called during migration, so + we have to check if OLDPAGE/NEWPAGE is a valid page after commit(). + +8. LRU + Each memcg has its own private LRU. Now, its handling is under global + VM's control (means that it's handled under global zone->lru_lock). + Almost all routines around memcg's LRU are called by global LRU's + list management functions under zone->lru_lock(). + + A special function is mem_cgroup_isolate_pages(). This scans + memcg's private LRU and calls __isolate_lru_page() to extract a page + from LRU. + (By __isolate_lru_page(), the page is removed from both of global and + private LRU.) + + +9. Typical Tests. + + Tests for racy cases. + + 9.1 Small limit to memcg. + When you do test to do racy case, it's good test to set memcg's limit + to be very small rather than GB. Many races were found in the test under + xKB or xxMB limits. + (Memory behavior under GB and Memory behavior under MB shows very + different situation.) + + 9.2 Shmem + Historically, memcg's shmem handling was poor and we saw some amount + of troubles here. This is because shmem is page-cache but can be + SwapCache.
Test with shmem/tmpfs is always good test. + + 9.3 Migration + For NUMA, migration is another special case. To do easy test, cpuset + is useful. Following is a sample script to do migration. + + mount -t cgroup -o cpuset none /opt/cpuset + + mkdir /opt/cpuset/01 + echo 1 > /opt/cpuset/01/cpuset.cpus + echo 0 > /opt/cpuset/01/cpuset.mems + echo 1 > /opt/cpuset/01/cpuset.memory_migrate + mkdir /opt/cpuset/02 + echo 1 > /opt/cpuset/02/cpuset.cpus + echo 1 > /opt/cpuset/02/cpuset.mems + echo 1 > /opt/cpuset/02/cpuset.memory_migrate + + In above set, when you move a task from 01 to 02, page migration from + node 0 to node 1 will occur. Following is a script to migrate all + under cpuset. + -- + move_task() + { + for pid in $1 + do + /bin/echo $pid >$2/tasks 2>/dev/null + echo -n $pid + echo -n " " + done + echo END + } + + G1_TASK=`cat ${G1}/tasks` + G2_TASK=`cat ${G2}/tasks` + move_task "${G1_TASK}" ${G2} & + -- + 9.4 Memory hotplug. + memory hotplug test is one of good test. + to offline memory, do following. + # echo offline > /sys/devices/system/memory/memoryXXX/state + (XXX is the place of memory) + This is an easy way to test page migration, too. + + 9.5 mkdir/rmdir + When using hierarchy, mkdir/rmdir test should be done. + Use tests like the following. + + echo 1 >/opt/cgroup/01/memory.use_hierarchy + mkdir /opt/cgroup/01/child_a + mkdir /opt/cgroup/01/child_b + + set limit to 01. + add limit to 01/child_b + run jobs under child_a and child_b + + create/delete following groups at random while jobs are running. + /opt/cgroup/01/child_a/child_aa + /opt/cgroup/01/child_b/child_bb + /opt/cgroup/01/child_c + + running new jobs in new group is also good. + + 9.6 Mount with other subsystems. + Mounting with other subsystems is a good test because there is a + race and lock dependency with other cgroup subsystems. + + example) + # mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices + + and do task move, mkdir, rmdir etc...under this.
-- cgit v1.2.3-70-g09d2 From 03f3c433648a97ae7c86be789edba67690f6ea60 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:31 -0800 Subject: memcg: fix swap accounting leak Fix swapin charge operation of memcg. Now, memcg has hooks to swap-out operation and checks SwapCache is really unused or not. That check depends on contents of struct page. I.e. If PageAnon(page) && page_mapped(page), the page is recognized as still-in-use. Now, reuse_swap_page() calls delete_from_swap_cache() before establishment of any rmap. Then, in following sequence (Page fault with WRITE) try_charge() (charge += PAGESIZE) commit_charge() (Check page_cgroup is used or not..) reuse_swap_page() -> delete_from_swap_cache() -> mem_cgroup_uncharge_swapcache() (charge -= PAGESIZE) ...... New charge is uncharged soon.... To avoid this, move commit_charge() after page_mapcount() goes up to 1. By this, try_charge() (usage += PAGESIZE) reuse_swap_page() (may usage -= PAGESIZE if PCG_USED is set) commit_charge() (If page_cgroup is not marked as PCG_USED, add new charge.) Accounting will be correct. Changelog (v2) -> (v3) - fixed invalid charge to swp_entry==0. - updated documentation. Changelog (v1) -> (v2) - fixed comment.
[nishimura@mxp.nes.nec.co.jp: swap accounting leak doc fix] Signed-off-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Tested-by: Balbir Singh Cc: Hugh Dickins Cc: Daisuke Nishimura Signed-off-by: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/memcg_test.txt | 41 ++++++++++++++++++++++++++++---- mm/memcontrol.c | 7 +++--- mm/memory.c | 11 +++++---- 3 files changed, 46 insertions(+), 13 deletions(-) (limited to 'Documentation') diff --git a/Documentation/controllers/memcg_test.txt b/Documentation/controllers/memcg_test.txt index c91f69b0b54..08d4d3ea0d7 100644 --- a/Documentation/controllers/memcg_test.txt +++ b/Documentation/controllers/memcg_test.txt @@ -1,6 +1,6 @@ Memory Resource Controller(Memcg) Implementation Memo. -Last Updated: 2008/12/10 -Base Kernel Version: based on 2.6.28-rc7-mm. +Last Updated: 2008/12/15 +Base Kernel Version: based on 2.6.28-rc8-mm. Because VM is getting complex (one of reasons is memcg...), memcg's behavior is complex. This is a document for memcg's internal behavior. @@ -111,9 +111,40 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. (b) If the SwapCache has been mapped by processes, it has been charged already. - In case (a), we charge it. In case (b), we don't charge it. - (But racy state between (a) and (b) exists. We do check it.) - At charging, a charge recorded in swap_cgroup is moved to page_cgroup. + This swap-in is one of the most complicated work. In do_swap_page(), + following events occur when pte is unchanged. + + (1) the page (SwapCache) is looked up. + (2) lock_page() + (3) try_charge_swapin() + (4) reuse_swap_page() (may call delete_swap_cache()) + (5) commit_charge_swapin() + (6) swap_free(). + + Considering following situation for example. + + (A) The page has not been charged before (2) and reuse_swap_page() + doesn't call delete_from_swap_cache(). 
+ (B) The page has not been charged before (2) and reuse_swap_page() + calls delete_from_swap_cache(). + (C) The page has been charged before (2) and reuse_swap_page() doesn't + call delete_from_swap_cache(). + (D) The page has been charged before (2) and reuse_swap_page() calls + delete_from_swap_cache(). + + memory.usage/memsw.usage changes to this page/swp_entry will be + Case (A) (B) (C) (D) + Event + Before (2) 0/ 1 0/ 1 1/ 1 1/ 1 + =========================================== + (3) +1/+1 +1/+1 +1/+1 +1/+1 + (4) - 0/ 0 - -1/ 0 + (5) 0/-1 0/ 0 -1/-1 0/ 0 + (6) - 0/-1 - 0/-1 + =========================================== + Result 1/ 1 1/ 1 1/ 1 1/ 1 + + In any cases, charges to this page should be 1/ 1. 4.2 Swap-out. At swap-out, typical state transition is below. diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a7ecf23150c..0ed61e27d52 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1169,10 +1169,11 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) /* * Now swap is on-memory. This means this page may be * counted both as mem and swap....double count. - * Fix it by uncharging from memsw. This SwapCache is stable - * because we're still under lock_page(). + * Fix it by uncharging from memsw. Basically, this SwapCache is stable + * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page() + * may call delete_from_swap_cache() before reach here. */ - if (do_swap_account) { + if (do_swap_account && PageSwapCache(page)) { swp_entry_t ent = {.val = page_private(page)}; struct mem_cgroup *memcg; memcg = swap_cgroup_record(ent, NULL); diff --git a/mm/memory.c b/mm/memory.c index e5bfbe6b594..e009ce87085 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2457,22 +2457,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, * while the page is counted on swap but not yet in mapcount i.e. 
* before page_add_anon_rmap() and swap_free(); try_to_free_swap() * must be called after the swap_free(), or it will never succeed. - * And mem_cgroup_commit_charge_swapin(), which uses the swp_entry - * in page->private, must be called before reuse_swap_page(), - * which may delete_from_swap_cache(). + * Because delete_from_swap_page() may be called by reuse_swap_page(), + * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry + * in page->private. In this case, a record in swap_cgroup is silently + * discarded at swap_free(). */ - mem_cgroup_commit_charge_swapin(page, ptr); inc_mm_counter(mm, anon_rss); pte = mk_pte(page, vma->vm_page_prot); if (write_access && reuse_swap_page(page)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); write_access = 0; } - flush_icache_page(vma, page); set_pte_at(mm, address, page_table, pte); page_add_anon_rmap(page, vma, address); + /* It's better to call commit-charge after rmap is established */ + mem_cgroup_commit_charge_swapin(page, ptr); swap_free(entry); if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) -- cgit v1.2.3-70-g09d2 From 999cd8a450f8f93701669a61cac4d3b19eca07e8 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 7 Jan 2009 18:08:36 -0800 Subject: cgroups: add a per-subsystem hierarchy_mutex These patches introduce new locking/refcount support for cgroups to reduce the need for subsystems to call cgroup_lock(). This will ultimately allow the atomicity of cgroup_rmdir() (which was removed recently) to be restored. 
These three patches give: 1/3 - introduce a per-subsystem hierarchy_mutex which a subsystem can use to prevent changes to its own cgroup tree 2/3 - use hierarchy_mutex in place of calling cgroup_lock() in the memory controller 3/3 - introduce a css_tryget() function similar to the one recently proposed by Kamezawa, but avoiding spurious refcount failures in the event of a race between a css_tryget() and an unsuccessful cgroup_rmdir() Future patches will likely involve: - using hierarchy mutex in place of cgroup_lock() in more subsystems where appropriate - restoring the atomicity of cgroup_rmdir() with respect to cgroup_create() This patch: Add a hierarchy_mutex to the cgroup_subsys object that protects changes to the hierarchy observed by that subsystem. It is taken by the cgroup subsystem (in addition to cgroup_mutex) for the following operations: - linking a cgroup into that subsystem's cgroup tree - unlinking a cgroup from that subsystem's cgroup tree - moving the subsystem to/from a hierarchy (including across the bind() callback) Thus if the subsystem holds its own hierarchy_mutex, it can safely traverse its own hierarchy. Signed-off-by: Paul Menage Tested-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cgroups.txt | 2 +- include/linux/cgroup.h | 17 ++++++++++++++++- kernel/cgroup.c | 37 +++++++++++++++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 60287e9e9d2..e33ee74eee7 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -528,7 +528,7 @@ example in cpusets, no task may attach before 'cpus' and 'mems' are set up. 
void bind(struct cgroup_subsys *ss, struct cgroup *root) -(cgroup_mutex held by caller) +(cgroup_mutex and ss->hierarchy_mutex held by caller) Called when a cgroup subsystem is rebound to a different hierarchy and root cgroup. Currently this will only involve movement between diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 73d1c730c3c..ce1c1f34c30 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -340,8 +340,23 @@ struct cgroup_subsys { #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; - struct cgroupfs_root *root; + /* + * Protects sibling/children links of cgroups in this + * hierarchy, plus protects which hierarchy (or none) the + * subsystem is a part of (i.e. root/sibling). To avoid + * potential deadlocks, the following operations should not be + * undertaken while holding any hierarchy_mutex: + * + * - allocating memory + * - initiating hotplug events + */ + struct mutex hierarchy_mutex; + /* + * Link to parent, and list entry in parent's children. 
+ * Protected by this->hierarchy_mutex and cgroup_lock() + */ + struct cgroupfs_root *root; struct list_head sibling; }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 83ea4f524be..8b6379cdf63 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -722,23 +722,26 @@ static int rebind_subsystems(struct cgroupfs_root *root, BUG_ON(cgrp->subsys[i]); BUG_ON(!dummytop->subsys[i]); BUG_ON(dummytop->subsys[i]->cgroup != dummytop); + mutex_lock(&ss->hierarchy_mutex); cgrp->subsys[i] = dummytop->subsys[i]; cgrp->subsys[i]->cgroup = cgrp; list_move(&ss->sibling, &root->subsys_list); ss->root = root; if (ss->bind) ss->bind(ss, cgrp); - + mutex_unlock(&ss->hierarchy_mutex); } else if (bit & removed_bits) { /* We're removing this subsystem */ BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); BUG_ON(cgrp->subsys[i]->cgroup != cgrp); + mutex_lock(&ss->hierarchy_mutex); if (ss->bind) ss->bind(ss, dummytop); dummytop->subsys[i]->cgroup = dummytop; cgrp->subsys[i] = NULL; subsys[i]->root = &rootnode; list_move(&ss->sibling, &rootnode.subsys_list); + mutex_unlock(&ss->hierarchy_mutex); } else if (bit & final_bits) { /* Subsystem state should already exist */ BUG_ON(!cgrp->subsys[i]); @@ -2338,6 +2341,29 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, cgrp->subsys[ss->subsys_id] = css; } +static void cgroup_lock_hierarchy(struct cgroupfs_root *root) +{ + /* We need to take each hierarchy_mutex in a consistent order */ + int i; + + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (ss->root == root) + mutex_lock_nested(&ss->hierarchy_mutex, i); + } +} + +static void cgroup_unlock_hierarchy(struct cgroupfs_root *root) +{ + int i; + + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (ss->root == root) + mutex_unlock(&ss->hierarchy_mutex); + } +} + /* * cgroup_create - create a cgroup * @parent: cgroup that will be parent of the new cgroup @@ -2386,7 +2412,9 @@ static long 
cgroup_create(struct cgroup *parent, struct dentry *dentry, init_cgroup_css(css, ss, cgrp); } + cgroup_lock_hierarchy(root); list_add(&cgrp->sibling, &cgrp->parent->children); + cgroup_unlock_hierarchy(root); root->number_of_cgroups++; err = cgroup_create_dir(cgrp, dentry, mode); @@ -2504,8 +2532,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) if (!list_empty(&cgrp->release_list)) list_del(&cgrp->release_list); spin_unlock(&release_list_lock); - /* delete my sibling from parent->children */ + + cgroup_lock_hierarchy(cgrp->root); + /* delete this cgroup from parent->children */ list_del(&cgrp->sibling); + cgroup_unlock_hierarchy(cgrp->root); + spin_lock(&cgrp->dentry->d_lock); d = dget(cgrp->dentry); spin_unlock(&d->d_lock); @@ -2547,6 +2579,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) * need to invoke fork callbacks here. */ BUG_ON(!list_empty(&init_task.tasks)); + mutex_init(&ss->hierarchy_mutex); ss->active = 1; } -- cgit v1.2.3-70-g09d2 From a5fd9139f74c722a190b3bd69bbd611a8d91b388 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Jan 2009 18:08:58 -0800 Subject: w1: add 1-wire master driver for i.MX27 / i.MX31 This patch adds support for the 1-wire master interface for i.MX27 and i.MX31. 
Signed-off-by: Luotao Fu Signed-off-by: Sascha Hauer Signed-off-by: Evgeniy Polyakov Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/w1/masters/00-INDEX | 2 + Documentation/w1/masters/mxc-w1 | 11 ++ drivers/w1/masters/Kconfig | 6 ++ drivers/w1/masters/Makefile | 2 + drivers/w1/masters/mxc_w1.c | 211 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 232 insertions(+) create mode 100644 Documentation/w1/masters/mxc-w1 create mode 100644 drivers/w1/masters/mxc_w1.c (limited to 'Documentation') diff --git a/Documentation/w1/masters/00-INDEX b/Documentation/w1/masters/00-INDEX index 7b0ceaaad7a..d63fa024ac0 100644 --- a/Documentation/w1/masters/00-INDEX +++ b/Documentation/w1/masters/00-INDEX @@ -4,5 +4,7 @@ ds2482 - The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses. ds2490 - The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges. +mxc_w1 + - W1 master controller driver found on Freescale MX2/MX3 SoCs w1-gpio - GPIO 1-wire bus master driver. diff --git a/Documentation/w1/masters/mxc-w1 b/Documentation/w1/masters/mxc-w1 new file mode 100644 index 00000000000..97f6199a7f3 --- /dev/null +++ b/Documentation/w1/masters/mxc-w1 @@ -0,0 +1,11 @@ +Kernel driver mxc_w1 +==================== + +Supported chips: + * Freescale MX27, MX31 and probably other i.MX SoCs + Datasheets: + http://www.freescale.com/files/32bit/doc/data_sheet/MCIMX31.pdf?fpsp=1 + http://www.freescale.com/files/dsp/MCIMX27.pdf?fpsp=1 + +Author: Originally based on Freescale code, prepared for mainline by + Sascha Hauer diff --git a/drivers/w1/masters/Kconfig b/drivers/w1/masters/Kconfig index 90616822cd2..96d2f8e4c27 100644 --- a/drivers/w1/masters/Kconfig +++ b/drivers/w1/masters/Kconfig @@ -34,6 +34,12 @@ config W1_MASTER_DS2482 This driver can also be built as a module. If so, the module will be called ds2482. 
+config W1_MASTER_MXC + tristate "Freescale MXC 1-wire busmaster" + depends on W1 && ARCH_MXC + help + Say Y here to enable MXC 1-wire host + config W1_MASTER_DS1WM tristate "Maxim DS1WM 1-wire busmaster" depends on W1 && ARM && HAVE_CLK diff --git a/drivers/w1/masters/Makefile b/drivers/w1/masters/Makefile index bc4714a75f3..c5a3e96fcba 100644 --- a/drivers/w1/masters/Makefile +++ b/drivers/w1/masters/Makefile @@ -5,6 +5,8 @@ obj-$(CONFIG_W1_MASTER_MATROX) += matrox_w1.o obj-$(CONFIG_W1_MASTER_DS2490) += ds2490.o obj-$(CONFIG_W1_MASTER_DS2482) += ds2482.o +obj-$(CONFIG_W1_MASTER_MXC) += mxc_w1.o + obj-$(CONFIG_W1_MASTER_DS1WM) += ds1wm.o obj-$(CONFIG_W1_MASTER_GPIO) += w1-gpio.o obj-$(CONFIG_HDQ_MASTER_OMAP) += omap_hdq.o diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c new file mode 100644 index 00000000000..b9d74d0b353 --- /dev/null +++ b/drivers/w1/masters/mxc_w1.c @@ -0,0 +1,211 @@ +/* + * Copyright 2005-2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Luotao Fu, kernel@pengutronix.de + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#include +#include +#include +#include +#include +#include + +#include "../w1.h" +#include "../w1_int.h" +#include "../w1_log.h" + +/* According to the mx27 Datasheet the reset procedure should take up to about + * 1350us. We set the timeout to 500*100us = 50ms for sure */ +#define MXC_W1_RESET_TIMEOUT 500 + +/* + * MXC W1 Register offsets + */ +#define MXC_W1_CONTROL 0x00 +#define MXC_W1_TIME_DIVIDER 0x02 +#define MXC_W1_RESET 0x04 +#define MXC_W1_COMMAND 0x06 +#define MXC_W1_TXRX 0x08 +#define MXC_W1_INTERRUPT 0x0A +#define MXC_W1_INTERRUPT_EN 0x0C + +struct mxc_w1_device { + void __iomem *regs; + unsigned int clkdiv; + struct clk *clk; + struct w1_bus_master bus_master; +}; + +/* + * this is the low level routine to + * reset the device on the One Wire interface + * on the hardware + */ +static u8 mxc_w1_ds2_reset_bus(void *data) +{ + u8 reg_val; + unsigned int timeout_cnt = 0; + struct mxc_w1_device *dev = data; + + __raw_writeb(0x80, (dev->regs + MXC_W1_CONTROL)); + + while (1) { + reg_val = __raw_readb(dev->regs + MXC_W1_CONTROL); + + if (((reg_val >> 7) & 0x1) == 0 || + timeout_cnt > MXC_W1_RESET_TIMEOUT) + break; + else + timeout_cnt++; + + udelay(100); + } + return (reg_val >> 7) & 0x1; +} + +/* + * this is the low level routine to read/write a bit on the One Wire + * interface on the hardware. It does write 0 if parameter bit is set + * to 0, otherwise a write 1/read. + */ +static u8 mxc_w1_ds2_touch_bit(void *data, u8 bit) +{ + struct mxc_w1_device *mdev = data; + void __iomem *ctrl_addr = mdev->regs + MXC_W1_CONTROL; + unsigned int timeout_cnt = 400; /* Takes max. 120us according to + * datasheet. 
+ */ + + __raw_writeb((1 << (5 - bit)), ctrl_addr); + + while (timeout_cnt--) { + if (!((__raw_readb(ctrl_addr) >> (5 - bit)) & 0x1)) + break; + + udelay(1); + } + + return ((__raw_readb(ctrl_addr)) >> 3) & 0x1; +} + +static int __init mxc_w1_probe(struct platform_device *pdev) +{ + struct mxc_w1_device *mdev; + struct resource *res; + int err = 0; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + mdev = kzalloc(sizeof(struct mxc_w1_device), GFP_KERNEL); + if (!mdev) + return -ENOMEM; + + mdev->clk = clk_get(&pdev->dev, "owire_clk"); + if (!mdev->clk) { + err = -ENODEV; + goto failed_clk; + } + + mdev->clkdiv = (clk_get_rate(mdev->clk) / 1000000) - 1; + + res = request_mem_region(res->start, resource_size(res), + "mxc_w1"); + if (!res) { + err = -EBUSY; + goto failed_req; + } + + mdev->regs = ioremap(res->start, resource_size(res)); + if (!mdev->regs) { + printk(KERN_ERR "Cannot map frame buffer registers\n"); + goto failed_ioremap; + } + + clk_enable(mdev->clk); + __raw_writeb(mdev->clkdiv, mdev->regs + MXC_W1_TIME_DIVIDER); + + mdev->bus_master.data = mdev; + mdev->bus_master.reset_bus = mxc_w1_ds2_reset_bus; + mdev->bus_master.touch_bit = mxc_w1_ds2_touch_bit; + + err = w1_add_master_device(&mdev->bus_master); + + if (err) + goto failed_add; + + platform_set_drvdata(pdev, mdev); + return 0; + +failed_add: + iounmap(mdev->regs); +failed_ioremap: + release_mem_region(res->start, resource_size(res)); +failed_req: + clk_put(mdev->clk); +failed_clk: + kfree(mdev); + return err; +} + +/* + * disassociate the w1 device from the driver + */ +static int mxc_w1_remove(struct platform_device *pdev) +{ + struct mxc_w1_device *mdev = platform_get_drvdata(pdev); + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + w1_remove_master_device(&mdev->bus_master); + + iounmap(mdev->regs); + release_mem_region(res->start, resource_size(res)); + clk_disable(mdev->clk); + clk_put(mdev->clk); + + 
platform_set_drvdata(pdev, NULL); + + return 0; +} + +static struct platform_driver mxc_w1_driver = { + .driver = { + .name = "mxc_w1", + }, + .probe = mxc_w1_probe, + .remove = mxc_w1_remove, +}; + +static int __init mxc_w1_init(void) +{ + return platform_driver_register(&mxc_w1_driver); +} + +static void mxc_w1_exit(void) +{ + platform_driver_unregister(&mxc_w1_driver); +} + +module_init(mxc_w1_init); +module_exit(mxc_w1_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Freescale Semiconductors Inc"); +MODULE_DESCRIPTION("Driver for One-Wire on MXC"); -- cgit v1.2.3-70-g09d2 From e4e056aa3518197830c884b85268799b1868e8e3 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Wed, 7 Jan 2009 18:09:02 -0800 Subject: w1: documentation update Signed-off-by: Evgeniy Polyakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/w1/w1.netlink | 129 +++++++++++++++++++++++++++++++------------- 1 file changed, 91 insertions(+), 38 deletions(-) (limited to 'Documentation') diff --git a/Documentation/w1/w1.netlink b/Documentation/w1/w1.netlink index 3640c7c87d4..d596d406e0a 100644 --- a/Documentation/w1/w1.netlink +++ b/Documentation/w1/w1.netlink @@ -5,69 +5,122 @@ Message types. ============= There are three types of messages between w1 core and userspace: -1. Events. They are generated each time new master or slave device found - either due to automatic or requested search. -2. Userspace commands. Includes read/write and search/alarm search comamnds. +1. Events. They are generated each time new master or slave device + found either due to automatic or requested search. +2. Userspace commands. 3. Replies to userspace commands. Protocol. ======== -[struct cn_msg] - connector header. It's length field is equal to size of the attached data. +[struct cn_msg] - connector header. + Its length field is equal to size of the attached data [struct w1_netlink_msg] - w1 netlink header. __u8 type - message type. 
- W1_SLAVE_ADD/W1_SLAVE_REMOVE - slave add/remove events. - W1_MASTER_ADD/W1_MASTER_REMOVE - master add/remove events. - W1_MASTER_CMD - userspace command for bus master device (search/alarm search). - W1_SLAVE_CMD - userspace command for slave device (read/write/ search/alarm search - for bus master device where given slave device found). + W1_LIST_MASTERS + list current bus masters + W1_SLAVE_ADD/W1_SLAVE_REMOVE + slave add/remove events + W1_MASTER_ADD/W1_MASTER_REMOVE + master add/remove events + W1_MASTER_CMD + userspace command for bus master + device (search/alarm search) + W1_SLAVE_CMD + userspace command for slave device + (read/write/touch) __u8 res - reserved - __u16 len - size of attached to this header data. + __u16 len - size of data attached to this header data union { - __u8 id; - slave unique device id + __u8 id[8]; - slave unique device id struct w1_mst { - __u32 id; - master's id. + __u32 id; - master's id __u32 res; - reserved } mst; } id; -[strucrt w1_netlink_cmd] - command for gived master or slave device. +[struct w1_netlink_cmd] - command for given master or slave device. __u8 cmd - command opcode. - W1_CMD_READ - read command. - W1_CMD_WRITE - write command. - W1_CMD_SEARCH - search command. - W1_CMD_ALARM_SEARCH - alarm search command. + W1_CMD_READ - read command + W1_CMD_WRITE - write command + W1_CMD_TOUCH - touch command + (write and sample data back to userspace) + W1_CMD_SEARCH - search command + W1_CMD_ALARM_SEARCH - alarm search command __u8 res - reserved - __u16 len - length of data for this command. - For read command data must be allocated like for write command. - __u8 data[0] - data for this command. + __u16 len - length of data for this command + For read command data must be allocated like for write command + __u8 data[0] - data for this command -Each connector message can include one or more w1_netlink_msg with zero of more attached w1_netlink_cmd messages. 
+Each connector message can include one or more w1_netlink_msg with +zero or more attached w1_netlink_cmd messages. -For event messages there are no w1_netlink_cmd embedded structures, only connector header -and w1_netlink_msg strucutre with "len" field being zero and filled type (one of event types) -and id - either 8 bytes of slave unique id in host order, or master's id, which is assigned -to bus master device when it is added to w1 core. +For event messages there are no w1_netlink_cmd embedded structures, +only connector header and w1_netlink_msg structure with "len" field +being zero and filled type (one of event types) and id: +either 8 bytes of slave unique id in host order, +or master's id, which is assigned to bus master device +when it is added to w1 core. + +Currently replies to userspace commands are only generated for read +command request. One reply is generated exactly for one w1_netlink_cmd +read request. Replies are not combined when sent - i.e. typical reply +messages look like the following: -Currently replies to userspace commands are only generated for read command request. -One reply is generated exactly for one w1_netlink_cmd read request. -Replies are not combined when sent - i.e. typical reply messages looks like the following: [cn_msg][w1_netlink_msg][w1_netlink_cmd] -cn_msg.len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd) + cmd->len; +cn_msg.len = sizeof(struct w1_netlink_msg) + + sizeof(struct w1_netlink_cmd) + + cmd->len; w1_netlink_msg.len = sizeof(struct w1_netlink_cmd) + cmd->len; w1_netlink_cmd.len = cmd->len; +Replies to W1_LIST_MASTERS should send a message back to the userspace +which will contain list of all registered master ids in the following +format: + + cn_msg (CN_W1_IDX.CN_W1_VAL as id, len is equal to sizeof(struct + w1_netlink_msg) plus number of masters multiplied by 4) + w1_netlink_msg (type: W1_LIST_MASTERS, len is equal to + number of masters multiplied by 4 (u32 size)) + id0 ...
idN + + Each message is at most 4k in size, so if number of master devices + exceeds this, it will be split into several messages, + cn.seq will be increased for each one. + +W1 search and alarm search commands. +request: +[cn_msg] + [w1_netlink_msg type = W1_MASTER_CMD + id is equal to the bus master id to use for searching] + [w1_netlink_cmd cmd = W1_CMD_SEARCH or W1_CMD_ALARM_SEARCH] + +reply: + [cn_msg, ack = 1 and increasing, 0 means the last message, + seq is equal to the request seq] + [w1_netlink_msg type = W1_MASTER_CMD] + [w1_netlink_cmd cmd = W1_CMD_SEARCH or W1_CMD_ALARM_SEARCH + len is equal to number of IDs multiplied by 8] + [64bit-id0 ... 64bit-idN] +Length in each header corresponds to the size of the data behind it, so +w1_netlink_cmd->len = N * 8; where N is number of IDs in this message. + Can be zero. +w1_netlink_msg->len = sizeof(struct w1_netlink_cmd) + N * 8; +cn_msg->len = sizeof(struct w1_netlink_msg) + + sizeof(struct w1_netlink_cmd) + + N*8; Operation steps in w1 core when new command is received. ======================================================= -When new message (w1_netlink_msg) is received w1 core detects if it is master of slave request, -according to w1_netlink_msg.type field. +When new message (w1_netlink_msg) is received w1 core detects if it is +master or slave request, according to w1_netlink_msg.type field. Then master or slave device is searched for. -When found, master device (requested or those one on where slave device is found) is locked. -If slave command is requested, then reset/select procedure is started to select given device. +When found, master device (requested or those one on where slave device +is found) is locked. If slave command is requested, then reset/select +procedure is started to select given device. Then all requested in w1_netlink_msg operations are performed one by one. If command requires reply (like read command) it is sent on command completion. 
@@ -82,8 +135,8 @@ Connector [1] specific documentation. Each connector message includes two u32 fields as "address". w1 uses CN_W1_IDX and CN_W1_VAL defined in include/linux/connector.h header. Each message also includes sequence and acknowledge numbers. -Sequence number for event messages is appropriate bus master sequence number increased with -each event message sent "through" this master. +Sequence number for event messages is appropriate bus master sequence number +increased with each event message sent "through" this master. Sequence number for userspace requests is set by userspace application. Sequence number for reply is the same as was in request, and acknowledge number is set to seq+1. @@ -93,6 +146,6 @@ Additional documantion, source code examples. ============================================ 1. Documentation/connector -2. http://tservice.net.ru/~s0mbre/archive/w1 -This archive includes userspace application w1d.c which -uses read/write/search commands for all master/slave devices found on the bus. +2. http://www.ioremap.net/archive/w1 +This archive includes userspace application w1d.c which uses +read/write/search commands for all master/slave devices found on the bus. -- cgit v1.2.3-70-g09d2 From f89735c4e281e8642907b38640c076ae5048f3a6 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Wed, 7 Jan 2009 18:09:04 -0800 Subject: w1: added w1 reset command Command which allows to reset the bus. 
Signed-off-by: Evgeniy Polyakov Cc: Paul Alfille Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/w1/w1.netlink | 6 ++++++ drivers/w1/w1_netlink.c | 3 +++ drivers/w1/w1_netlink.h | 1 + 3 files changed, 10 insertions(+) (limited to 'Documentation') diff --git a/Documentation/w1/w1.netlink b/Documentation/w1/w1.netlink index d596d406e0a..2756681b6ef 100644 --- a/Documentation/w1/w1.netlink +++ b/Documentation/w1/w1.netlink @@ -112,6 +112,12 @@ cn_msg->len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd) + N*8; +W1 reset command. +[cn_msg] + [w1_netlink_msg type = W1_MASTER_CMD + id is equal to the bus master id to use for searching] + [w1_netlink_cmd cmd = W1_CMD_RESET] + Operation steps in w1 core when new command is received. ======================================================= diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c index a94336be765..f978c750400 100644 --- a/drivers/w1/w1_netlink.c +++ b/drivers/w1/w1_netlink.c @@ -197,6 +197,9 @@ static int w1_process_command_master(struct w1_master *dev, struct cn_msg *req_m case W1_CMD_TOUCH: err = w1_process_command_io(dev, msg, hdr, cmd); break; + case W1_CMD_RESET: + err = w1_reset_bus(dev); + break; default: cmd->res = EINVAL; cn_netlink_send(msg, 0, GFP_KERNEL); diff --git a/drivers/w1/w1_netlink.h b/drivers/w1/w1_netlink.h index 01d86a71cf3..68a4ff46cb9 100644 --- a/drivers/w1/w1_netlink.h +++ b/drivers/w1/w1_netlink.h @@ -58,6 +58,7 @@ enum w1_commands { W1_CMD_SEARCH, W1_CMD_ALARM_SEARCH, W1_CMD_TOUCH, + W1_CMD_RESET, W1_CMD_MAX, }; -- cgit v1.2.3-70-g09d2 From 4037014e3fb71e998189374e19ca141c59d15323 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Wed, 7 Jan 2009 18:09:05 -0800 Subject: w1: send status messages after command processing Send completion status of the commands to the userspace. Message and protocol are described in the documentation. 
Signed-off-by: Evgeniy Polyakov Cc: Paul Alfille Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/w1/w1.netlink | 29 +++++++++++++++++++++++++ drivers/w1/w1_netlink.c | 53 +++++++++++++++++++++++++++++++++++++++------ drivers/w1/w1_netlink.h | 2 +- 3 files changed, 76 insertions(+), 8 deletions(-) (limited to 'Documentation') diff --git a/Documentation/w1/w1.netlink b/Documentation/w1/w1.netlink index 2756681b6ef..804445f745e 100644 --- a/Documentation/w1/w1.netlink +++ b/Documentation/w1/w1.netlink @@ -118,6 +118,35 @@ W1 reset command. id is equal to the bus master id to use for searching] [w1_netlink_cmd cmd = W1_CMD_RESET] + +Command status replies. +====================== + +Each command (either root, master or slave with or without w1_netlink_cmd +structure) will be 'acked' by the w1 core. Format of the reply is the same +as request message except that length parameters do not account for data +requested by the user, i.e. read/write/touch IO requests will not contain +data, so w1_netlink_cmd.len will be 0, w1_netlink_msg.len will be size +of the w1_netlink_cmd structure and cn_msg.len will be equal to the sum +of the sizeof(struct w1_netlink_msg) and sizeof(struct w1_netlink_cmd). +If reply is generated for master or root command (which do not have +w1_netlink_cmd attached), reply will contain only cn_msg and w1_netlink_msg +structures. + +w1_netlink_msg.status field will carry positive error value +(EINVAL for example) or zero in case of success. + +All other fields in every structure will mirror the same parameters in the +request message (except lengths as described above). + +Status reply is generated for every w1_netlink_cmd embedded in the +w1_netlink_msg, if there are no w1_netlink_cmd structures, +reply will be generated for the w1_netlink_msg. + +All w1_netlink_cmd command structures are handled in every w1_netlink_msg, +even if there were errors, only length mismatch interrupts message processing.
+ + Operation steps in w1 core when new command is received. ======================================================= diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c index f978c750400..fdf72851c57 100644 --- a/drivers/w1/w1_netlink.c +++ b/drivers/w1/w1_netlink.c @@ -152,7 +152,7 @@ static int w1_process_command_io(struct w1_master *dev, struct cn_msg *msg, w1_write_block(dev, cmd->data, cmd->len); break; default: - err = -1; + err = -EINVAL; break; } @@ -195,14 +195,13 @@ static int w1_process_command_master(struct w1_master *dev, struct cn_msg *req_m case W1_CMD_READ: case W1_CMD_WRITE: case W1_CMD_TOUCH: - err = w1_process_command_io(dev, msg, hdr, cmd); + err = w1_process_command_io(dev, req_msg, req_hdr, req_cmd); break; case W1_CMD_RESET: err = w1_reset_bus(dev); break; default: - cmd->res = EINVAL; - cn_netlink_send(msg, 0, GFP_KERNEL); + err = -EINVAL; break; } @@ -246,7 +245,7 @@ static int w1_process_command_root(struct cn_msg *msg, struct w1_netlink_msg *mc w = (struct w1_netlink_msg *)(cn + 1); w->type = W1_LIST_MASTERS; - w->reserved = 0; + w->status = 0; w->len = 0; id = (u32 *)(w + 1); @@ -273,6 +272,40 @@ static int w1_process_command_root(struct cn_msg *msg, struct w1_netlink_msg *mc return 0; } +static int w1_netlink_send_error(struct cn_msg *rcmsg, struct w1_netlink_msg *rmsg, + struct w1_netlink_cmd *rcmd, int error) +{ + struct cn_msg *cmsg; + struct w1_netlink_msg *msg; + struct w1_netlink_cmd *cmd; + + cmsg = kzalloc(sizeof(*msg) + sizeof(*cmd) + sizeof(*cmsg), GFP_KERNEL); + if (!cmsg) + return -ENOMEM; + + msg = (struct w1_netlink_msg *)(cmsg + 1); + cmd = (struct w1_netlink_cmd *)(msg + 1); + + memcpy(cmsg, rcmsg, sizeof(*cmsg)); + cmsg->len = sizeof(*msg); + + memcpy(msg, rmsg, sizeof(*msg)); + msg->len = 0; + msg->status = (short)-error; + + if (rcmd) { + memcpy(cmd, rcmd, sizeof(*cmd)); + cmd->len = 0; + msg->len += sizeof(*cmd); + cmsg->len += sizeof(*cmd); + } + + error = cn_netlink_send(cmsg, 0, GFP_KERNEL); + 
kfree(cmsg); + + return error; +} + static void w1_cn_callback(void *data) { struct cn_msg *msg = data; @@ -289,6 +322,7 @@ static void w1_cn_callback(void *data) dev = NULL; sl = NULL; + cmd = NULL; memcpy(&id, m->id.id, sizeof(id)); #if 0 @@ -336,9 +370,12 @@ static void w1_cn_callback(void *data) } if (sl) - w1_process_command_slave(sl, msg, m, cmd); + err = w1_process_command_slave(sl, msg, m, cmd); else - w1_process_command_master(dev, msg, m, cmd); + err = w1_process_command_master(dev, msg, m, cmd); + + w1_netlink_send_error(msg, m, cmd, err); + err = 0; cmd_data += cmd->len + sizeof(struct w1_netlink_cmd); mlen -= cmd->len + sizeof(struct w1_netlink_cmd); @@ -349,6 +386,8 @@ out_up: atomic_dec(&sl->refcnt); mutex_unlock(&dev->mutex); out_cont: + if (!cmd || err) + w1_netlink_send_error(msg, m, cmd, err); msg->len -= sizeof(struct w1_netlink_msg) + m->len; m = (struct w1_netlink_msg *)(((u8 *)m) + sizeof(struct w1_netlink_msg) + m->len); diff --git a/drivers/w1/w1_netlink.h b/drivers/w1/w1_netlink.h index 68a4ff46cb9..27e950f935b 100644 --- a/drivers/w1/w1_netlink.h +++ b/drivers/w1/w1_netlink.h @@ -40,7 +40,7 @@ enum w1_netlink_message_types { struct w1_netlink_msg { __u8 type; - __u8 reserved; + __u8 status; __u16 len; union { __u8 id[8]; -- cgit v1.2.3-70-g09d2 From 73ac36ea14fd18ea3dc057e41b16ff31a3c0bd5a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 7 Jan 2009 18:09:16 -0800 Subject: fix similar typos to successfull When I review ocfs2 code, find there are 2 typos to "successfull". After doing grep "successfull " in kernel tree, 22 typos found totally -- great minds always think alike :) This patch fixes all the similar typos. Thanks for Randy's ack and comments. 
Signed-off-by: Coly Li Acked-by: Randy Dunlap Acked-by: Roland Dreier Cc: Jeremy Kerr Cc: Jeff Garzik Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Theodore Ts'o Cc: Mark Fasheh Cc: Vlad Yasevich Cc: Sridhar Samudrala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/hwmon/abituguru-datasheet | 4 ++-- Documentation/scsi/scsi_fc_transport.txt | 4 ++-- arch/powerpc/platforms/cell/spufs/spufs.h | 2 +- drivers/infiniband/hw/nes/nes_cm.c | 2 +- drivers/isdn/hardware/eicon/debuglib.h | 2 +- drivers/isdn/hardware/eicon/os_4bri.c | 2 +- drivers/isdn/hardware/eicon/os_bri.c | 2 +- drivers/isdn/hardware/eicon/os_pri.c | 2 +- drivers/mtd/ubi/kapi.c | 2 +- drivers/net/wireless/ath5k/dma.c | 2 +- drivers/net/wireless/zd1211rw/zd_mac.c | 2 +- drivers/s390/block/dasd_3990_erp.c | 2 +- drivers/s390/block/dasd_int.h | 2 +- drivers/s390/char/tape_3590.c | 2 +- drivers/s390/cio/cio.c | 2 +- drivers/s390/cio/qdio_main.c | 2 +- fs/ext4/extents.c | 2 +- fs/ocfs2/dlmglue.c | 4 ++-- net/sctp/auth.c | 2 +- 19 files changed, 22 insertions(+), 22 deletions(-) (limited to 'Documentation') diff --git a/Documentation/hwmon/abituguru-datasheet b/Documentation/hwmon/abituguru-datasheet index 4d184f2db0e..d9251efdcec 100644 --- a/Documentation/hwmon/abituguru-datasheet +++ b/Documentation/hwmon/abituguru-datasheet @@ -121,7 +121,7 @@ Once all bytes have been read data will hold 0x09, but there is no reason to test for this. Notice that the number of bytes is bank address dependent see above and below. -After completing a successfull read it is advised to put the uGuru back in +After completing a successful read it is advised to put the uGuru back in ready mode, so that it is ready for the next read / write cycle. This way if your program / driver is unloaded and later loaded again the detection algorithm described above will still work. @@ -141,7 +141,7 @@ don't ask why this is the way it is. Once DATA holds 0x01 read CMD it should hold 0xAC now. 
-After completing a successfull write it is advised to put the uGuru back in +After completing a successful write it is advised to put the uGuru back in ready mode, so that it is ready for the next read / write cycle. This way if your program / driver is unloaded and later loaded again the detection algorithm described above will still work. diff --git a/Documentation/scsi/scsi_fc_transport.txt b/Documentation/scsi/scsi_fc_transport.txt index 38d324d62b2..e5b071d4661 100644 --- a/Documentation/scsi/scsi_fc_transport.txt +++ b/Documentation/scsi/scsi_fc_transport.txt @@ -191,7 +191,7 @@ Vport States: This is equivalent to a driver "attach" on an adapter, which is independent of the adapter's link state. - Instantiation of the vport on the FC link via ELS traffic, etc. - This is equivalent to a "link up" and successfull link initialization. + This is equivalent to a "link up" and successful link initialization. Further information can be found in the interfaces section below for Vport Creation. @@ -320,7 +320,7 @@ Vport Creation: This is equivalent to a driver "attach" on an adapter, which is independent of the adapter's link state. - Instantiation of the vport on the FC link via ELS traffic, etc. - This is equivalent to a "link up" and successfull link initialization. + This is equivalent to a "link up" and successful link initialization. The LLDD's vport_create() function will not synchronously wait for both parts to be fully completed before returning. It must validate that the diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 15c62d3ca12..3bf908e2873 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -314,7 +314,7 @@ extern char *isolated_loader; * we need to call spu_release(ctx) before sleeping, and * then spu_acquire(ctx) when awoken. 
* - * Returns with state_mutex re-acquired when successfull or + * Returns with state_mutex re-acquired when successful or * with -ERESTARTSYS and the state_mutex dropped when interrupted. */ diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index a812db24347..6ba57e91d7a 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2705,7 +2705,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) sizeof(struct ietf_mpa_frame)); - /* notify OF layer that accept event was successfull */ + /* notify OF layer that accept event was successful */ cm_id->add_ref(cm_id); cm_event.event = IW_CM_EVENT_ESTABLISHED; diff --git a/drivers/isdn/hardware/eicon/debuglib.h b/drivers/isdn/hardware/eicon/debuglib.h index 016410cf227..8ea587783e1 100644 --- a/drivers/isdn/hardware/eicon/debuglib.h +++ b/drivers/isdn/hardware/eicon/debuglib.h @@ -235,7 +235,7 @@ typedef void ( * DbgOld) (unsigned short, char *, va_list) ; typedef void ( * DbgEv) (unsigned short, unsigned long, va_list) ; typedef void ( * DbgIrq) (unsigned short, int, char *, va_list) ; typedef struct _DbgHandle_ -{ char Registered ; /* driver successfull registered */ +{ char Registered ; /* driver successfully registered */ #define DBG_HANDLE_REG_NEW 0x01 /* this (new) structure */ #define DBG_HANDLE_REG_OLD 0x7f /* old structure (see below) */ char Version; /* version of this structure */ diff --git a/drivers/isdn/hardware/eicon/os_4bri.c b/drivers/isdn/hardware/eicon/os_4bri.c index 7b4ec3f60db..c964b8d91ad 100644 --- a/drivers/isdn/hardware/eicon/os_4bri.c +++ b/drivers/isdn/hardware/eicon/os_4bri.c @@ -997,7 +997,7 @@ diva_4bri_start_adapter(PISDN_ADAPTER IoAdapter, diva_xdi_display_adapter_features(IoAdapter->ANum); for (i = 0; i < IoAdapter->tasks; i++) { - DBG_LOG(("A(%d) %s adapter successfull started", + DBG_LOG(("A(%d) %s adapter successfully started", IoAdapter->QuadroList->QuadroAdapter[i]->ANum, (IoAdapter->tasks 
== 1) ? "BRI 2.0" : "4BRI")) diva_xdi_didd_register_adapter(IoAdapter->QuadroList->QuadroAdapter[i]->ANum); diff --git a/drivers/isdn/hardware/eicon/os_bri.c b/drivers/isdn/hardware/eicon/os_bri.c index f31bba5b16f..08f01993f46 100644 --- a/drivers/isdn/hardware/eicon/os_bri.c +++ b/drivers/isdn/hardware/eicon/os_bri.c @@ -736,7 +736,7 @@ diva_bri_start_adapter(PISDN_ADAPTER IoAdapter, IoAdapter->Properties.Features = (word) features; diva_xdi_display_adapter_features(IoAdapter->ANum); - DBG_LOG(("A(%d) BRI adapter successfull started", IoAdapter->ANum)) + DBG_LOG(("A(%d) BRI adapter successfully started", IoAdapter->ANum)) /* Register with DIDD */ diff --git a/drivers/isdn/hardware/eicon/os_pri.c b/drivers/isdn/hardware/eicon/os_pri.c index 903356547b7..5d65405c75f 100644 --- a/drivers/isdn/hardware/eicon/os_pri.c +++ b/drivers/isdn/hardware/eicon/os_pri.c @@ -513,7 +513,7 @@ diva_pri_start_adapter(PISDN_ADAPTER IoAdapter, diva_xdi_display_adapter_features(IoAdapter->ANum); - DBG_LOG(("A(%d) PRI adapter successfull started", IoAdapter->ANum)) + DBG_LOG(("A(%d) PRI adapter successfully started", IoAdapter->ANum)) /* Register with DIDD */ diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 5d9bcf109c1..4abbe573fa4 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -564,7 +564,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_unmap); * @dtype: expected data type * * This function maps an un-mapped logical eraseblock @lnum to a physical - * eraseblock. This means, that after a successfull invocation of this + * eraseblock. This means, that after a successful invocation of this * function the logical eraseblock @lnum will be empty (contain only %0xFF * bytes) and be mapped to a physical eraseblock, even if an unclean reboot * happens. 
diff --git a/drivers/net/wireless/ath5k/dma.c b/drivers/net/wireless/ath5k/dma.c index 7e2b1a67e5d..b65b4feb2d2 100644 --- a/drivers/net/wireless/ath5k/dma.c +++ b/drivers/net/wireless/ath5k/dma.c @@ -594,7 +594,7 @@ int ath5k_hw_get_isr(struct ath5k_hw *ah, enum ath5k_int *interrupt_mask) * XXX: BMISS interrupts may occur after association. * I found this on 5210 code but it needs testing. If this is * true we should disable them before assoc and re-enable them - * after a successfull assoc + some jiffies. + * after a successful assoc + some jiffies. interrupt_mask &= ~AR5K_INT_BMISS; */ } diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 9caa96a1358..a611ad85798 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -287,7 +287,7 @@ static void zd_op_stop(struct ieee80211_hw *hw) * @skb - a sk-buffer * @flags: extra flags to set in the TX status info * @ackssi: ACK signal strength - * @success - True for successfull transmission of the frame + * @success - True for successful transmission of the frame * * This information calls ieee80211_tx_status_irqsafe() if required by the * control information. 
It copies the control information into the status diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index b8f9c00633f..d82aad5224f 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -2621,7 +2621,7 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr) } } - /* double-check if current erp/cqr was successfull */ + /* double-check if current erp/cqr was successful */ if ((cqr->irb.scsw.cmd.cstat == 0x00) && (cqr->irb.scsw.cmd.dstat == (DEV_STAT_CHN_END | DEV_STAT_DEV_END))) { diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 05a14536c36..4a39084d9c9 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -199,7 +199,7 @@ struct dasd_ccw_req { #define DASD_CQR_ERROR 0x82 /* request is completed with error */ #define DASD_CQR_CLEAR_PENDING 0x83 /* request is clear pending */ #define DASD_CQR_CLEARED 0x84 /* request was cleared */ -#define DASD_CQR_SUCCESS 0x85 /* request was successfull */ +#define DASD_CQR_SUCCESS 0x85 /* request was successful */ /* per dasd_ccw_req flags */ diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index 4005c44a404..71605a179d6 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -801,7 +801,7 @@ tape_3590_done(struct tape_device *device, struct tape_request *request) static inline int tape_3590_erp_succeded(struct tape_device *device, struct tape_request *request) { - DBF_EVENT(3, "Error Recovery successfull for %s\n", + DBF_EVENT(3, "Error Recovery successful for %s\n", tape_op_verbose[request->op]); return tape_3590_done(device, request); } diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 06b71823f39..659f8a79165 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -379,7 +379,7 @@ int cio_commit_config(struct subchannel *sch) if (ccode < 0) /* -EIO if msch gets a program check. 
*/ return ccode; switch (ccode) { - case 0: /* successfull */ + case 0: /* successful */ if (stsch(sch->schid, &schib) || !css_sch_is_valid(&schib)) return -ENODEV; diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 744f928a59e..10cb0f8726e 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -114,7 +114,7 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq) * @count: count of buffers to examine * @auto_ack: automatically acknowledge buffers * - * Returns the number of successfull extracted equal buffer states. + * Returns the number of successfully extracted equal buffer states. * Stops processing if a state is different from the last buffers state. */ static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ea2ce3c0ae6..3f54db31cdc 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2536,7 +2536,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, */ newdepth = ext_depth(inode); /* - * update the extent length after successfull insert of the + * update the extent length after successful insert of the * split extent */ orig_ex.ee_len = cpu_to_le16(ee_len - diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index f731ab49179..b0c4cadd4c4 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1324,7 +1324,7 @@ again: goto out; } - mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n", + mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", lockres->l_name); /* At this point we've gone inside the dlm and need to @@ -2951,7 +2951,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, ocfs2_dlm_dump_lksb(&lockres->l_lksb); BUG(); } - mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n", + mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", lockres->l_name); ocfs2_wait_on_busy_lock(lockres); diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 20c576f530f..56935bbc149 100644 
--- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -489,7 +489,7 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) return 0; out_err: - /* Clean up any successfull allocations */ + /* Clean up any successful allocations */ sctp_auth_destroy_hmacs(ep->auth_hmacs); return -ENOMEM; } -- cgit v1.2.3-70-g09d2 From 7ad68e2f970fd84d15ad67ce3216aed05f944a9c Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 11 Nov 2008 17:39:02 -0800 Subject: regulator: sysfs attribute reduction (v2) Clean up the sysfs interface to regulators by only exposing the attributes that can be properly displayed. For example: when a particular regulator method is needed to display the value, only create that attribute when that method exists. This cleaned-up interface is much more comprehensible. Most regulators only support a subset of the possible methods, so often more than half the attributes would be meaningless. Many "not defined" values are no longer necessary. (But handling of out-of-range values still looks a bit iffy.) Documentation is updated to reflect that few of the attributes are *always* present, and to briefly explain why a regulator may not have a given attribute. This adds object code, about a dozen bytes more than was removed by the preceding patch, but saves a bunch of per-regulator data associated with the now-removed attributes. So there's a net reduction in memory footprint. 
Signed-off-by: David Brownell Signed-off-by: Liam Girdwood --- Documentation/ABI/testing/sysfs-class-regulator | 136 ++++++++-------- drivers/regulator/core.c | 196 ++++++++++++++++++------ 2 files changed, 208 insertions(+), 124 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-class-regulator b/Documentation/ABI/testing/sysfs-class-regulator index 3731f6f29bc..873ef1fc156 100644 --- a/Documentation/ABI/testing/sysfs-class-regulator +++ b/Documentation/ABI/testing/sysfs-class-regulator @@ -3,8 +3,9 @@ Date: April 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called - state. This holds the regulator output state. + Some regulator directories will contain a field called + state. This reports the regulator enable status, for + regulators which can report that value. This will be one of the following strings: @@ -18,7 +19,8 @@ Description: 'disabled' means the regulator output is OFF and is not supplying power to the system.. - 'unknown' means software cannot determine the state. + 'unknown' means software cannot determine the state, or + the reported state is invalid. NOTE: this field can be used in conjunction with microvolts and microamps to determine regulator output levels. @@ -53,9 +55,10 @@ Date: April 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called microvolts. This holds the regulator output voltage setting - measured in microvolts (i.e. E-6 Volts). + measured in microvolts (i.e. E-6 Volts), for regulators + which can report that voltage. 
NOTE: This value should not be used to determine the regulator output voltage level as this value is the same regardless of @@ -67,9 +70,10 @@ Date: April 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called microamps. This holds the regulator output current limit - setting measured in microamps (i.e. E-6 Amps). + setting measured in microamps (i.e. E-6 Amps), for regulators + which can report that current. NOTE: This value should not be used to determine the regulator output current level as this value is the same regardless of @@ -81,8 +85,9 @@ Date: April 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called - opmode. This holds the regulator operating mode setting. + Some regulator directories will contain a field called + opmode. This holds the current regulator operating mode, + for regulators which can report it. The opmode value can be one of the following strings: @@ -92,7 +97,7 @@ Description: 'standby' 'unknown' - The modes are described in include/linux/regulator/regulator.h + The modes are described in include/linux/regulator/consumer.h NOTE: This value should not be used to determine the regulator output operating mode as this value is the same regardless of @@ -104,9 +109,10 @@ Date: April 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called min_microvolts. This holds the minimum safe working regulator - output voltage setting for this domain measured in microvolts. + output voltage setting for this domain measured in microvolts, + for regulators which support voltage constraints. 
NOTE: this will return the string 'constraint not defined' if the power domain has no min microvolts constraint defined by @@ -118,9 +124,10 @@ Date: April 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called max_microvolts. This holds the maximum safe working regulator - output voltage setting for this domain measured in microvolts. + output voltage setting for this domain measured in microvolts, + for regulators which support voltage constraints. NOTE: this will return the string 'constraint not defined' if the power domain has no max microvolts constraint defined by @@ -132,10 +139,10 @@ Date: April 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called min_microamps. This holds the minimum safe working regulator output current limit setting for this domain measured in - microamps. + microamps, for regulators which support current constraints. NOTE: this will return the string 'constraint not defined' if the power domain has no min microamps constraint defined by @@ -147,10 +154,10 @@ Date: April 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called max_microamps. This holds the maximum safe working regulator output current limit setting for this domain measured in - microamps. + microamps, for regulators which support current constraints. NOTE: this will return the string 'constraint not defined' if the power domain has no max microamps constraint defined by @@ -185,7 +192,7 @@ Date: April 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called requested_microamps. 
This holds the total requested load current in microamps for this regulator from all its consumer devices. @@ -204,125 +211,102 @@ Date: May 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called suspend_mem_microvolts. This holds the regulator output voltage setting for this domain measured in microvolts when - the system is suspended to memory. - - NOTE: this will return the string 'not defined' if - the power domain has no suspend to memory voltage defined by - platform code. + the system is suspended to memory, for voltage regulators + implementing suspend voltage configuration constraints. What: /sys/class/regulator/.../suspend_disk_microvolts Date: May 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called suspend_disk_microvolts. This holds the regulator output voltage setting for this domain measured in microvolts when - the system is suspended to disk. - - NOTE: this will return the string 'not defined' if - the power domain has no suspend to disk voltage defined by - platform code. + the system is suspended to disk, for voltage regulators + implementing suspend voltage configuration constraints. What: /sys/class/regulator/.../suspend_standby_microvolts Date: May 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called suspend_standby_microvolts. This holds the regulator output voltage setting for this domain measured in microvolts when - the system is suspended to standby. - - NOTE: this will return the string 'not defined' if - the power domain has no suspend to standby voltage defined by - platform code. 
+ the system is suspended to standby, for voltage regulators + implementing suspend voltage configuration constraints. What: /sys/class/regulator/.../suspend_mem_mode Date: May 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called suspend_mem_mode. This holds the regulator operating mode setting for this domain when the system is suspended to - memory. - - NOTE: this will return the string 'not defined' if - the power domain has no suspend to memory mode defined by - platform code. + memory, for regulators implementing suspend mode + configuration constraints. What: /sys/class/regulator/.../suspend_disk_mode Date: May 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called suspend_disk_mode. This holds the regulator operating mode - setting for this domain when the system is suspended to disk. - - NOTE: this will return the string 'not defined' if - the power domain has no suspend to disk mode defined by - platform code. + setting for this domain when the system is suspended to disk, + for regulators implementing suspend mode configuration + constraints. What: /sys/class/regulator/.../suspend_standby_mode Date: May 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called suspend_standby_mode. This holds the regulator operating mode setting for this domain when the system is suspended to - standby. - - NOTE: this will return the string 'not defined' if - the power domain has no suspend to standby mode defined by - platform code. + standby, for regulators implementing suspend mode + configuration constraints. 
What: /sys/class/regulator/.../suspend_mem_state Date: May 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called suspend_mem_state. This holds the regulator operating state - when suspended to memory. - - This will be one of the following strings: + when suspended to memory, for regulators implementing suspend + configuration constraints. - 'enabled' - 'disabled' - 'not defined' + This will be one of the same strings reported by + the "state" attribute. What: /sys/class/regulator/.../suspend_disk_state Date: May 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called suspend_disk_state. This holds the regulator operating state - when suspended to disk. - - This will be one of the following strings: + when suspended to disk, for regulators implementing + suspend configuration constraints. - 'enabled' - 'disabled' - 'not defined' + This will be one of the same strings reported by + the "state" attribute. What: /sys/class/regulator/.../suspend_standby_state Date: May 2008 KernelVersion: 2.6.26 Contact: Liam Girdwood Description: - Each regulator directory will contain a field called + Some regulator directories will contain a field called suspend_standby_state. This holds the regulator operating - state when suspended to standby. - - This will be one of the following strings: + state when suspended to standby, for regulators implementing + suspend configuration constraints. - 'enabled' - 'disabled' - 'not defined' + This will be one of the same strings reported by + the "state" attribute. 
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 5109f7d4809..9a5ff97d158 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -242,6 +242,7 @@ static ssize_t regulator_uV_show(struct device *dev, return ret; } +static DEVICE_ATTR(microvolts, 0444, regulator_uV_show, NULL); static ssize_t regulator_uA_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -250,6 +251,7 @@ static ssize_t regulator_uA_show(struct device *dev, return sprintf(buf, "%d\n", _regulator_get_current_limit(rdev)); } +static DEVICE_ATTR(microamps, 0444, regulator_uA_show, NULL); static ssize_t regulator_name_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -289,6 +291,7 @@ static ssize_t regulator_opmode_show(struct device *dev, return regulator_print_opmode(buf, _regulator_get_mode(rdev)); } +static DEVICE_ATTR(opmode, 0444, regulator_opmode_show, NULL); static ssize_t regulator_print_state(char *buf, int state) { @@ -307,6 +310,7 @@ static ssize_t regulator_state_show(struct device *dev, return regulator_print_state(buf, _regulator_is_enabled(rdev)); } +static DEVICE_ATTR(state, 0444, regulator_state_show, NULL); static ssize_t regulator_min_uA_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -318,6 +322,7 @@ static ssize_t regulator_min_uA_show(struct device *dev, return sprintf(buf, "%d\n", rdev->constraints->min_uA); } +static DEVICE_ATTR(min_microamps, 0444, regulator_min_uA_show, NULL); static ssize_t regulator_max_uA_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -329,6 +334,7 @@ static ssize_t regulator_max_uA_show(struct device *dev, return sprintf(buf, "%d\n", rdev->constraints->max_uA); } +static DEVICE_ATTR(max_microamps, 0444, regulator_max_uA_show, NULL); static ssize_t regulator_min_uV_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -340,6 +346,7 @@ static ssize_t regulator_min_uV_show(struct device *dev, return sprintf(buf, "%d\n", 
rdev->constraints->min_uV); } +static DEVICE_ATTR(min_microvolts, 0444, regulator_min_uV_show, NULL); static ssize_t regulator_max_uV_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -351,6 +358,7 @@ static ssize_t regulator_max_uV_show(struct device *dev, return sprintf(buf, "%d\n", rdev->constraints->max_uV); } +static DEVICE_ATTR(max_microvolts, 0444, regulator_max_uV_show, NULL); static ssize_t regulator_total_uA_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -365,6 +373,7 @@ static ssize_t regulator_total_uA_show(struct device *dev, mutex_unlock(&rdev->mutex); return sprintf(buf, "%d\n", uA); } +static DEVICE_ATTR(requested_microamps, 0444, regulator_total_uA_show, NULL); static ssize_t regulator_num_users_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -392,131 +401,106 @@ static ssize_t regulator_suspend_mem_uV_show(struct device *dev, { struct regulator_dev *rdev = dev_get_drvdata(dev); - if (!rdev->constraints) - return sprintf(buf, "not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->state_mem.uV); } +static DEVICE_ATTR(suspend_mem_microvolts, 0444, + regulator_suspend_mem_uV_show, NULL); static ssize_t regulator_suspend_disk_uV_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); - if (!rdev->constraints) - return sprintf(buf, "not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->state_disk.uV); } +static DEVICE_ATTR(suspend_disk_microvolts, 0444, + regulator_suspend_disk_uV_show, NULL); static ssize_t regulator_suspend_standby_uV_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); - if (!rdev->constraints) - return sprintf(buf, "not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->state_standby.uV); } +static DEVICE_ATTR(suspend_standby_microvolts, 0444, + regulator_suspend_standby_uV_show, NULL); static ssize_t 
regulator_suspend_mem_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); - if (!rdev->constraints) - return sprintf(buf, "not defined\n"); return regulator_print_opmode(buf, rdev->constraints->state_mem.mode); } +static DEVICE_ATTR(suspend_mem_mode, 0444, + regulator_suspend_mem_mode_show, NULL); static ssize_t regulator_suspend_disk_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); - if (!rdev->constraints) - return sprintf(buf, "not defined\n"); return regulator_print_opmode(buf, rdev->constraints->state_disk.mode); } +static DEVICE_ATTR(suspend_disk_mode, 0444, + regulator_suspend_disk_mode_show, NULL); static ssize_t regulator_suspend_standby_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); - if (!rdev->constraints) - return sprintf(buf, "not defined\n"); return regulator_print_opmode(buf, rdev->constraints->state_standby.mode); } +static DEVICE_ATTR(suspend_standby_mode, 0444, + regulator_suspend_standby_mode_show, NULL); static ssize_t regulator_suspend_mem_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); - if (!rdev->constraints) - return sprintf(buf, "not defined\n"); - return regulator_print_state(buf, rdev->constraints->state_mem.enabled); } +static DEVICE_ATTR(suspend_mem_state, 0444, + regulator_suspend_mem_state_show, NULL); static ssize_t regulator_suspend_disk_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); - if (!rdev->constraints) - return sprintf(buf, "not defined\n"); - return regulator_print_state(buf, rdev->constraints->state_disk.enabled); } +static DEVICE_ATTR(suspend_disk_state, 0444, + regulator_suspend_disk_state_show, NULL); static ssize_t 
regulator_suspend_standby_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); - if (!rdev->constraints) - return sprintf(buf, "not defined\n"); - return regulator_print_state(buf, rdev->constraints->state_standby.enabled); } +static DEVICE_ATTR(suspend_standby_state, 0444, + regulator_suspend_standby_state_show, NULL); + +/* + * These are the only attributes are present for all regulators. + * Other attributes are a function of regulator functionality. + */ static struct device_attribute regulator_dev_attrs[] = { __ATTR(name, 0444, regulator_name_show, NULL), - __ATTR(microvolts, 0444, regulator_uV_show, NULL), - __ATTR(microamps, 0444, regulator_uA_show, NULL), - __ATTR(opmode, 0444, regulator_opmode_show, NULL), - __ATTR(state, 0444, regulator_state_show, NULL), - __ATTR(min_microvolts, 0444, regulator_min_uV_show, NULL), - __ATTR(min_microamps, 0444, regulator_min_uA_show, NULL), - __ATTR(max_microvolts, 0444, regulator_max_uV_show, NULL), - __ATTR(max_microamps, 0444, regulator_max_uA_show, NULL), - __ATTR(requested_microamps, 0444, regulator_total_uA_show, NULL), __ATTR(num_users, 0444, regulator_num_users_show, NULL), __ATTR(type, 0444, regulator_type_show, NULL), - __ATTR(suspend_mem_microvolts, 0444, - regulator_suspend_mem_uV_show, NULL), - __ATTR(suspend_disk_microvolts, 0444, - regulator_suspend_disk_uV_show, NULL), - __ATTR(suspend_standby_microvolts, 0444, - regulator_suspend_standby_uV_show, NULL), - __ATTR(suspend_mem_mode, 0444, - regulator_suspend_mem_mode_show, NULL), - __ATTR(suspend_disk_mode, 0444, - regulator_suspend_disk_mode_show, NULL), - __ATTR(suspend_standby_mode, 0444, - regulator_suspend_standby_mode_show, NULL), - __ATTR(suspend_mem_state, 0444, - regulator_suspend_mem_state_show, NULL), - __ATTR(suspend_disk_state, 0444, - regulator_suspend_disk_state_show, NULL), - __ATTR(suspend_standby_state, 0444, - regulator_suspend_standby_state_show, NULL), __ATTR_NULL, 
}; @@ -1711,6 +1695,117 @@ int regulator_notifier_call_chain(struct regulator_dev *rdev, } EXPORT_SYMBOL_GPL(regulator_notifier_call_chain); +/* + * To avoid cluttering sysfs (and memory) with useless state, only + * create attributes that can be meaningfully displayed. + */ +static int add_regulator_attributes(struct regulator_dev *rdev) +{ + struct device *dev = &rdev->dev; + struct regulator_ops *ops = rdev->desc->ops; + int status = 0; + + /* some attributes need specific methods to be displayed */ + if (ops->get_voltage) { + status = device_create_file(dev, &dev_attr_microvolts); + if (status < 0) + return status; + } + if (ops->get_current_limit) { + status = device_create_file(dev, &dev_attr_microamps); + if (status < 0) + return status; + } + if (ops->get_mode) { + status = device_create_file(dev, &dev_attr_opmode); + if (status < 0) + return status; + } + if (ops->is_enabled) { + status = device_create_file(dev, &dev_attr_state); + if (status < 0) + return status; + } + + /* some attributes are type-specific */ + if (rdev->desc->type == REGULATOR_CURRENT) { + status = device_create_file(dev, &dev_attr_requested_microamps); + if (status < 0) + return status; + } + + /* all the other attributes exist to support constraints; + * don't show them if there are no constraints, or if the + * relevant supporting methods are missing. 
+ */ + if (!rdev->constraints) + return status; + + /* constraints need specific supporting methods */ + if (ops->set_voltage) { + status = device_create_file(dev, &dev_attr_min_microvolts); + if (status < 0) + return status; + status = device_create_file(dev, &dev_attr_max_microvolts); + if (status < 0) + return status; + } + if (ops->set_current_limit) { + status = device_create_file(dev, &dev_attr_min_microamps); + if (status < 0) + return status; + status = device_create_file(dev, &dev_attr_max_microamps); + if (status < 0) + return status; + } + + /* suspend mode constraints need multiple supporting methods */ + if (!(ops->set_suspend_enable && ops->set_suspend_disable)) + return status; + + status = device_create_file(dev, &dev_attr_suspend_standby_state); + if (status < 0) + return status; + status = device_create_file(dev, &dev_attr_suspend_mem_state); + if (status < 0) + return status; + status = device_create_file(dev, &dev_attr_suspend_disk_state); + if (status < 0) + return status; + + if (ops->set_suspend_voltage) { + status = device_create_file(dev, + &dev_attr_suspend_standby_microvolts); + if (status < 0) + return status; + status = device_create_file(dev, + &dev_attr_suspend_mem_microvolts); + if (status < 0) + return status; + status = device_create_file(dev, + &dev_attr_suspend_disk_microvolts); + if (status < 0) + return status; + } + + if (ops->set_suspend_mode) { + status = device_create_file(dev, + &dev_attr_suspend_standby_mode); + if (status < 0) + return status; + status = device_create_file(dev, + &dev_attr_suspend_mem_mode); + if (status < 0) + return status; + status = device_create_file(dev, + &dev_attr_suspend_disk_mode); + if (status < 0) + return status; + } + + return status; +} + /** * regulator_register - register regulator * @regulator: regulator source @@ -1779,6 +1874,11 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, dev_set_drvdata(&rdev->dev, rdev); + /* add attributes supported by this 
regulator */ + ret = add_regulator_attributes(rdev); + if (ret < 0) + goto scrub; + /* set supply regulator if it exists */ if (init_data->supply_regulator_dev) { ret = set_supply(rdev, -- cgit v1.2.3-70-g09d2 From 9fe5817f196054142b9a13ed78c73b76a29f2ea3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 31 Dec 2008 12:52:44 +0000 Subject: regulator: Add basic DocBook manual Add a basic DocBook manual for the regulator API. This is much more skeletal than the existing text documentation, the main benefit is to provide a skeleton for automatic generation of a manual based on the kerneldoc for the API. Since large portions of the text are lifted from the existing text format documentation written by Liam Girdwood much of the credit belongs to him. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- Documentation/DocBook/Makefile | 2 +- Documentation/DocBook/regulator.tmpl | 304 +++++++++++++++++++++++++++++++++++ 2 files changed, 305 insertions(+), 1 deletion(-) create mode 100644 Documentation/DocBook/regulator.tmpl (limited to 'Documentation') diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 0a08126d309..dc3154e4927 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -12,7 +12,7 @@ DOCBOOKS := z8530book.xml mcabook.xml \ kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ - mac80211.xml debugobjects.xml sh.xml + mac80211.xml debugobjects.xml sh.xml regulator.xml ### # The build process is as follows (targets): diff --git a/Documentation/DocBook/regulator.tmpl b/Documentation/DocBook/regulator.tmpl new file mode 100644 index 00000000000..53f4f8d3b81 --- /dev/null +++ b/Documentation/DocBook/regulator.tmpl @@ -0,0 +1,304 @@ + + + + + + Voltage and current regulator API + + + + Liam + Girdwood + +
+ lrg@slimlogic.co.uk +
+
+
+ + Mark + Brown + + Wolfson Microelectronics +
+ broonie@opensource.wolfsonmicro.com +
+
+
+
+ + + 2007-2008 + Wolfson Microelectronics + + + 2008 + Liam Girdwood + + + + + This documentation is free software; you can redistribute + it and/or modify it under the terms of the GNU General Public + License version 2 as published by the Free Software Foundation. + + + + This program is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + + + You should have received a copy of the GNU General Public + License along with this program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, + MA 02111-1307 USA + + + + For more details see the file COPYING in the source + distribution of Linux. + + +
+ + + + + Introduction + + This framework is designed to provide a standard kernel + interface to control voltage and current regulators. + + + The intention is to allow systems to dynamically control + regulator power output in order to save power and prolong + battery life. This applies to both voltage regulators (where + voltage output is controllable) and current sinks (where current + limit is controllable). + + + Note that additional (and currently more complete) documentation + is available in the Linux kernel source under + Documentation/power/regulator. + + + + Glossary + + The regulator API uses a number of terms which may not be + familiar: + + + + + Regulator + + + Electronic device that supplies power to other devices. Most + regulators can enable and disable their output and some can also + control their output voltage or current. + + + + + + Consumer + + + Electronic device which consumes power provided by a regulator. + These may either be static, requiring only a fixed supply, or + dynamic, requiring active management of the regulator at + runtime. + + + + + + Power Domain + + + The electronic circuit supplied by a given regulator, including + the regulator and all consumer devices. The configuration of + the regulator is shared between all the components in the + circuit. + + + + + + Power Management Integrated Circuit + PMIC + + + An IC which contains numerous regulators and often also other + subsystems. In an embedded system the primary PMIC is often + equivalent to a combination of the PSU and southbridge in a + desktop system. + + + + + + + + + Consumer driver interface + + This offers a similar API to the kernel clock framework. + Consumer drivers use get and put operations to acquire and + release regulators. Functions are + provided to enable + and disable the + regulator and to get and set the runtime parameters of the + regulator.
+ + + When requesting regulators consumers use symbolic names for their + supplies, such as "Vcc", which are mapped into actual regulator + devices by the machine interface. + + + A stub version of this API is provided when the regulator + framework is not in use in order to minimise the need to use + ifdefs. + + + + Enabling and disabling + + The regulator API provides reference counted enabling and + disabling of regulators. Consumer devices use the regulator_enable + and regulator_disable + functions to enable and disable regulators. Calls + to the two functions must be balanced. + + + Note that since multiple consumers may be using a regulator and + machine constraints may not allow the regulator to be disabled + there is no guarantee that calling + regulator_disable will actually cause the + supply provided by the regulator to be disabled. Consumer + drivers should assume that the regulator may be enabled at all + times. + + + + + Configuration + + Some consumer devices may need to be able to dynamically + configure their supplies. For example, MMC drivers may need to + select the correct operating voltage for their cards. This may + be done while the regulator is enabled or disabled. + + + The regulator_set_voltage + and regulator_set_current_limit + functions provide the primary interface for this. + Both take ranges of voltages and currents, supporting drivers + that do not require a specific value (eg, CPU frequency scaling + normally permits the CPU to use a wider range of supply + voltages at lower frequencies but does not require that the + supply voltage be lowered). Where an exact value is required + both minimum and maximum values should be identical. + + + + + Callbacks + + Callbacks may also be registered + for events such as regulation failures. + + + + + + Regulator driver interface + + Drivers for regulator chips register the regulators + with the regulator core, providing operations structures to the + core. 
A notifier interface + allows error conditions to be reported to the core. + + + Registration should be triggered by explicit setup done by the + platform, supplying a struct + regulator_init_data for the regulator containing + constraint and + supply information. + + + + + Machine interface + + This interface provides a way to define how regulators are + connected to consumers on a given system and what the valid + operating parameters are for the system. + + + + Supplies + + Regulator supplies are specified using struct + regulator_consumer_supply. This is done at + driver registration + time as part of the machine constraints. + + + + + Constraints + + As well as defining the connections the machine interface + also provides constraints defining the operations that + clients are allowed to perform and the parameters that may be + set. This is required since generally regulator devices will + offer more flexibility than it is safe to use on a given + system, for example supporting higher supply voltages than the + consumers are rated for. + + + This is done at driver + registration time by providing a struct + regulation_constraints. + + + The constraints may also specify an initial configuration for the + regulator in the constraints, which is particularly useful for + use with static consumers. + + + + + + API reference + + Due to limitations of the kernel documentation framework and the + existing layout of the source code the entire regulator API is + documented here. + +!Iinclude/linux/regulator/consumer.h +!Iinclude/linux/regulator/machine.h +!Iinclude/linux/regulator/driver.h +!Edrivers/regulator/core.c + +
-- cgit v1.2.3-70-g09d2 From d5b524327b2a482dddae3839ced8f8825074730d Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 8 Jan 2009 16:32:13 -0700 Subject: Fix a typo in the development process document. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Aníbal Monsalve Salazar Signed-off-by: Jonathan Corbet --- Documentation/development-process/4.Coding | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/development-process/4.Coding b/Documentation/development-process/4.Coding index 014aca8f14e..a5a3450faaa 100644 --- a/Documentation/development-process/4.Coding +++ b/Documentation/development-process/4.Coding @@ -375,10 +375,10 @@ say, this can be a large job, so it is best to be sure that the justification is solid. When making an incompatible API change, one should, whenever possible, -ensure that code which has not been updated is caught by the compiler. +ensure that code which has not been updated is caught by the compiler. This will help you to be sure that you have found all in-tree uses of that interface. It will also alert developers of out-of-tree code that there is a change that they need to respond to. Supporting out-of-tree code is not something that kernel developers need to be worried about, but we also do -not have to make life harder for out-of-tree developers than it it needs to -be. +not have to make life harder for out-of-tree developers than it needs to +be. -- cgit v1.2.3-70-g09d2 From 237889bf0a62f1399fb2ba0c2a259e6a96597131 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 17 Dec 2008 16:55:18 +0800 Subject: ACPI : Use RSDT instead of XSDT by adding boot option of "acpi=rsdt" On some boxes there exist both RSDT and XSDT table. But unfortunately sometimes there exists the following error when XSDT table is used: a. 32/64X address mismatch b.
The 32/64X FACS address mismatch In such case the boot option of "acpi=rsdt" is provided so that RSDT is tried instead of XSDT table when the system can't work well. http://bugzilla.kernel.org/show_bug.cgi?id=8246 Signed-off-by: Zhao Yakui cc:Thomas Renninger Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 1 + arch/ia64/kernel/acpi.c | 1 + arch/x86/kernel/acpi/boot.c | 6 +++++- drivers/acpi/tables/tbutils.c | 3 ++- include/acpi/acpixf.h | 1 + 5 files changed, 10 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c9115c1b672..136f02842de 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -139,6 +139,7 @@ and is between 256 and 4096 characters. It is defined in the file ht -- run only enough ACPI to enable Hyper Threading strict -- Be less tolerant of platforms that are not strictly ACPI specification compliant. + rsdt -- prefer RSDT over (default) XSDT See also Documentation/power/pm.txt, pci=noacpi diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index bd7acc71e8a..c19b686db9b 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -65,6 +65,7 @@ EXPORT_SYMBOL(pm_idle); void (*pm_power_off) (void); EXPORT_SYMBOL(pm_power_off); +u32 acpi_rsdt_forced; unsigned int acpi_cpei_override; unsigned int acpi_cpei_phys_cpuid; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4c51a2f8fd3..db1a90a76b3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -47,7 +47,7 @@ #endif static int __initdata acpi_force = 0; - +u32 acpi_rsdt_forced; #ifdef CONFIG_ACPI int acpi_disabled = 0; #else @@ -1783,6 +1783,10 @@ static int __init parse_acpi(char *arg) disable_acpi(); acpi_ht = 1; } + /* acpi=rsdt use RSDT instead of XSDT */ + else if (strcmp(arg, "rsdt") == 0) { + acpi_rsdt_forced = 1; + } /* "acpi=noirq" disables ACPI interrupt routing */ else 
if (strcmp(arg, "noirq") == 0) { acpi_noirq_set(); diff --git a/drivers/acpi/tables/tbutils.c b/drivers/acpi/tables/tbutils.c index 0cc92ef5236..da9f240186e 100644 --- a/drivers/acpi/tables/tbutils.c +++ b/drivers/acpi/tables/tbutils.c @@ -420,7 +420,8 @@ acpi_tb_parse_root_table(acpi_physical_address rsdp_address, u8 flags) /* Differentiate between RSDT and XSDT root tables */ - if (rsdp->revision > 1 && rsdp->xsdt_physical_address) { + if (rsdp->revision > 1 && rsdp->xsdt_physical_address + && !acpi_rsdt_forced) { /* * Root table is an XSDT (64-bit physical addresses). We must use the * XSDT if the revision is > 1 and the XSDT pointer is present, as per diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 33bc0e3b195..05d2614e007 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -48,6 +48,7 @@ #include "actypes.h" #include "actbl.h" +extern u32 acpi_rsdt_forced; /* * Global interfaces */ -- cgit v1.2.3-70-g09d2 From 555d61d6542d51563e50532ff604dcd31c96fb24 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 9 Jan 2009 12:15:02 +0100 Subject: [S390] update documentation for hvc_iucv kernel parameter. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- Documentation/kernel-parameters.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index fb849020aea..ed0a72442cf 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -829,8 +829,8 @@ and is between 256 and 4096 characters. It is defined in the file hlt [BUGS=ARM,SH] - hvc_iucv= [S390] Number of z/VM IUCV Hypervisor console (HVC) - back-ends. Valid parameters: 0..8 + hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC) + terminal devices. 
Valid values: 0..8 i8042.debug [HW] Toggle i8042 debug mode i8042.direct [HW] Put keyboard port into non-translated mode -- cgit v1.2.3-70-g09d2 From c4be0c1dc4cdc37b175579be1460f15ac6495e9a Mon Sep 17 00:00:00 2001 From: Takashi Sato Date: Fri, 9 Jan 2009 16:40:58 -0800 Subject: filesystem freeze: add error handling of write_super_lockfs/unlockfs Currently, ext3 in mainline Linux doesn't have the freeze feature which suspends write requests. So, we cannot take a backup which keeps the filesystem's consistency with the storage device's features (snapshot and replication) while it is mounted. In many case, a commercial filesystem (e.g. VxFS) has the freeze feature and it would be used to get the consistent backup. If Linux's standard filesystem ext3 has the freeze feature, we can do it without a commercial filesystem. So I have implemented the ioctls of the freeze feature. I think we can take the consistent backup with the following steps. 1. Freeze the filesystem with the freeze ioctl. 2. Separate the replication volume or create the snapshot with the storage device's feature. 3. Unfreeze the filesystem with the unfreeze ioctl. 4. Take the backup from the separated replication volume or the snapshot. This patch: VFS: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they can return an error. Rename write_super_lockfs and unlockfs of the super block operation freeze_fs and unfreeze_fs to avoid a confusion. ext3, ext4, xfs, gfs2, jfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that write_super_lockfs returns an error if needed, and unlockfs always returns 0. reiserfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they always return 0 (success) to keep a current behavior. 
Signed-off-by: Takashi Sato Signed-off-by: Masayuki Hamaguchi Cc: Cc: Cc: Christoph Hellwig Cc: Dave Kleikamp Cc: Dave Chinner Cc: Alasdair G Kergon Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 8 +++---- Documentation/filesystems/vfs.txt | 8 +++---- fs/buffer.c | 8 +++---- fs/ext3/super.c | 45 +++++++++++++++++++++++++-------------- fs/ext4/super.c | 45 +++++++++++++++++++++++++++------------ fs/gfs2/ops_super.c | 16 ++++++++------ fs/jfs/super.c | 10 +++++---- fs/reiserfs/super.c | 10 +++++---- fs/xfs/linux-2.6/xfs_super.c | 8 +++---- fs/xfs/xfs_fsops.c | 11 ++++++---- fs/xfs/xfs_fsops.h | 2 +- include/linux/fs.h | 4 ++-- 12 files changed, 107 insertions(+), 68 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index cfbfa15a46b..ec6a9392a17 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -97,8 +97,8 @@ prototypes: void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - void (*write_super_lockfs) (struct super_block *); - void (*unlockfs) (struct super_block *); + int (*freeze_fs) (struct super_block *); + int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); @@ -119,8 +119,8 @@ delete_inode: no put_super: yes yes no write_super: no yes read sync_fs: no no read -write_super_lockfs: ? -unlockfs: ? +freeze_fs: ? +unfreeze_fs: ? 
statfs: no no no remount_fs: yes yes maybe (see below) clear_inode: no diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index ef19afa186a..deeeed0faa8 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -210,8 +210,8 @@ struct super_operations { void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - void (*write_super_lockfs) (struct super_block *); - void (*unlockfs) (struct super_block *); + int (*freeze_fs) (struct super_block *); + int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); @@ -270,11 +270,11 @@ or bottom half). a superblock. The second parameter indicates whether the method should wait until the write out has been completed. Optional. - write_super_lockfs: called when VFS is locking a filesystem and + freeze_fs: called when VFS is locking a filesystem and forcing it into a consistent state. This method is currently used by the Logical Volume Manager (LVM). - unlockfs: called when VFS is unlocking a filesystem and making it writable + unfreeze_fs: called when VFS is unlocking a filesystem and making it writable again. statfs: called when the VFS needs to get filesystem statistics. 
This diff --git a/fs/buffer.c b/fs/buffer.c index c26da785938..87f9e537b8c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -221,8 +221,8 @@ struct super_block *freeze_bdev(struct block_device *bdev) sync_blockdev(sb->s_bdev); - if (sb->s_op->write_super_lockfs) - sb->s_op->write_super_lockfs(sb); + if (sb->s_op->freeze_fs) + sb->s_op->freeze_fs(sb); } sync_blockdev(bdev); @@ -242,8 +242,8 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb) if (sb) { BUG_ON(sb->s_bdev != bdev); - if (sb->s_op->unlockfs) - sb->s_op->unlockfs(sb); + if (sb->s_op->unfreeze_fs) + sb->s_op->unfreeze_fs(sb); sb->s_frozen = SB_UNFROZEN; smp_wmb(); wake_up(&sb->s_wait_unfrozen); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5d047a030a7..b70d90e08a3 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -48,8 +48,8 @@ static int ext3_load_journal(struct super_block *, struct ext3_super_block *, unsigned long journal_devnum); static int ext3_create_journal(struct super_block *, struct ext3_super_block *, unsigned int); -static void ext3_commit_super (struct super_block * sb, - struct ext3_super_block * es, +static int ext3_commit_super(struct super_block *sb, + struct ext3_super_block *es, int sync); static void ext3_mark_recovery_complete(struct super_block * sb, struct ext3_super_block * es); @@ -60,9 +60,9 @@ static const char *ext3_decode_error(struct super_block * sb, int errno, char nbuf[16]); static int ext3_remount (struct super_block * sb, int * flags, char * data); static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); -static void ext3_unlockfs(struct super_block *sb); +static int ext3_unfreeze(struct super_block *sb); static void ext3_write_super (struct super_block * sb); -static void ext3_write_super_lockfs(struct super_block *sb); +static int ext3_freeze(struct super_block *sb); /* * Wrappers for journal_start/end. 
@@ -759,8 +759,8 @@ static const struct super_operations ext3_sops = { .put_super = ext3_put_super, .write_super = ext3_write_super, .sync_fs = ext3_sync_fs, - .write_super_lockfs = ext3_write_super_lockfs, - .unlockfs = ext3_unlockfs, + .freeze_fs = ext3_freeze, + .unfreeze_fs = ext3_unfreeze, .statfs = ext3_statfs, .remount_fs = ext3_remount, .clear_inode = ext3_clear_inode, @@ -2311,21 +2311,23 @@ static int ext3_create_journal(struct super_block * sb, return 0; } -static void ext3_commit_super (struct super_block * sb, - struct ext3_super_block * es, +static int ext3_commit_super(struct super_block *sb, + struct ext3_super_block *es, int sync) { struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; + int error = 0; if (!sbh) - return; + return error; es->s_wtime = cpu_to_le32(get_seconds()); es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb)); es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); if (sync) - sync_dirty_buffer(sbh); + error = sync_dirty_buffer(sbh); + return error; } @@ -2439,12 +2441,14 @@ static int ext3_sync_fs(struct super_block *sb, int wait) * LVM calls this function before a (read-only) snapshot is created. This * gives us a chance to flush the journal completely and mark the fs clean. */ -static void ext3_write_super_lockfs(struct super_block *sb) +static int ext3_freeze(struct super_block *sb) { + int error = 0; + journal_t *journal; sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) { - journal_t *journal = EXT3_SB(sb)->s_journal; + journal = EXT3_SB(sb)->s_journal; /* Now we set up the journal barrier. */ journal_lock_updates(journal); @@ -2453,20 +2457,28 @@ static void ext3_write_super_lockfs(struct super_block *sb) * We don't want to clear needs_recovery flag when we failed * to flush the journal. 
*/ - if (journal_flush(journal) < 0) - return; + error = journal_flush(journal); + if (error < 0) + goto out; /* Journal blocked and flushed, clear needs_recovery flag. */ EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); + error = ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); + if (error) + goto out; } + return 0; + +out: + journal_unlock_updates(journal); + return error; } /* * Called by LVM after the snapshot is done. We need to reset the RECOVER * flag here, even though the filesystem is not technically dirty yet. */ -static void ext3_unlockfs(struct super_block *sb) +static int ext3_unfreeze(struct super_block *sb) { if (!(sb->s_flags & MS_RDONLY)) { lock_super(sb); @@ -2476,6 +2488,7 @@ static void ext3_unlockfs(struct super_block *sb) unlock_super(sb); journal_unlock_updates(EXT3_SB(sb)->s_journal); } + return 0; } static int ext3_remount (struct super_block * sb, int * flags, char * data) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8f7e0be8ab1..e5f06a5f045 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -51,7 +51,7 @@ struct proc_dir_entry *ext4_proc_root; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); -static void ext4_commit_super(struct super_block *sb, +static int ext4_commit_super(struct super_block *sb, struct ext4_super_block *es, int sync); static void ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); @@ -62,9 +62,9 @@ static const char *ext4_decode_error(struct super_block *sb, int errno, char nbuf[16]); static int ext4_remount(struct super_block *sb, int *flags, char *data); static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); -static void ext4_unlockfs(struct super_block *sb); +static int ext4_unfreeze(struct super_block *sb); static void ext4_write_super(struct super_block *sb); -static void ext4_write_super_lockfs(struct super_block *sb); +static int 
ext4_freeze(struct super_block *sb); ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, @@ -978,8 +978,8 @@ static const struct super_operations ext4_sops = { .put_super = ext4_put_super, .write_super = ext4_write_super, .sync_fs = ext4_sync_fs, - .write_super_lockfs = ext4_write_super_lockfs, - .unlockfs = ext4_unlockfs, + .freeze_fs = ext4_freeze, + .unfreeze_fs = ext4_unfreeze, .statfs = ext4_statfs, .remount_fs = ext4_remount, .clear_inode = ext4_clear_inode, @@ -2888,13 +2888,14 @@ static int ext4_load_journal(struct super_block *sb, return 0; } -static void ext4_commit_super(struct super_block *sb, +static int ext4_commit_super(struct super_block *sb, struct ext4_super_block *es, int sync) { struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; + int error = 0; if (!sbh) - return; + return error; if (buffer_write_io_error(sbh)) { /* * Oh, dear. A previous attempt to write the @@ -2918,14 +2919,19 @@ static void ext4_commit_super(struct super_block *sb, BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); if (sync) { - sync_dirty_buffer(sbh); - if (buffer_write_io_error(sbh)) { + error = sync_dirty_buffer(sbh); + if (error) + return error; + + error = buffer_write_io_error(sbh); + if (error) { printk(KERN_ERR "EXT4-fs: I/O error while writing " "superblock for %s.\n", sb->s_id); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } } + return error; } @@ -3058,12 +3064,14 @@ static int ext4_sync_fs(struct super_block *sb, int wait) * LVM calls this function before a (read-only) snapshot is created. This * gives us a chance to flush the journal completely and mark the fs clean. */ -static void ext4_write_super_lockfs(struct super_block *sb) +static int ext4_freeze(struct super_block *sb) { + int error = 0; + journal_t *journal; sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) { - journal_t *journal = EXT4_SB(sb)->s_journal; + journal = EXT4_SB(sb)->s_journal; if (journal) { /* Now we set up the journal barrier. 
*/ @@ -3073,21 +3081,29 @@ static void ext4_write_super_lockfs(struct super_block *sb) * We don't want to clear needs_recovery flag when we * failed to flush the journal. */ - if (jbd2_journal_flush(journal) < 0) - return; + error = jbd2_journal_flush(journal); + if (error < 0) + goto out; } /* Journal blocked and flushed, clear needs_recovery flag. */ EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); + error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); + if (error) + goto out; } + return 0; +out: + jbd2_journal_unlock_updates(journal); + return error; } /* * Called by LVM after the snapshot is done. We need to reset the RECOVER * flag here, even though the filesystem is not technically dirty yet. */ -static void ext4_unlockfs(struct super_block *sb) +static int ext4_unfreeze(struct super_block *sb) { if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { lock_super(sb); @@ -3097,6 +3113,7 @@ static void ext4_unlockfs(struct super_block *sb) unlock_super(sb); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } + return 0; } static int ext4_remount(struct super_block *sb, int *flags, char *data) diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 777783deddc..320323d0347 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -211,18 +211,18 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) } /** - * gfs2_write_super_lockfs - prevent further writes to the filesystem + * gfs2_freeze - prevent further writes to the filesystem * @sb: the VFS structure for the filesystem * */ -static void gfs2_write_super_lockfs(struct super_block *sb) +static int gfs2_freeze(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; int error; if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - return; + return -EINVAL; for (;;) { error = gfs2_freeze_fs(sdp); @@ -242,17 +242,19 @@ static void gfs2_write_super_lockfs(struct super_block *sb) fs_err(sdp, "retrying...\n"); msleep(1000); } + return 
0; } /** - * gfs2_unlockfs - reallow writes to the filesystem + * gfs2_unfreeze - reallow writes to the filesystem * @sb: the VFS structure for the filesystem * */ -static void gfs2_unlockfs(struct super_block *sb) +static int gfs2_unfreeze(struct super_block *sb) { gfs2_unfreeze_fs(sb->s_fs_info); + return 0; } /** @@ -688,8 +690,8 @@ const struct super_operations gfs2_super_ops = { .put_super = gfs2_put_super, .write_super = gfs2_write_super, .sync_fs = gfs2_sync_fs, - .write_super_lockfs = gfs2_write_super_lockfs, - .unlockfs = gfs2_unlockfs, + .freeze_fs = gfs2_freeze, + .unfreeze_fs = gfs2_unfreeze, .statfs = gfs2_statfs, .remount_fs = gfs2_remount_fs, .clear_inode = gfs2_clear_inode, diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 0dae345e481..b37d1f78b85 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -543,7 +543,7 @@ out_kfree: return ret; } -static void jfs_write_super_lockfs(struct super_block *sb) +static int jfs_freeze(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; @@ -553,9 +553,10 @@ static void jfs_write_super_lockfs(struct super_block *sb) lmLogShutdown(log); updateSuper(sb, FM_CLEAN); } + return 0; } -static void jfs_unlockfs(struct super_block *sb) +static int jfs_unfreeze(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; @@ -568,6 +569,7 @@ static void jfs_unlockfs(struct super_block *sb) else txResume(sb); } + return 0; } static int jfs_get_sb(struct file_system_type *fs_type, @@ -735,8 +737,8 @@ static const struct super_operations jfs_super_operations = { .delete_inode = jfs_delete_inode, .put_super = jfs_put_super, .sync_fs = jfs_sync_fs, - .write_super_lockfs = jfs_write_super_lockfs, - .unlockfs = jfs_unlockfs, + .freeze_fs = jfs_freeze, + .unfreeze_fs = jfs_unfreeze, .statfs = jfs_statfs, .remount_fs = jfs_remount, .show_options = jfs_show_options, diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index c55651f1407..f3c820b7582 
100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -83,7 +83,7 @@ static void reiserfs_write_super(struct super_block *s) reiserfs_sync_fs(s, 1); } -static void reiserfs_write_super_lockfs(struct super_block *s) +static int reiserfs_freeze(struct super_block *s) { struct reiserfs_transaction_handle th; reiserfs_write_lock(s); @@ -101,11 +101,13 @@ static void reiserfs_write_super_lockfs(struct super_block *s) } s->s_dirt = 0; reiserfs_write_unlock(s); + return 0; } -static void reiserfs_unlockfs(struct super_block *s) +static int reiserfs_unfreeze(struct super_block *s) { reiserfs_allow_writes(s); + return 0; } extern const struct in_core_key MAX_IN_CORE_KEY; @@ -613,8 +615,8 @@ static const struct super_operations reiserfs_sops = { .put_super = reiserfs_put_super, .write_super = reiserfs_write_super, .sync_fs = reiserfs_sync_fs, - .write_super_lockfs = reiserfs_write_super_lockfs, - .unlockfs = reiserfs_unlockfs, + .freeze_fs = reiserfs_freeze, + .unfreeze_fs = reiserfs_unfreeze, .statfs = reiserfs_statfs, .remount_fs = reiserfs_remount, .show_options = generic_show_options, diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index be846d606ae..95a97108036 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1269,14 +1269,14 @@ xfs_fs_remount( * need to take care of the metadata. Once that's done write a dummy * record to dirty the log in case of a crash while frozen. 
*/ -STATIC void -xfs_fs_lockfs( +STATIC int +xfs_fs_freeze( struct super_block *sb) { struct xfs_mount *mp = XFS_M(sb); xfs_quiesce_attr(mp); - xfs_fs_log_dummy(mp); + return -xfs_fs_log_dummy(mp); } STATIC int @@ -1557,7 +1557,7 @@ static struct super_operations xfs_super_operations = { .put_super = xfs_fs_put_super, .write_super = xfs_fs_write_super, .sync_fs = xfs_fs_sync_super, - .write_super_lockfs = xfs_fs_lockfs, + .freeze_fs = xfs_fs_freeze, .statfs = xfs_fs_statfs, .remount_fs = xfs_fs_remount, .show_options = xfs_fs_show_options, diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 852b6d32e8d..680d0e0ec93 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -595,17 +595,19 @@ out: return 0; } -void +int xfs_fs_log_dummy( xfs_mount_t *mp) { xfs_trans_t *tp; xfs_inode_t *ip; + int error; tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); - if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) { + error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + if (error) { xfs_trans_cancel(tp, 0); - return; + return error; } ip = mp->m_rootip; @@ -615,9 +617,10 @@ xfs_fs_log_dummy( xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); - xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; } int diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index 300d0c9d61a..88435e0a77c 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h @@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, xfs_fsop_resblks_t *outval); extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); -extern void xfs_fs_log_dummy(xfs_mount_t *mp); +extern int xfs_fs_log_dummy(xfs_mount_t *mp); #endif /* __XFS_FSOPS_H__ */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 0b87b29f479..3e59182de9d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ 
-1377,8 +1377,8 @@ struct super_operations { void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - void (*write_super_lockfs) (struct super_block *); - void (*unlockfs) (struct super_block *); + int (*freeze_fs) (struct super_block *); + int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); -- cgit v1.2.3-70-g09d2