diff options
Diffstat (limited to 'include/linux')
91 files changed, 2230 insertions, 1980 deletions
diff --git a/include/linux/Kbuild b/include/linux/Kbuild index a67b6227d27..ca9b9b9bd33 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -67,6 +67,7 @@ header-y += falloc.h header-y += fd.h header-y += fdreg.h header-y += fib_rules.h +header-y += fiemap.h header-y += firewire-cdev.h header-y += firewire-constants.h header-y += fuse.h diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 78199151c00..d047f846c3e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -257,6 +257,40 @@ void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); #endif /* CONFIG_PM_SLEEP */ + +#define OSC_QUERY_TYPE 0 +#define OSC_SUPPORT_TYPE 1 +#define OSC_CONTROL_TYPE 2 +#define OSC_SUPPORT_MASKS 0x1f + +/* _OSC DW0 Definition */ +#define OSC_QUERY_ENABLE 1 +#define OSC_REQUEST_ERROR 2 +#define OSC_INVALID_UUID_ERROR 4 +#define OSC_INVALID_REVISION_ERROR 8 +#define OSC_CAPABILITIES_MASK_ERROR 16 + +/* _OSC DW1 Definition (OS Support Fields) */ +#define OSC_EXT_PCI_CONFIG_SUPPORT 1 +#define OSC_ACTIVE_STATE_PWR_SUPPORT 2 +#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4 +#define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8 +#define OSC_MSI_SUPPORT 16 + +/* _OSC DW1 Definition (OS Control Fields) */ +#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1 +#define OSC_SHPC_NATIVE_HP_CONTROL 2 +#define OSC_PCI_EXPRESS_PME_CONTROL 4 +#define OSC_PCI_EXPRESS_AER_CONTROL 8 +#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16 + +#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \ + OSC_SHPC_NATIVE_HP_CONTROL | \ + OSC_PCI_EXPRESS_PME_CONTROL | \ + OSC_PCI_EXPRESS_AER_CONTROL | \ + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) + +extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); #else /* CONFIG_ACPI */ static inline int early_acpi_boot_init(void) diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index 91a773993a5..850f39b33e7 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -10,8 +10,13 @@ #ifndef _LINUX_AUTO_DEV_IOCTL_H #define _LINUX_AUTO_DEV_IOCTL_H +#include <linux/auto_fs.h> + +#ifdef __KERNEL__ #include <linux/string.h> -#include <linux/types.h> +#else +#include <string.h> +#endif /* __KERNEL__ */ #define AUTOFS_DEVICE_NAME "autofs" diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index c21e5972a3e..63265852b7d 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h @@ -17,11 +17,13 @@ #ifdef __KERNEL__ #include <linux/fs.h> #include <linux/limits.h> +#include <linux/types.h> +#include <linux/ioctl.h> +#else #include <asm/types.h> +#include <sys/ioctl.h> #endif /* __KERNEL__ */ -#include <linux/ioctl.h> - /* This file describes autofs v3 */ #define AUTOFS_PROTO_VERSION 3 diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 77b4a9e4600..6638b8148de 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -35,8 +35,7 @@ struct linux_binprm{ #endif struct mm_struct *mm; unsigned long p; /* current top of mem */ - unsigned int sh_bang:1, - misc_bang:1, + unsigned int cred_prepared:1,/* true if creds already prepared (multiple * preps happen for interpreters) */ cap_effective:1;/* true if has elevated effective capabilities, diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 455d83219fa..bc3ab707369 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -146,10 +146,10 @@ extern void *alloc_large_system_hash(const char *tablename, #define HASH_EARLY 0x00000001 /* Allocating during early boot? */ -/* Only NUMA needs hash distribution. - * IA64 and x86_64 have sufficient vmalloc space. +/* Only NUMA needs hash distribution. 64bit NUMA architectures have + * sufficient vmalloc space. */ -#if defined(CONFIG_NUMA) && (defined(CONFIG_IA64) || defined(CONFIG_X86_64)) +#if defined(CONFIG_NUMA) && defined(CONFIG_64BIT) #define HASHDIST_DEFAULT 1 #else #define HASHDIST_DEFAULT 0 diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f19fd9045ea..7b73bb8f197 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -216,7 +216,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int block_page_mkwrite(struct vm_area_struct *vma, struct page *page, +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); @@ -332,22 +332,10 @@ extern int __set_page_dirty_buffers(struct page *page); static inline void buffer_init(void) {} static inline int try_to_free_buffers(struct page *page) { return 1; } -static inline int sync_blockdev(struct block_device *bdev) { return 0; } static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } -static inline void invalidate_bdev(struct block_device *bdev) {} - -static inline struct super_block *freeze_bdev(struct block_device *sb) -{ - return NULL; -} - -static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) -{ - return 0; -} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 499900d0cee..4316a546beb 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -15,6 +15,7 @@ #include <linux/cgroupstats.h> #include <linux/prio_heap.h> #include <linux/rwsem.h> +#include <linux/idr.h> #ifdef CONFIG_CGROUPS @@ -22,6 +23,7 @@ struct cgroupfs_root; struct cgroup_subsys; struct inode; struct cgroup; +struct css_id; extern int cgroup_init_early(void); extern int cgroup_init(void); @@ -47,18 +49,24 @@ enum cgroup_subsys_id { /* Per-subsystem/per-cgroup state maintained by the system. */ struct cgroup_subsys_state { - /* The cgroup that this subsystem is attached to. Useful + /* + * The cgroup that this subsystem is attached to. Useful * for subsystems that want to know about the cgroup - * hierarchy structure */ + * hierarchy structure + */ struct cgroup *cgroup; - /* State maintained by the cgroup system to allow subsystems + /* + * State maintained by the cgroup system to allow subsystems * to be "busy". Should be accessed via css_get(), - * css_tryget() and and css_put(). */ + * css_tryget() and and css_put(). + */ atomic_t refcnt; unsigned long flags; + /* ID for this css, if possible */ + struct css_id *id; }; /* bits in struct cgroup_subsys_state flags field */ @@ -120,19 +128,26 @@ static inline void css_put(struct cgroup_subsys_state *css) enum { /* Control Group is dead */ CGRP_REMOVED, - /* Control Group has previously had a child cgroup or a task, - * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */ + /* + * Control Group has previously had a child cgroup or a task, + * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) + */ CGRP_RELEASABLE, /* Control Group requires release notifications to userspace */ CGRP_NOTIFY_ON_RELEASE, + /* + * A thread in rmdir() is wating for this cgroup. + */ + CGRP_WAIT_ON_RMDIR, }; struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ - /* count users of this cgroup. >0 means busy, but doesn't - * necessarily indicate the number of tasks in the - * cgroup */ + /* + * count users of this cgroup. >0 means busy, but doesn't + * necessarily indicate the number of tasks in the cgroup + */ atomic_t count; /* @@ -142,7 +157,7 @@ struct cgroup { struct list_head sibling; /* my parent's children */ struct list_head children; /* my children */ - struct cgroup *parent; /* my parent */ + struct cgroup *parent; /* my parent */ struct dentry *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ @@ -177,11 +192,12 @@ struct cgroup { struct rcu_head rcu_head; }; -/* A css_set is a structure holding pointers to a set of +/* + * A css_set is a structure holding pointers to a set of * cgroup_subsys_state objects. This saves space in the task struct * object and speeds up fork()/exit(), since a single inc/dec and a - * list_add()/del() can bump the reference count on the entire - * cgroup set for a task. + * list_add()/del() can bump the reference count on the entire cgroup + * set for a task. */ struct css_set { @@ -226,13 +242,8 @@ struct cgroup_map_cb { void *state; }; -/* struct cftype: - * - * The files in the cgroup filesystem mostly have a very simple read/write - * handling, some common function will take care of it. Nevertheless some cases - * (read tasks) are special and therefore I define this structure for every - * kind of file. - * +/* + * struct cftype: handler definitions for cgroup control files * * When reading/writing to a file: * - the cgroup to use is file->f_dentry->d_parent->d_fsdata @@ -241,10 +252,17 @@ struct cgroup_map_cb { #define MAX_CFTYPE_NAME 64 struct cftype { - /* By convention, the name should begin with the name of the - * subsystem, followed by a period */ + /* + * By convention, the name should begin with the name of the + * subsystem, followed by a period + */ char name[MAX_CFTYPE_NAME]; int private; + /* + * If not 0, file mode is set to this value, otherwise it will + * be figured out automatically + */ + mode_t mode; /* * If non-zero, defines the maximum length of string that can @@ -319,15 +337,20 @@ struct cgroup_scanner { void (*process_task)(struct task_struct *p, struct cgroup_scanner *scan); struct ptr_heap *heap; + void *data; }; -/* Add a new file to the given cgroup directory. Should only be - * called by subsystems from within a populate() method */ +/* + * Add a new file to the given cgroup directory. Should only be + * called by subsystems from within a populate() method + */ int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, const struct cftype *cft); -/* Add a set of new files to the given cgroup directory. Should - * only be called by subsystems from within a populate() method */ +/* + * Add a set of new files to the given cgroup directory. Should + * only be called by subsystems from within a populate() method + */ int cgroup_add_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, const struct cftype cft[], @@ -339,15 +362,15 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); int cgroup_task_count(const struct cgroup *cgrp); -/* Return true if the cgroup is a descendant of the current cgroup */ -int cgroup_is_descendant(const struct cgroup *cgrp); +/* Return true if cgrp is a descendant of the task's cgroup */ +int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); /* Control Group subsystem type. See Documentation/cgroups.txt for details */ struct cgroup_subsys { struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss, struct cgroup *cgrp); - void (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); + int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, struct task_struct *tsk); @@ -364,6 +387,11 @@ struct cgroup_subsys { int active; int disabled; int early_init; + /* + * True if this subsys uses ID. ID is not available before cgroup_init() + * (not available in early_init time.) + */ + bool use_id; #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; @@ -386,6 +414,9 @@ struct cgroup_subsys { */ struct cgroupfs_root *root; struct list_head sibling; + /* used when use_id == true */ + struct idr idr; + spinlock_t id_lock; }; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; @@ -419,7 +450,8 @@ struct cgroup_iter { struct list_head *task; }; -/* To iterate across the tasks in a cgroup: +/* + * To iterate across the tasks in a cgroup: * * 1) call cgroup_iter_start to intialize an iterator * @@ -428,9 +460,10 @@ struct cgroup_iter { * * 3) call cgroup_iter_end() to destroy the iterator. * - * Or, call cgroup_scan_tasks() to iterate through every task in a cpuset. - * - cgroup_scan_tasks() holds the css_set_lock when calling the test_task() - * callback, but not while calling the process_task() callback. + * Or, call cgroup_scan_tasks() to iterate through every task in a + * cgroup - cgroup_scan_tasks() holds the css_set_lock when calling + * the test_task() callback, but not while calling the process_task() + * callback. */ void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it); struct task_struct *cgroup_iter_next(struct cgroup *cgrp, @@ -439,6 +472,44 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); +/* + * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works + * if cgroup_subsys.use_id == true. It can be used for looking up and scanning. + * CSS ID is assigned at cgroup allocation (create) automatically + * and removed when subsys calls free_css_id() function. This is because + * the lifetime of cgroup_subsys_state is subsys's matter. + * + * Looking up and scanning function should be called under rcu_read_lock(). + * Taking cgroup_mutex()/hierarchy_mutex() is not necessary for following calls. + * But the css returned by this routine can be "not populated yet" or "being + * destroyed". The caller should check css and cgroup's status. + */ + +/* + * Typically Called at ->destroy(), or somewhere the subsys frees + * cgroup_subsys_state. + */ +void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css); + +/* Find a cgroup_subsys_state which has given ID */ + +struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id); + +/* + * Get a cgroup whose id is greater than or equal to id under tree of root. + * Returning a cgroup_subsys_state or NULL. + */ +struct cgroup_subsys_state *css_get_next(struct cgroup_subsys *ss, int id, + struct cgroup_subsys_state *root, int *foundid); + +/* Returns true if root is ancestor of cg */ +bool css_is_ancestor(struct cgroup_subsys_state *cg, + const struct cgroup_subsys_state *root); + +/* Get id and depth of css */ +unsigned short css_id(struct cgroup_subsys_state *css); +unsigned short css_depth(struct cgroup_subsys_state *css); + #else /* !CONFIG_CGROUPS */ static inline int cgroup_init_early(void) { return 0; } diff --git a/include/linux/compat.h b/include/linux/compat.h index b880864672d..9723edd6455 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -191,6 +191,12 @@ asmlinkage ssize_t compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen); asmlinkage ssize_t compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen); +asmlinkage ssize_t compat_sys_preadv(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, u32 pos_high, u32 pos_low); +asmlinkage ssize_t compat_sys_pwritev(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, u32 pos_high, u32 pos_low); int compat_do_execve(char * filename, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs * regs); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index c2747ac2ae4..2643d848df9 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -23,7 +23,6 @@ #include <linux/node.h> #include <linux/compiler.h> #include <linux/cpumask.h> -#include <linux/mutex.h> struct cpu { int node_id; /* The node which contains the CPU */ @@ -103,16 +102,6 @@ extern struct sysdev_class cpu_sysdev_class; #ifdef CONFIG_HOTPLUG_CPU /* Stop CPUs going up and down. */ -static inline void cpuhotplug_mutex_lock(struct mutex *cpu_hp_mutex) -{ - mutex_lock(cpu_hp_mutex); -} - -static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) -{ - mutex_unlock(cpu_hp_mutex); -} - extern void get_online_cpus(void); extern void put_online_cpus(void); #define hotcpu_notifier(fn, pri) { \ @@ -126,11 +115,6 @@ int cpu_down(unsigned int cpu); #else /* CONFIG_HOTPLUG_CPU */ -static inline void cpuhotplug_mutex_lock(struct mutex *cpu_hp_mutex) -{ } -static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) -{ } - #define get_online_cpus() do { } while (0) #define put_online_cpus() do { } while (0) #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 2e0d79678de..05ea1dd7d68 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -12,6 +12,7 @@ #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/cgroup.h> +#include <linux/mm.h> #ifdef CONFIG_CPUSETS @@ -29,19 +30,29 @@ void cpuset_init_current_mems_allowed(void); void cpuset_update_task_memory_state(void); int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); -extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask); -extern int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask); +extern int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask); +extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask); -static int inline cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) +static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) { return number_of_cpusets <= 1 || - __cpuset_zone_allowed_softwall(z, gfp_mask); + __cpuset_node_allowed_softwall(node, gfp_mask); } -static int inline cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask) +static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask) { return number_of_cpusets <= 1 || - __cpuset_zone_allowed_hardwall(z, gfp_mask); + __cpuset_node_allowed_hardwall(node, gfp_mask); +} + +static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) +{ + return cpuset_node_allowed_softwall(zone_to_nid(z), gfp_mask); +} + +static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask) +{ + return cpuset_node_allowed_hardwall(zone_to_nid(z), gfp_mask); } extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, @@ -112,6 +123,16 @@ static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) return 1; } +static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) +{ + return 1; +} + +static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask) +{ + return 1; +} + static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) { return 1; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 8209e08969f..66ec05a5795 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -139,6 +139,9 @@ struct target_type { dm_ioctl_fn ioctl; dm_merge_fn merge; dm_busy_fn busy; + + /* For internal device-mapper use. */ + struct list_head list; }; struct io_restrictions { diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h index 600c5fb2daa..5e8b11d88f6 100644 --- a/include/linux/dm-dirty-log.h +++ b/include/linux/dm-dirty-log.h @@ -28,6 +28,9 @@ struct dm_dirty_log_type { const char *name; struct module *module; + /* For internal device-mapper use */ + struct list_head list; + int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, unsigned argc, char **argv); void (*dtr)(struct dm_dirty_log *log); @@ -113,6 +116,16 @@ struct dm_dirty_log_type { */ int (*status)(struct dm_dirty_log *log, status_type_t status_type, char *result, unsigned maxlen); + + /* + * is_remote_recovering is necessary for cluster mirroring. It provides + * a way to detect recovery on another node, so we aren't writing + * concurrently. This function is likely to block (when a cluster log + * is used). + * + * Returns: 0, 1 + */ + int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region); }; int dm_dirty_log_type_register(struct dm_dirty_log_type *type); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index af1dab41674..1a455f1f86d 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -11,6 +11,7 @@ #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) +#define DMA_PTE_SNP (1 << 11) struct intel_iommu; struct dmar_domain; diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index a667637b54e..f45a8ae5f82 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -13,10 +13,20 @@ /* For O_CLOEXEC and O_NONBLOCK */ #include <linux/fcntl.h> -/* Flags for eventfd2. */ +/* + * CAREFUL: Check include/asm-generic/fcntl.h when defining + * new flags, since they might collide with O_* ones. We want + * to re-use O_* flags that couldn't possibly have a meaning + * from eventfd, in order to leave a free define-space for + * shared O_* flags. + */ +#define EFD_SEMAPHORE (1 << 0) #define EFD_CLOEXEC O_CLOEXEC #define EFD_NONBLOCK O_NONBLOCK +#define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) +#define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE) + struct file *eventfd_fget(int fd); int eventfd_signal(struct file *file, int n); diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index dd495b8c309..e263acaa405 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -893,9 +893,8 @@ extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); /* ioctl.c */ -extern int ext3_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); -extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long); +extern long ext3_ioctl(struct file *, unsigned int, unsigned long); +extern long ext3_compat_ioctl(struct file *, unsigned int, unsigned long); /* namei.c */ extern int ext3_orphan_add(handle_t *, struct inode *); diff --git a/include/linux/fb.h b/include/linux/fb.h index 31527e17076..f563c501393 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -123,6 +123,7 @@ struct dentry; #define FB_ACCEL_TRIDENT_3DIMAGE 51 /* Trident 3DImage */ #define FB_ACCEL_TRIDENT_BLADE3D 52 /* Trident Blade3D */ #define FB_ACCEL_TRIDENT_BLADEXP 53 /* Trident BladeXP */ +#define FB_ACCEL_CIRRUS_ALPINE 53 /* Cirrus Logic 543x/544x/5480 */ #define FB_ACCEL_NEOMAGIC_NM2070 90 /* NeoMagic NM2070 */ #define FB_ACCEL_NEOMAGIC_NM2090 91 /* NeoMagic NM2090 */ #define FB_ACCEL_NEOMAGIC_NM2093 92 /* NeoMagic NM2093 */ @@ -960,15 +961,7 @@ extern struct fb_info *registered_fb[FB_MAX]; extern int num_registered_fb; extern struct class *fb_class; -static inline int lock_fb_info(struct fb_info *info) -{ - mutex_lock(&info->lock); - if (!info->fbops) { - mutex_unlock(&info->lock); - return 0; - } - return 1; -} +extern int lock_fb_info(struct fb_info *info); static inline void unlock_fb_info(struct fb_info *info) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 87e7bfc5ebd..a09e17c8f5f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1741,6 +1741,8 @@ extern void drop_collected_mounts(struct vfsmount *); extern int vfs_statfs(struct dentry *, struct kstatfs *); +extern int current_umask(void); + /* /sys/fs */ extern struct kobject *fs_kobj; @@ -1878,12 +1880,25 @@ extern struct block_device *open_by_devnum(dev_t, fmode_t); extern void invalidate_bdev(struct block_device *); extern int sync_blockdev(struct block_device *bdev); extern struct super_block *freeze_bdev(struct block_device *); +extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); extern int fsync_super(struct super_block *); extern int fsync_no_super(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} +static inline int sync_blockdev(struct block_device *bdev) { return 0; } +static inline void invalidate_bdev(struct block_device *bdev) {} + +static inline struct super_block *freeze_bdev(struct block_device *sb) +{ + return NULL; +} + +static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + return 0; +} #endif extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 18b467dbe27..78a05bfcd8e 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -4,12 +4,10 @@ #include <linux/path.h> struct fs_struct { - atomic_t count; /* This usage count is used by check_unsafe_exec() for - * security checking purposes - therefore it may not be - * incremented, except by clone(CLONE_FS). - */ + int users; rwlock_t lock; int umask; + int in_exec; struct path root, pwd; }; @@ -19,6 +17,8 @@ extern void exit_fs(struct task_struct *); extern void set_fs_root(struct fs_struct *, struct path *); extern void set_fs_pwd(struct fs_struct *, struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); -extern void put_fs_struct(struct fs_struct *); +extern void free_fs_struct(struct fs_struct *); +extern void daemonize_fs_struct(void); +extern int unshare_fs_struct(void); #endif /* _LINUX_FS_STRUCT_H */ diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h new file mode 100644 index 00000000000..84d3532dd3e --- /dev/null +++ b/include/linux/fscache-cache.h @@ -0,0 +1,505 @@ +/* General filesystem caching backing cache interface + * + * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE!!! See: + * + * Documentation/filesystems/caching/backend-api.txt + * + * for a description of the cache backend interface declared here. + */ + +#ifndef _LINUX_FSCACHE_CACHE_H +#define _LINUX_FSCACHE_CACHE_H + +#include <linux/fscache.h> +#include <linux/sched.h> +#include <linux/slow-work.h> + +#define NR_MAXCACHES BITS_PER_LONG + +struct fscache_cache; +struct fscache_cache_ops; +struct fscache_object; +struct fscache_operation; + +/* + * cache tag definition + */ +struct fscache_cache_tag { + struct list_head link; + struct fscache_cache *cache; /* cache referred to by this tag */ + unsigned long flags; +#define FSCACHE_TAG_RESERVED 0 /* T if tag is reserved for a cache */ + atomic_t usage; + char name[0]; /* tag name */ +}; + +/* + * cache definition + */ +struct fscache_cache { + const struct fscache_cache_ops *ops; + struct fscache_cache_tag *tag; /* tag representing this cache */ + struct kobject *kobj; /* system representation of this cache */ + struct list_head link; /* link in list of caches */ + size_t max_index_size; /* maximum size of index data */ + char identifier[36]; /* cache label */ + + /* node management */ + struct work_struct op_gc; /* operation garbage collector */ + struct list_head object_list; /* list of data/index objects */ + struct list_head op_gc_list; /* list of ops to be deleted */ + spinlock_t object_list_lock; + spinlock_t op_gc_list_lock; + atomic_t object_count; /* no. of live objects in this cache */ + struct fscache_object *fsdef; /* object for the fsdef index */ + unsigned long flags; +#define FSCACHE_IOERROR 0 /* cache stopped on I/O error */ +#define FSCACHE_CACHE_WITHDRAWN 1 /* cache has been withdrawn */ +}; + +extern wait_queue_head_t fscache_cache_cleared_wq; + +/* + * operation to be applied to a cache object + * - retrieval initiation operations are done in the context of the process + * that issued them, and not in an async thread pool + */ +typedef void (*fscache_operation_release_t)(struct fscache_operation *op); +typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); + +struct fscache_operation { + union { + struct work_struct fast_work; /* record for fast ops */ + struct slow_work slow_work; /* record for (very) slow ops */ + }; + struct list_head pend_link; /* link in object->pending_ops */ + struct fscache_object *object; /* object to be operated upon */ + + unsigned long flags; +#define FSCACHE_OP_TYPE 0x000f /* operation type */ +#define FSCACHE_OP_FAST 0x0001 /* - fast op, processor may not sleep for disk */ +#define FSCACHE_OP_SLOW 0x0002 /* - (very) slow op, processor may sleep for disk */ +#define FSCACHE_OP_MYTHREAD 0x0003 /* - processing is done be issuing thread, not pool */ +#define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ +#define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ +#define FSCACHE_OP_DEAD 6 /* op is now dead */ + + atomic_t usage; + unsigned debug_id; /* debugging ID */ + + /* operation processor callback + * - can be NULL if FSCACHE_OP_WAITING is going to be used to perform + * the op in a non-pool thread */ + fscache_operation_processor_t processor; + + /* operation releaser */ + fscache_operation_release_t release; +}; + +extern atomic_t fscache_op_debug_id; +extern const struct slow_work_ops fscache_op_slow_work_ops; + +extern void fscache_enqueue_operation(struct fscache_operation *); +extern void fscache_put_operation(struct fscache_operation *); + +/** + * fscache_operation_init - Do basic initialisation of an operation + * @op: The operation to initialise + * @release: The release function to assign + * + * Do basic initialisation of an operation. The caller must still set flags, + * object, either fast_work or slow_work if necessary, and processor if needed. + */ +static inline void fscache_operation_init(struct fscache_operation *op, + fscache_operation_release_t release) +{ + atomic_set(&op->usage, 1); + op->debug_id = atomic_inc_return(&fscache_op_debug_id); + op->release = release; + INIT_LIST_HEAD(&op->pend_link); +} + +/** + * fscache_operation_init_slow - Do additional initialisation of a slow op + * @op: The operation to initialise + * @processor: The processor function to assign + * + * Do additional initialisation of an operation as required for slow work. + */ +static inline +void fscache_operation_init_slow(struct fscache_operation *op, + fscache_operation_processor_t processor) +{ + op->processor = processor; + slow_work_init(&op->slow_work, &fscache_op_slow_work_ops); +} + +/* + * data read operation + */ +struct fscache_retrieval { + struct fscache_operation op; + struct address_space *mapping; /* netfs pages */ + fscache_rw_complete_t end_io_func; /* function to call on I/O completion */ + void *context; /* netfs read context (pinned) */ + struct list_head to_do; /* list of things to be done by the backend */ + unsigned long start_time; /* time at which retrieval started */ +}; + +typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op, + struct page *page, + gfp_t gfp); + +typedef int (*fscache_pages_retrieval_func_t)(struct fscache_retrieval *op, + struct list_head *pages, + unsigned *nr_pages, + gfp_t gfp); + +/** + * fscache_get_retrieval - Get an extra reference on a retrieval operation + * @op: The retrieval operation to get a reference on + * + * Get an extra reference on a retrieval operation. + */ +static inline +struct fscache_retrieval *fscache_get_retrieval(struct fscache_retrieval *op) +{ + atomic_inc(&op->op.usage); + return op; +} + +/** + * fscache_enqueue_retrieval - Enqueue a retrieval operation for processing + * @op: The retrieval operation affected + * + * Enqueue a retrieval operation for processing by the FS-Cache thread pool. + */ +static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) +{ + fscache_enqueue_operation(&op->op); +} + +/** + * fscache_put_retrieval - Drop a reference to a retrieval operation + * @op: The retrieval operation affected + * + * Drop a reference to a retrieval operation. + */ +static inline void fscache_put_retrieval(struct fscache_retrieval *op) +{ + fscache_put_operation(&op->op); +} + +/* + * cached page storage work item + * - used to do three things: + * - batch writes to the cache + * - do cache writes asynchronously + * - defer writes until cache object lookup completion + */ +struct fscache_storage { + struct fscache_operation op; + pgoff_t store_limit; /* don't write more than this */ +}; + +/* + * cache operations + */ +struct fscache_cache_ops { + /* name of cache provider */ + const char *name; + + /* allocate an object record for a cookie */ + struct fscache_object *(*alloc_object)(struct fscache_cache *cache, + struct fscache_cookie *cookie); + + /* look up the object for a cookie */ + void (*lookup_object)(struct fscache_object *object); + + /* finished looking up */ + void (*lookup_complete)(struct fscache_object *object); + + /* increment the usage count on this object (may fail if unmounting) */ + struct fscache_object *(*grab_object)(struct fscache_object *object); + + /* pin an object in the cache */ + int (*pin_object)(struct fscache_object *object); + + /* unpin an object in the cache */ + void (*unpin_object)(struct fscache_object *object); + + /* store the updated auxilliary data on an object */ + void (*update_object)(struct fscache_object *object); + + /* discard the resources pinned by an object and effect retirement if + * necessary */ + void (*drop_object)(struct fscache_object *object); + + /* dispose of a reference to an object */ + void (*put_object)(struct fscache_object *object); + + /* sync a cache */ + void (*sync_cache)(struct fscache_cache *cache); + + /* notification that the attributes of a non-index object (such as + * i_size) have changed */ + int (*attr_changed)(struct fscache_object *object); + + /* reserve space for an object's data and associated metadata */ + int (*reserve_space)(struct fscache_object *object, loff_t i_size); + + /* request a backing block for a page be read or allocated in the + * cache */ + fscache_page_retrieval_func_t read_or_alloc_page; + + /* request backing blocks for a list of pages be read or allocated in + * the cache */ + fscache_pages_retrieval_func_t read_or_alloc_pages; + + /* request a backing block for a page be allocated in the cache so that + * it can be written directly */ + fscache_page_retrieval_func_t allocate_page; + + /* request backing blocks for pages be allocated in the cache so that + * they can be written directly */ + fscache_pages_retrieval_func_t allocate_pages; + + /* write a page to its backing block in the cache */ + int (*write_page)(struct fscache_storage *op, struct page *page); + + /* detach backing block from a page (optional) + * - must release the cookie lock before returning + * - may sleep + */ + void (*uncache_page)(struct fscache_object *object, + struct page *page); + + /* dissociate a cache from all the pages it was backing */ + void (*dissociate_pages)(struct fscache_cache *cache); +}; + +/* + * data file or index object cookie + * - a file will only appear in one cache + * - a request to cache a file may or may not be honoured, subject to + * constraints such as disk space + * - indices are created on disk just-in-time + */ +struct fscache_cookie { + atomic_t usage; /* number of users of this cookie */ + atomic_t n_children; /* number of children of this cookie */ + spinlock_t lock; + struct hlist_head backing_objects; /* object(s) backing this file/index */ + const struct fscache_cookie_def *def; /* definition */ + struct fscache_cookie *parent; /* parent of this entry */ + void *netfs_data; /* back pointer to netfs */ + struct radix_tree_root stores; /* pages to be stored on this cookie */ +#define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ + + unsigned long flags; +#define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */ +#define FSCACHE_COOKIE_CREATING 1 /* T if non-index object being created still */ +#define FSCACHE_COOKIE_NO_DATA_YET 2 /* T if new object with no cached data yet */ +#define FSCACHE_COOKIE_PENDING_FILL 3 /* T if pending initial fill on object */ +#define FSCACHE_COOKIE_FILLING 4 /* T if filling object incrementally */ +#define FSCACHE_COOKIE_UNAVAILABLE 5 /* T if cookie is unavailable (error, etc) */ +}; + +extern struct fscache_cookie fscache_fsdef_index; + +/* + * on-disk cache file or index handle + */ +struct fscache_object { + enum fscache_object_state { + FSCACHE_OBJECT_INIT, /* object in initial unbound state */ + FSCACHE_OBJECT_LOOKING_UP, /* looking up object */ + FSCACHE_OBJECT_CREATING, /* creating object */ + + /* active states */ + FSCACHE_OBJECT_AVAILABLE, /* cleaning up object after creation */ + FSCACHE_OBJECT_ACTIVE, /* object is usable */ + FSCACHE_OBJECT_UPDATING, /* object is updating */ + + /* terminal states */ + FSCACHE_OBJECT_DYING, /* object waiting for accessors to finish */ + FSCACHE_OBJECT_LC_DYING, /* object cleaning up after lookup/create */ + FSCACHE_OBJECT_ABORT_INIT, /* abort the init state */ + FSCACHE_OBJECT_RELEASING, /* releasing object */ + FSCACHE_OBJECT_RECYCLING, /* retiring object */ + FSCACHE_OBJECT_WITHDRAWING, /* withdrawing object */ + FSCACHE_OBJECT_DEAD, /* object is now dead */ + } state; + + int debug_id; /* debugging ID */ + int n_children; /* number of child objects */ + int n_ops; /* number of ops outstanding on object */ + int n_obj_ops; /* number of object ops outstanding on object */ + int n_in_progress; /* number of ops in progress */ + int n_exclusive; /* number of exclusive ops queued */ + spinlock_t lock; /* state and operations lock */ + + unsigned long lookup_jif; /* time at which lookup started */ + unsigned long event_mask; /* events this object is interested in */ + unsigned long events; /* events to be processed by this object + * (order is important - using fls) */ +#define FSCACHE_OBJECT_EV_REQUEUE 0 /* T if object should be requeued */ +#define FSCACHE_OBJECT_EV_UPDATE 1 /* T if object should be updated */ +#define FSCACHE_OBJECT_EV_CLEARED 2 /* T if accessors all gone */ +#define FSCACHE_OBJECT_EV_ERROR 3 /* T if fatal error occurred during processing */ +#define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ +#define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ +#define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ + + unsigned long flags; +#define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ +#define FSCACHE_OBJECT_PENDING_WRITE 1 /* T if object has pending write */ +#define FSCACHE_OBJECT_WAITING 2 /* T if object is waiting on its parent */ + + struct list_head cache_link; /* link in cache->object_list */ + struct hlist_node cookie_link; /* link in cookie->backing_objects */ + struct fscache_cache *cache; /* cache that supplied this object */ + struct fscache_cookie *cookie; /* netfs's file/index object */ + struct fscache_object *parent; /* parent object */ + struct slow_work work; /* attention scheduling record */ + struct list_head dependents; /* FIFO of dependent objects */ + struct list_head dep_link; /* link in parent's dependents list */ + struct list_head pending_ops; /* unstarted operations on this object */ + pgoff_t store_limit; /* current storage limit */ +}; + +extern const char *fscache_object_states[]; + +#define fscache_object_is_active(obj) \ + (!test_bit(FSCACHE_IOERROR, &(obj)->cache->flags) && \ + (obj)->state >= FSCACHE_OBJECT_AVAILABLE && \ + (obj)->state < FSCACHE_OBJECT_DYING) + +extern const struct slow_work_ops fscache_object_slow_work_ops; + +/** + * fscache_object_init - Initialise a cache object description + * @object: Object description + * + * Initialise a cache object description to its basic values. + * + * See Documentation/filesystems/caching/backend-api.txt for a complete + * description. + */ +static inline +void fscache_object_init(struct fscache_object *object, + struct fscache_cookie *cookie, + struct fscache_cache *cache) +{ + atomic_inc(&cache->object_count); + + object->state = FSCACHE_OBJECT_INIT; + spin_lock_init(&object->lock); + INIT_LIST_HEAD(&object->cache_link); + INIT_HLIST_NODE(&object->cookie_link); + vslow_work_init(&object->work, &fscache_object_slow_work_ops); + INIT_LIST_HEAD(&object->dependents); + INIT_LIST_HEAD(&object->dep_link); + INIT_LIST_HEAD(&object->pending_ops); + object->n_children = 0; + object->n_ops = object->n_in_progress = object->n_exclusive = 0; + object->events = object->event_mask = 0; + object->flags = 0; + object->store_limit = 0; + object->cache = cache; + object->cookie = cookie; + object->parent = NULL; +} + +extern void fscache_object_lookup_negative(struct fscache_object *object); +extern void fscache_obtained_object(struct fscache_object *object); + +/** + * fscache_object_destroyed - Note destruction of an object in a cache + * @cache: The cache from which the object came + * + * Note the destruction and deallocation of an object record in a cache. + */ +static inline void fscache_object_destroyed(struct fscache_cache *cache) +{ + if (atomic_dec_and_test(&cache->object_count)) + wake_up_all(&fscache_cache_cleared_wq); +} + +/** + * fscache_object_lookup_error - Note an object encountered an error + * @object: The object on which the error was encountered + * + * Note that an object encountered a fatal error (usually an I/O error) and + * that it should be withdrawn as soon as possible. + */ +static inline void fscache_object_lookup_error(struct fscache_object *object) +{ + set_bit(FSCACHE_OBJECT_EV_ERROR, &object->events); +} + +/** + * fscache_set_store_limit - Set the maximum size to be stored in an object + * @object: The object to set the maximum on + * @i_size: The limit to set in bytes + * + * Set the maximum size an object is permitted to reach, implying the highest + * byte that may be written. Intended to be called by the attr_changed() op. + * + * See Documentation/filesystems/caching/backend-api.txt for a complete + * description. + */ +static inline +void fscache_set_store_limit(struct fscache_object *object, loff_t i_size) +{ + object->store_limit = i_size >> PAGE_SHIFT; + if (i_size & ~PAGE_MASK) + object->store_limit++; +} + +/** + * fscache_end_io - End a retrieval operation on a page + * @op: The FS-Cache operation covering the retrieval + * @page: The page that was to be fetched + * @error: The error code (0 if successful) + * + * Note the end of an operation to retrieve a page, as covered by a particular + * operation record. + */ +static inline void fscache_end_io(struct fscache_retrieval *op, + struct page *page, int error) +{ + op->end_io_func(page, op->context, error); +} + +/* + * out-of-line cache backend functions + */ +extern void fscache_init_cache(struct fscache_cache *cache, + const struct fscache_cache_ops *ops, + const char *idfmt, + ...) __attribute__ ((format (printf, 3, 4))); + +extern int fscache_add_cache(struct fscache_cache *cache, + struct fscache_object *fsdef, + const char *tagname); +extern void fscache_withdraw_cache(struct fscache_cache *cache); + +extern void fscache_io_error(struct fscache_cache *cache); + +extern void fscache_mark_pages_cached(struct fscache_retrieval *op, + struct pagevec *pagevec); + +extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object, + const void *data, + uint16_t datalen); + +#endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h new file mode 100644 index 00000000000..6d8ee466e0a --- /dev/null +++ b/include/linux/fscache.h @@ -0,0 +1,618 @@ +/* General filesystem caching interface + * + * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE!!! See: + * + * Documentation/filesystems/caching/netfs-api.txt + * + * for a description of the network filesystem interface declared here. + */ + +#ifndef _LINUX_FSCACHE_H +#define _LINUX_FSCACHE_H + +#include <linux/fs.h> +#include <linux/list.h> +#include <linux/pagemap.h> +#include <linux/pagevec.h> + +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +#define fscache_available() (1) +#define fscache_cookie_valid(cookie) (cookie) +#else +#define fscache_available() (0) +#define fscache_cookie_valid(cookie) (0) +#endif + + +/* + * overload PG_private_2 to give us PG_fscache - this is used to indicate that + * a page is currently backed by a local disk cache + */ +#define PageFsCache(page) PagePrivate2((page)) +#define SetPageFsCache(page) SetPagePrivate2((page)) +#define ClearPageFsCache(page) ClearPagePrivate2((page)) +#define TestSetPageFsCache(page) TestSetPagePrivate2((page)) +#define TestClearPageFsCache(page) TestClearPagePrivate2((page)) + +/* pattern used to fill dead space in an index entry */ +#define FSCACHE_INDEX_DEADFILL_PATTERN 0x79 + +struct pagevec; +struct fscache_cache_tag; +struct fscache_cookie; +struct fscache_netfs; + +typedef void (*fscache_rw_complete_t)(struct page *page, + void *context, + int error); + +/* result of index entry consultation */ +enum fscache_checkaux { + FSCACHE_CHECKAUX_OKAY, /* entry okay as is */ + FSCACHE_CHECKAUX_NEEDS_UPDATE, /* entry requires update */ + FSCACHE_CHECKAUX_OBSOLETE, /* entry requires deletion */ +}; + +/* + * fscache cookie definition + */ +struct fscache_cookie_def { + /* name of cookie type */ + char name[16]; + + /* cookie type */ + uint8_t type; +#define FSCACHE_COOKIE_TYPE_INDEX 0 +#define FSCACHE_COOKIE_TYPE_DATAFILE 1 + + /* select the cache into which to insert an entry in this index + * - optional + * - should return a cache identifier or NULL to cause the cache to be + * inherited from the parent if possible or the first cache picked + * for a non-index file if not + */ + struct fscache_cache_tag *(*select_cache)( + const void *parent_netfs_data, + const void *cookie_netfs_data); + + /* get an index key + * - should store the key data in the buffer + * - should return the amount of amount stored + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + uint16_t (*get_key)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + /* get certain file attributes from the netfs data + * - this function can be absent for an index + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + void (*get_attr)(const void *cookie_netfs_data, uint64_t *size); + + /* get the auxilliary data from netfs data + * - this function can be absent if the index carries no state data + * - should store the auxilliary data in the buffer + * - should return the amount of amount stored + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + uint16_t (*get_aux)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + /* consult the netfs about the state of an object + * - this function can be absent if the index carries no state data + * - the netfs data from the cookie being used as the target is + * presented, as is the auxilliary data + */ + enum fscache_checkaux (*check_aux)(void *cookie_netfs_data, + const void *data, + uint16_t datalen); + + /* get an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ + void (*get_context)(void *cookie_netfs_data, void *context); + + /* release an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ + void (*put_context)(void *cookie_netfs_data, void *context); + + /* indicate pages that now have cache metadata retained + * - this function should mark the specified pages as now being cached + * - the pages will have been marked with PG_fscache before this is + * called, so this is optional + */ + void (*mark_pages_cached)(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec); + + /* indicate the cookie is no longer cached + * - this function is called when the backing store currently caching + * a cookie is removed + * - the netfs should use this to clean up any markers indicating + * cached pages + * - this is mandatory for any object that may have data + */ + void (*now_uncached)(void *cookie_netfs_data); +}; + +/* + * fscache cached network filesystem type + * - name, version and ops must be filled in before registration + * - all other fields will be set during registration + */ +struct fscache_netfs { + uint32_t version; /* indexing version */ + const char *name; /* filesystem name */ + struct fscache_cookie *primary_index; + struct list_head link; /* internal link */ +}; + +/* + * slow-path functions for when there is actually caching available, and the + * netfs does actually have a valid token + * - these are not to be called directly + * - these are undefined symbols when FS-Cache is not configured and the + * optimiser takes care of not using them + */ +extern int __fscache_register_netfs(struct fscache_netfs *); +extern void __fscache_unregister_netfs(struct fscache_netfs *); +extern struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *); +extern void __fscache_release_cache_tag(struct fscache_cache_tag *); + +extern struct fscache_cookie *__fscache_acquire_cookie( + struct fscache_cookie *, + const struct fscache_cookie_def *, + void *); +extern void __fscache_relinquish_cookie(struct fscache_cookie *, int); +extern void __fscache_update_cookie(struct fscache_cookie *); +extern int __fscache_attr_changed(struct fscache_cookie *); +extern int __fscache_read_or_alloc_page(struct fscache_cookie *, + struct page *, + fscache_rw_complete_t, + void *, + gfp_t); +extern int __fscache_read_or_alloc_pages(struct fscache_cookie *, + struct address_space *, + struct list_head *, + unsigned *, + fscache_rw_complete_t, + void *, + gfp_t); +extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t); +extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t); +extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); +extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); +extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); + +/** + * fscache_register_netfs - Register a filesystem as desiring caching services + * @netfs: The description of the filesystem + * + * Register a filesystem as desiring caching services if they're available. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_register_netfs(struct fscache_netfs *netfs) +{ + if (fscache_available()) + return __fscache_register_netfs(netfs); + else + return 0; +} + +/** + * fscache_unregister_netfs - Indicate that a filesystem no longer desires + * caching services + * @netfs: The description of the filesystem + * + * Indicate that a filesystem no longer desires caching services for the + * moment. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_unregister_netfs(struct fscache_netfs *netfs) +{ + if (fscache_available()) + __fscache_unregister_netfs(netfs); +} + +/** + * fscache_lookup_cache_tag - Look up a cache tag + * @name: The name of the tag to search for + * + * Acquire a specific cache referral tag that can be used to select a specific + * cache in which to cache an index. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name) +{ + if (fscache_available()) + return __fscache_lookup_cache_tag(name); + else + return NULL; +} + +/** + * fscache_release_cache_tag - Release a cache tag + * @tag: The tag to release + * + * Release a reference to a cache referral tag previously looked up. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_release_cache_tag(struct fscache_cache_tag *tag) +{ + if (fscache_available()) + __fscache_release_cache_tag(tag); +} + +/** + * fscache_acquire_cookie - Acquire a cookie to represent a cache object + * @parent: The cookie that's to be the parent of this one + * @def: A description of the cache object, including callback operations + * @netfs_data: An arbitrary piece of data to be kept in the cookie to + * represent the cache object to the netfs + * + * This function is used to inform FS-Cache about part of an index hierarchy + * that can be used to locate files. This is done by requesting a cookie for + * each index in the path to the file. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +struct fscache_cookie *fscache_acquire_cookie( + struct fscache_cookie *parent, + const struct fscache_cookie_def *def, + void *netfs_data) +{ + if (fscache_cookie_valid(parent)) + return __fscache_acquire_cookie(parent, def, netfs_data); + else + return NULL; +} + +/** + * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding + * it + * @cookie: The cookie being returned + * @retire: True if the cache object the cookie represents is to be discarded + * + * This function returns a cookie to the cache, forcibly discarding the + * associated cache object if retire is set to true. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) +{ + if (fscache_cookie_valid(cookie)) + __fscache_relinquish_cookie(cookie, retire); +} + +/** + * fscache_update_cookie - Request that a cache object be updated + * @cookie: The cookie representing the cache object + * + * Request an update of the index data for the cache object associated with the + * cookie. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_update_cookie(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + __fscache_update_cookie(cookie); +} + +/** + * fscache_pin_cookie - Pin a data-storage cache object in its cache + * @cookie: The cookie representing the cache object + * + * Permit data-storage cache objects to be pinned in the cache. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_pin_cookie(struct fscache_cookie *cookie) +{ + return -ENOBUFS; +} + +/** + * fscache_pin_cookie - Unpin a data-storage cache object in its cache + * @cookie: The cookie representing the cache object + * + * Permit data-storage cache objects to be unpinned from the cache. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_unpin_cookie(struct fscache_cookie *cookie) +{ +} + +/** + * fscache_attr_changed - Notify cache that an object's attributes changed + * @cookie: The cookie representing the cache object + * + * Send a notification to the cache indicating that an object's attributes have + * changed. This includes the data size. These attributes will be obtained + * through the get_attr() cookie definition op. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_attr_changed(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_attr_changed(cookie); + else + return -ENOBUFS; +} + +/** + * fscache_reserve_space - Reserve data space for a cached object + * @cookie: The cookie representing the cache object + * @i_size: The amount of space to be reserved + * + * Reserve an amount of space in the cache for the cache object attached to a + * cookie so that a write to that object within the space can always be + * honoured. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) +{ + return -ENOBUFS; +} + +/** + * fscache_read_or_alloc_page - Read a page from the cache or allocate a block + * in which to store it + * @cookie: The cookie representing the cache object + * @page: The netfs page to fill if possible + * @end_io_func: The callback to invoke when and if the page is filled + * @context: An arbitrary piece of data to pass on to end_io_func() + * @gfp: The conditions under which memory allocation should be made + * + * Read a page from the cache, or if that's not possible make a potential + * one-block reservation in the cache into which the page may be stored once + * fetched from the server. + * + * If the page is not backed by the cache object, or if it there's some reason + * it can't be, -ENOBUFS will be returned and nothing more will be done for + * that page. + * + * Else, if that page is backed by the cache, a read will be initiated directly + * to the netfs's page and 0 will be returned by this function. The + * end_io_func() callback will be invoked when the operation terminates on a + * completion or failure. Note that the callback may be invoked before the + * return. + * + * Else, if the page is unbacked, -ENODATA is returned and a block may have + * been allocated in the cache. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_read_or_alloc_page(cookie, page, end_io_func, + context, gfp); + else + return -ENOBUFS; +} + +/** + * fscache_read_or_alloc_pages - Read pages from the cache and/or allocate + * blocks in which to store them + * @cookie: The cookie representing the cache object + * @mapping: The netfs inode mapping to which the pages will be attached + * @pages: A list of potential netfs pages to be filled + * @end_io_func: The callback to invoke when and if each page is filled + * @context: An arbitrary piece of data to pass on to end_io_func() + * @gfp: The conditions under which memory allocation should be made + * + * Read a set of pages from the cache, or if that's not possible, attempt to + * make a potential one-block reservation for each page in the cache into which + * that page may be stored once fetched from the server. + * + * If some pages are not backed by the cache object, or if it there's some + * reason they can't be, -ENOBUFS will be returned and nothing more will be + * done for that pages. + * + * Else, if some of the pages are backed by the cache, a read will be initiated + * directly to the netfs's page and 0 will be returned by this function. The + * end_io_func() callback will be invoked when the operation terminates on a + * completion or failure. Note that the callback may be invoked before the + * return. + * + * Else, if a page is unbacked, -ENODATA is returned and a block may have + * been allocated in the cache. + * + * Because the function may want to return all of -ENOBUFS, -ENODATA and 0 in + * regard to different pages, the return values are prioritised in that order. + * Any pages submitted for reading are removed from the pages list. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_read_or_alloc_pages(cookie, mapping, pages, + nr_pages, end_io_func, + context, gfp); + else + return -ENOBUFS; +} + +/** + * fscache_alloc_page - Allocate a block in which to store a page + * @cookie: The cookie representing the cache object + * @page: The netfs page to allocate a page for + * @gfp: The conditions under which memory allocation should be made + * + * Request Allocation a block in the cache in which to store a netfs page + * without retrieving any contents from the cache. + * + * If the page is not backed by a file then -ENOBUFS will be returned and + * nothing more will be done, and no reservation will be made. + * + * Else, a block will be allocated if one wasn't already, and 0 will be + * returned + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_alloc_page(cookie, page, gfp); + else + return -ENOBUFS; +} + +/** + * fscache_write_page - Request storage of a page in the cache + * @cookie: The cookie representing the cache object + * @page: The netfs page to store + * @gfp: The conditions under which memory allocation should be made + * + * Request the contents of the netfs page be written into the cache. This + * request may be ignored if no cache block is currently allocated, in which + * case it will return -ENOBUFS. + * + * If a cache block was already allocated, a write will be initiated and 0 will + * be returned. The PG_fscache_write page bit is set immediately and will then + * be cleared at the completion of the write to indicate the success or failure + * of the operation. Note that the completion may happen before the return. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_write_page(cookie, page, gfp); + else + return -ENOBUFS; +} + +/** + * fscache_uncache_page - Indicate that caching is no longer required on a page + * @cookie: The cookie representing the cache object + * @page: The netfs page that was being cached. + * + * Tell the cache that we no longer want a page to be cached and that it should + * remove any knowledge of the netfs page it may have. + * + * Note that this cannot cancel any outstanding I/O operations between this + * page and the cache. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_uncache_page(struct fscache_cookie *cookie, + struct page *page) +{ + if (fscache_cookie_valid(cookie)) + __fscache_uncache_page(cookie, page); +} + +/** + * fscache_check_page_write - Ask if a page is being writing to the cache + * @cookie: The cookie representing the cache object + * @page: The netfs page that is being cached. + * + * Ask the cache if a page is being written to the cache. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +bool fscache_check_page_write(struct fscache_cookie *cookie, + struct page *page) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_check_page_write(cookie, page); + return false; +} + +/** + * fscache_wait_on_page_write - Wait for a page to complete writing to the cache + * @cookie: The cookie representing the cache object + * @page: The netfs page that is being cached. + * + * Ask the cache to wake us up when a page is no longer being written to the + * cache. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_wait_on_page_write(struct fscache_cookie *cookie, + struct page *page) +{ + if (fscache_cookie_valid(cookie)) + __fscache_wait_on_page_write(cookie, page); +} + +#endif /* _LINUX_FSCACHE_H */ diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index d9051d717d2..7ef1caf5026 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -95,14 +95,15 @@ struct fsl_usb2_platform_data { #define FSL_USB2_PORT0_ENABLED 0x00000001 #define FSL_USB2_PORT1_ENABLED 0x00000002 +struct spi_device; + struct fsl_spi_platform_data { u32 initial_spmode; /* initial SPMODE value */ - u16 bus_num; + s16 bus_num; bool qe_mode; /* board specific information */ u16 max_chipselect; - void (*activate_cs)(u8 cs, u8 polarity); - void (*deactivate_cs)(u8 cs, u8 polarity); + void (*cs_control)(struct spi_device *spi, bool on); u32 sysclk; }; diff --git a/include/linux/hdreg.h b/include/linux/hdreg.h index ed21bd3dbd2..29ee2873f4a 100644 --- a/include/linux/hdreg.h +++ b/include/linux/hdreg.h @@ -1,68 +1,6 @@ #ifndef _LINUX_HDREG_H #define _LINUX_HDREG_H -#ifdef __KERNEL__ -#include <linux/ata.h> - -/* - * This file contains some defines for the AT-hd-controller. - * Various sources. - */ - -/* ide.c has its own port definitions in "ide.h" */ - -#define HD_IRQ 14 - -/* Hd controller regs. Ref: IBM AT Bios-listing */ -#define HD_DATA 0x1f0 /* _CTL when writing */ -#define HD_ERROR 0x1f1 /* see err-bits */ -#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */ -#define HD_SECTOR 0x1f3 /* starting sector */ -#define HD_LCYL 0x1f4 /* starting cylinder */ -#define HD_HCYL 0x1f5 /* high byte of starting cyl */ -#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */ -#define HD_STATUS 0x1f7 /* see status-bits */ -#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */ -#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */ -#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */ - -#define HD_CMD 0x3f6 /* used for resets */ -#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */ - -/* remainder is shared between hd.c, ide.c, ide-cd.c, and the hdparm utility */ - -/* Bits of HD_STATUS */ -#define ERR_STAT 0x01 -#define INDEX_STAT 0x02 -#define ECC_STAT 0x04 /* Corrected error */ -#define DRQ_STAT 0x08 -#define SEEK_STAT 0x10 -#define SRV_STAT 0x10 -#define WRERR_STAT 0x20 -#define READY_STAT 0x40 -#define BUSY_STAT 0x80 - -/* Bits for HD_ERROR */ -#define MARK_ERR 0x01 /* Bad address mark */ -#define ILI_ERR 0x01 /* Illegal Length Indication (ATAPI) */ -#define TRK0_ERR 0x02 /* couldn't find track 0 */ -#define EOM_ERR 0x02 /* End Of Media (ATAPI) */ -#define ABRT_ERR 0x04 /* Command aborted */ -#define MCR_ERR 0x08 /* media change request */ -#define ID_ERR 0x10 /* ID field not found */ -#define MC_ERR 0x20 /* media changed */ -#define ECC_ERR 0x40 /* Uncorrectable ECC error */ -#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ -#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ -#define LFS_ERR 0xf0 /* Last Failed Sense (ATAPI) */ - -/* Bits of HD_NSECTOR */ -#define CD 0x01 -#define IO 0x02 -#define REL 0x04 -#define TAG_MASK 0xf8 -#endif /* __KERNEL__ */ - #include <linux/types.h> /* @@ -191,6 +129,7 @@ typedef struct hd_drive_hob_hdr { #define TASKFILE_INVALID 0x7fff #endif +#ifndef __KERNEL__ /* ATA/ATAPI Commands pre T13 Spec */ #define WIN_NOP 0x00 /* @@ -379,6 +318,7 @@ typedef struct hd_drive_hob_hdr { #define SECURITY_ERASE_UNIT 0xBD #define SECURITY_FREEZE_LOCK 0xBE #define SECURITY_DISABLE_PASSWORD 0xBF +#endif /* __KERNEL__ */ struct hd_geometry { unsigned char heads; @@ -448,6 +388,7 @@ enum { #define __NEW_HD_DRIVE_ID +#ifndef __KERNEL__ /* * Structure returned by HDIO_GET_IDENTITY, as per ANSI NCITS ATA6 rev.1b spec. * @@ -699,6 +640,7 @@ struct hd_driveid { * 7:0 Signature */ }; +#endif /* __KERNEL__ */ /* * IDE "nice" flags. These are used on a per drive basis to determine diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 13875ce9112..1fcb7126a01 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -19,8 +19,21 @@ static inline void flush_kernel_dcache_page(struct page *page) } #endif -#ifdef CONFIG_HIGHMEM +#include <asm/kmap_types.h> + +#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT) + +void debug_kmap_atomic(enum km_type type); +#else + +static inline void debug_kmap_atomic(enum km_type type) +{ +} + +#endif + +#ifdef CONFIG_HIGHMEM #include <asm/highmem.h> /* declarations for linux/mm/highmem.c */ @@ -44,8 +57,6 @@ static inline void *kmap(struct page *page) #define kunmap(page) do { (void) (page); } while (0) -#include <asm/kmap_types.h> - static inline void *kmap_atomic(struct page *page, enum km_type idx) { pagefault_disable(); diff --git a/include/linux/i2c/at24.h b/include/linux/i2c/at24.h index f6edd522a92..8ace93024d6 100644 --- a/include/linux/i2c/at24.h +++ b/include/linux/i2c/at24.h @@ -2,6 +2,7 @@ #define _LINUX_AT24_H #include <linux/types.h> +#include <linux/memory.h> /* * As seen through Linux I2C, differences between the most common types of I2C @@ -23,6 +24,9 @@ struct at24_platform_data { #define AT24_FLAG_READONLY 0x40 /* sysfs-entry will be read-only */ #define AT24_FLAG_IRUGO 0x20 /* sysfs-entry will be world-readable */ #define AT24_FLAG_TAKE8ADDR 0x10 /* take always 8 addresses (24c00) */ + + void (*setup)(struct memory_accessor *, void *context); + void *context; }; #endif /* _LINUX_AT24_H */ diff --git a/include/linux/ide.h b/include/linux/ide.h index d5d832271f4..a5d26f66ef7 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -265,7 +265,7 @@ enum { IDE_TFLAG_WRITE = (1 << 12), IDE_TFLAG_CUSTOM_HANDLER = (1 << 13), IDE_TFLAG_DMA_PIO_FALLBACK = (1 << 14), - IDE_TFLAG_IN_HOB_FEATURE = (1 << 15), + IDE_TFLAG_IN_HOB_ERROR = (1 << 15), IDE_TFLAG_IN_HOB_NSECT = (1 << 16), IDE_TFLAG_IN_HOB_LBAL = (1 << 17), IDE_TFLAG_IN_HOB_LBAM = (1 << 18), @@ -273,10 +273,10 @@ enum { IDE_TFLAG_IN_HOB_LBA = IDE_TFLAG_IN_HOB_LBAL | IDE_TFLAG_IN_HOB_LBAM | IDE_TFLAG_IN_HOB_LBAH, - IDE_TFLAG_IN_HOB = IDE_TFLAG_IN_HOB_FEATURE | + IDE_TFLAG_IN_HOB = IDE_TFLAG_IN_HOB_ERROR | IDE_TFLAG_IN_HOB_NSECT | IDE_TFLAG_IN_HOB_LBA, - IDE_TFLAG_IN_FEATURE = (1 << 20), + IDE_TFLAG_IN_ERROR = (1 << 20), IDE_TFLAG_IN_NSECT = (1 << 21), IDE_TFLAG_IN_LBAL = (1 << 22), IDE_TFLAG_IN_LBAM = (1 << 23), @@ -310,8 +310,12 @@ enum { struct ide_taskfile { u8 hob_data; /* 0: high data byte (for TASKFILE IOCTL) */ + /* 1-5: additional data to support LBA48 */ + union { + u8 hob_error; /* read: error */ + u8 hob_feature; /* write: feature */ + }; - u8 hob_feature; /* 1-5: additional data to support LBA48 */ u8 hob_nsect; u8 hob_lbal; u8 hob_lbam; @@ -352,6 +356,8 @@ struct ide_cmd { unsigned int nbytes; unsigned int nleft; + unsigned int last_xfer_len; + struct scatterlist *cursg; unsigned int cursg_ofs; @@ -375,7 +381,7 @@ enum { * With each packet command, we allocate a buffer of IDE_PC_BUFFER_SIZE bytes. * This is used for several packet commands (not for READ/WRITE commands). */ -#define IDE_PC_BUFFER_SIZE 256 +#define IDE_PC_BUFFER_SIZE 64 #define ATAPI_WAIT_PC (60 * HZ) struct ide_atapi_pc { @@ -413,9 +419,6 @@ struct ide_atapi_pc { struct idetape_bh *bh; char *b_data; - struct scatterlist *sg; - unsigned int sg_cnt; - unsigned long timeout; }; @@ -456,11 +459,6 @@ enum { IDE_AFLAG_TOCADDR_AS_BCD = (1 << 3), /* TOC track numbers are in BCD. */ IDE_AFLAG_TOCTRACKS_AS_BCD = (1 << 4), - /* - * Drive does not provide data in multiples of SECTOR_SIZE - * when more than one interrupt is needed. - */ - IDE_AFLAG_LIMIT_NFRAMES = (1 << 5), /* Saved TOC information is current. */ IDE_AFLAG_TOC_VALID = (1 << 6), /* We think that the drive door is locked. */ @@ -605,7 +603,7 @@ struct ide_drive_s { unsigned int bios_cyl; /* BIOS/fdisk/LILO number of cyls */ unsigned int cyl; /* "real" number of cyls */ - unsigned int drive_data; /* used by set_pio_mode/selectproc */ + unsigned int drive_data; /* used by set_pio_mode/dev_select() */ unsigned int failures; /* current failure count */ unsigned int max_failures; /* maximum allowed failure count */ u64 probed_capacity;/* initial reported media capacity (ide-cd only currently) */ @@ -661,9 +659,9 @@ struct ide_tp_ops { void (*exec_command)(struct hwif_s *, u8); u8 (*read_status)(struct hwif_s *); u8 (*read_altstatus)(struct hwif_s *); + void (*write_devctl)(struct hwif_s *, u8); - void (*set_irq)(struct hwif_s *, int); - + void (*dev_select)(ide_drive_t *); void (*tf_load)(ide_drive_t *, struct ide_cmd *); void (*tf_read)(ide_drive_t *, struct ide_cmd *); @@ -681,7 +679,6 @@ extern const struct ide_tp_ops default_tp_ops; * @init_dev: host specific initialization of a device * @set_pio_mode: routine to program host for PIO mode * @set_dma_mode: routine to program host for DMA mode - * @selectproc: tweaks hardware to select drive * @reset_poll: chipset polling based on hba specifics * @pre_reset: chipset specific changes to default for device-hba resets * @resetproc: routine to reset controller after a disk reset @@ -698,7 +695,6 @@ struct ide_port_ops { void (*init_dev)(ide_drive_t *); void (*set_pio_mode)(ide_drive_t *, const u8); void (*set_dma_mode)(ide_drive_t *, const u8); - void (*selectproc)(ide_drive_t *); int (*reset_poll)(ide_drive_t *); void (*pre_reset)(ide_drive_t *); void (*resetproc)(ide_drive_t *); @@ -719,8 +715,10 @@ struct ide_dma_ops { int (*dma_end)(struct ide_drive_s *); int (*dma_test_irq)(struct ide_drive_s *); void (*dma_lost_irq)(struct ide_drive_s *); + /* below ones are optional */ + int (*dma_check)(struct ide_drive_s *, struct ide_cmd *); int (*dma_timer_expiry)(struct ide_drive_s *); - void (*dma_timeout)(struct ide_drive_s *); + void (*dma_clear)(struct ide_drive_s *); /* * The following method is optional and only required to be * implemented for the SFF-8038i compatible controllers. @@ -1169,18 +1167,15 @@ void ide_tf_dump(const char *, struct ide_taskfile *); void ide_exec_command(ide_hwif_t *, u8); u8 ide_read_status(ide_hwif_t *); u8 ide_read_altstatus(ide_hwif_t *); +void ide_write_devctl(ide_hwif_t *, u8); -void ide_set_irq(ide_hwif_t *, int); - +void ide_dev_select(ide_drive_t *); void ide_tf_load(ide_drive_t *, struct ide_cmd *); void ide_tf_read(ide_drive_t *, struct ide_cmd *); void ide_input_data(ide_drive_t *, struct ide_cmd *, void *, unsigned int); void ide_output_data(ide_drive_t *, struct ide_cmd *, void *, unsigned int); -int ide_io_buffers(ide_drive_t *, struct ide_atapi_pc *, unsigned int, int); - -extern void SELECT_DRIVE(ide_drive_t *); void SELECT_MASK(ide_drive_t *, int); u8 ide_read_error(ide_drive_t *); @@ -1226,6 +1221,8 @@ ide_startstop_t ide_issue_pc(ide_drive_t *, struct ide_cmd *); ide_startstop_t do_rw_taskfile(ide_drive_t *, struct ide_cmd *); +void ide_pio_bytes(ide_drive_t *, struct ide_cmd *, unsigned int, unsigned int); + void ide_finish_cmd(ide_drive_t *, struct ide_cmd *, u8); int ide_raw_taskfile(ide_drive_t *, struct ide_cmd *, u8 *, u16); @@ -1443,8 +1440,8 @@ ide_startstop_t ide_dma_intr(ide_drive_t *); int ide_allocate_dma_engine(ide_hwif_t *); void ide_release_dma_engine(ide_hwif_t *); -int ide_build_sglist(ide_drive_t *, struct ide_cmd *); -void ide_destroy_dmatable(ide_drive_t *); +int ide_dma_prepare(ide_drive_t *, struct ide_cmd *); +void ide_dma_unmap_sg(ide_drive_t *, struct ide_cmd *); #ifdef CONFIG_BLK_DEV_IDEDMA_SFF int config_drive_for_dma(ide_drive_t *); @@ -1462,7 +1459,6 @@ static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; } #endif /* CONFIG_BLK_DEV_IDEDMA_SFF */ void ide_dma_lost_irq(ide_drive_t *); -void ide_dma_timeout(ide_drive_t *); ide_startstop_t ide_dma_timeout_retry(ide_drive_t *, int); #else @@ -1478,8 +1474,10 @@ static inline void ide_check_dma_crc(ide_drive_t *drive) { ; } static inline ide_startstop_t ide_dma_intr(ide_drive_t *drive) { return ide_stopped; } static inline ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) { return ide_stopped; } static inline void ide_release_dma_engine(ide_hwif_t *hwif) { ; } -static inline int ide_build_sglist(ide_drive_t *drive, - struct ide_cmd *cmd) { return 0; } +static inline int ide_dma_prepare(ide_drive_t *drive, + struct ide_cmd *cmd) { return 1; } +static inline void ide_dma_unmap_sg(ide_drive_t *drive, + struct ide_cmd *cmd) { ; } #endif /* CONFIG_BLK_DEV_IDEDMA */ #ifdef CONFIG_BLK_DEV_IDEACPI diff --git a/include/linux/idr.h b/include/linux/idr.h index dd846df8cd3..e968db71e33 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -106,6 +106,7 @@ int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data); +void *idr_get_next(struct idr *idp, int *nextid); void *idr_replace(struct idr *idp, void *ptr, int id); void idr_remove(struct idr *idp, int id); void idr_remove_all(struct idr *idp); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1d6c71d96ed..77214ead1a3 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -123,7 +123,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) #define ecap_max_handle_mask(e) ((e >> 20) & 0xf) - +#define ecap_sc_support(e) ((e >> 7) & 0x1) /* Snooping Control */ /* IOTLB_REG */ #define DMA_TLB_FLUSH_GRANU_OFFSET 60 diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 8a7bfb1b6ca..3af4ffd591b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -21,6 +21,7 @@ #define IOMMU_READ (1) #define IOMMU_WRITE (2) +#define IOMMU_CACHE (4) /* DMA cache coherency */ struct device; @@ -28,6 +29,8 @@ struct iommu_domain { void *priv; }; +#define IOMMU_CAP_CACHE_COHERENCY 0x1 + struct iommu_ops { int (*domain_init)(struct iommu_domain *domain); void (*domain_destroy)(struct iommu_domain *domain); @@ -39,6 +42,8 @@ struct iommu_ops { size_t size); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, unsigned long iova); + int (*domain_has_cap)(struct iommu_domain *domain, + unsigned long cap); }; #ifdef CONFIG_IOMMU_API @@ -57,6 +62,8 @@ extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, size_t size); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, unsigned long iova); +extern int iommu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap); #else /* CONFIG_IOMMU_API */ @@ -107,6 +114,12 @@ static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, return 0; } +static inline int domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + return 0; +} + #endif /* CONFIG_IOMMU_API */ #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 4d248b3f132..8815a3456b3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -649,6 +649,12 @@ struct transaction_s int t_handle_count; /* + * This transaction is being forced and some process is + * waiting for it to finish. + */ + int t_synchronous_commit:1; + + /* * For use by the filesystem to store fs-specific data * structures associated with the transaction */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f81d80f47dc..556d781e69f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -242,6 +242,7 @@ extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); +void log_buf_kexec_setup(void); #else static inline int vprintk(const char *s, va_list args) __attribute__ ((format (printf, 1, 0))); @@ -253,6 +254,9 @@ static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ unsigned int interval_msec) \ { return false; } +static inline void log_buf_kexec_setup(void) +{ +} #endif extern int printk_needs_cpu(int cpu); @@ -353,6 +357,8 @@ static inline char *pack_hex_byte(char *buf, u8 byte) printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info(fmt, ...) \ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +#define pr_cont(fmt, ...) \ + printk(KERN_CONT fmt, ##__VA_ARGS__) /* If you are writing a driver, please use dev_dbg instead */ #if defined(DEBUG) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 5a58ea3e91e..da5a5a1f4cd 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -364,6 +364,23 @@ do { \ #endif /* CONFIG_LOCK_STAT */ +#ifdef CONFIG_LOCKDEP + +/* + * On lockdep we dont want the hand-coded irq-enable of + * _raw_*_lock_flags() code, because lockdep assumes + * that interrupts are not re-enabled during lock-acquire: + */ +#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \ + LOCK_CONTENDED((_lock), (try), (lock)) + +#else /* CONFIG_LOCKDEP */ + +#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \ + lockfl((_lock), (flags)) + +#endif /* CONFIG_LOCKDEP */ + #ifdef CONFIG_GENERIC_HARDIRQS extern void early_init_irq_lock_class(void); #else diff --git a/include/linux/loop.h b/include/linux/loop.h index 6ffd6db5bb0..40725447f5e 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -160,5 +160,6 @@ int loop_unregister_transfer(int number); #define LOOP_SET_STATUS64 0x4C04 #define LOOP_GET_STATUS64 0x4C05 #define LOOP_CHANGE_FD 0x4C06 +#define LOOP_SET_CAPACITY 0x4C07 #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 326f45c8653..18146c980b6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -88,9 +88,6 @@ extern void mem_cgroup_end_migration(struct mem_cgroup *mem, /* * For memory reclaim. */ -extern int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem); -extern long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem); - extern int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem); extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority); @@ -104,6 +101,8 @@ struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone); struct zone_reclaim_stat* mem_cgroup_get_reclaim_stat_from_page(struct page *page); +extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, + struct task_struct *p); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -209,16 +208,6 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *mem, { } -static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem) -{ - return 0; -} - -static inline int mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem) -{ - return 0; -} - static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem) { return 0; @@ -270,6 +259,11 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page) return NULL; } +static inline void +mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) +{ +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/memory.h b/include/linux/memory.h index 3fdc10806d3..42767d1a62e 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -99,4 +99,15 @@ enum mem_add_context { BOOT, HOTPLUG }; #define hotplug_memory_notifier(fn, pri) do { } while (0) #endif +/* + * 'struct memory_accessor' is a generic interface to provide + * in-kernel access to persistent memory such as i2c or SPI EEPROMs + */ +struct memory_accessor { + ssize_t (*read)(struct memory_accessor *, char *buf, off_t offset, + size_t count); + ssize_t (*write)(struct memory_accessor *, const char *buf, + off_t offset, size_t count); +}; + #endif /* _LINUX_MEMORY_H_ */ diff --git a/include/linux/mm.h b/include/linux/mm.h index b1ea37fc7a2..bff1f0d475c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -135,6 +135,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ +#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -187,7 +188,7 @@ struct vm_operations_struct { /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ - int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); + int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); /* called by access_process_vm when get_user_pages() fails, typically * for use by special VMAs that can switch between memory and hardware @@ -834,6 +835,7 @@ int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); +void account_page_dirtied(struct page *page, struct address_space *mapping); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); @@ -1077,7 +1079,7 @@ static inline void setup_per_cpu_pageset(void) {} #endif /* nommu.c */ -extern atomic_t mmap_pages_allocated; +extern atomic_long_t mmap_pages_allocated; /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d84feb7bdbf..0e80e26ecf2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -11,6 +11,7 @@ #include <linux/rwsem.h> #include <linux/completion.h> #include <linux/cpumask.h> +#include <linux/page-debug-flags.h> #include <asm/page.h> #include <asm/mmu.h> @@ -94,6 +95,9 @@ struct page { void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ +#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS + unsigned long debug_flags; /* Use atomic bitops on this */ +#endif }; /* diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1aca6cebbb7..26ef24076b7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -806,6 +806,14 @@ extern struct zone *next_zone(struct zone *zone); zone; \ zone = next_zone(zone)) +#define for_each_populated_zone(zone) \ + for (zone = (first_online_pgdat())->node_zones; \ + zone; \ + zone = next_zone(zone)) \ + if (!populated_zone(zone)) \ + ; /* do nothing */ \ + else + static inline struct zone *zonelist_zone(struct zoneref *zoneref) { return zoneref->zone; diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 830bbcd449d..3a059298cc1 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -22,6 +22,8 @@ struct proc_mounts { int event; }; +struct fs_struct; + extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void __put_mnt_ns(struct mnt_namespace *ns); diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 5c42821da2d..068a0c9946a 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -11,21 +11,11 @@ */ #ifdef CONFIG_BLOCK -struct mpage_data { - struct bio *bio; - sector_t last_block_in_bio; - get_block_t *get_block; - unsigned use_writepage; -}; - struct writeback_control; -struct bio *mpage_bio_submit(int rw, struct bio *bio); int mpage_readpages(struct address_space *mapping, struct list_head *pages, unsigned nr_pages, get_block_t get_block); int mpage_readpage(struct page *page, get_block_t get_block); -int __mpage_writepage(struct page *page, struct writeback_control *wbc, - void *data); int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); int mpage_writepage(struct page *page, get_block_t *get_block, diff --git a/include/linux/msi.h b/include/linux/msi.h index d2b8a1e8ca1..6991ab5b24d 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -20,20 +20,23 @@ extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); struct msi_desc { struct { - __u8 type : 5; /* {0: unused, 5h:MSI, 11h:MSI-X} */ + __u8 is_msix : 1; + __u8 multiple: 3; /* log2 number of messages */ __u8 maskbit : 1; /* mask-pending bit supported ? */ - __u8 masked : 1; __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ __u8 pos; /* Location of the msi capability */ - __u32 maskbits_mask; /* mask bits mask */ __u16 entry_nr; /* specific enabled entry */ unsigned default_irq; /* default pre-assigned irq */ - }msi_attrib; + } msi_attrib; + u32 masked; /* mask bits */ unsigned int irq; struct list_head list; - void __iomem *mask_base; + union { + void __iomem *mask_base; + u8 mask_pos; + }; struct pci_dev *dev; /* Last set MSI message */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8cc8807f77d..fdffb413b19 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -166,8 +166,7 @@ struct nfs_inode { */ struct radix_tree_root nfs_page_tree; - unsigned long ncommit, - npages; + unsigned long npages; /* Open contexts for shared mmap writes */ struct list_head open_files; @@ -186,6 +185,9 @@ struct nfs_inode { fmode_t delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ +#ifdef CONFIG_NFS_FSCACHE + struct fscache_cookie *fscache; +#endif struct inode vfs_inode; }; @@ -207,6 +209,9 @@ struct nfs_inode { #define NFS_INO_STALE (1) /* possible stale inode */ #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ #define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ +#define NFS_INO_FLUSHING (4) /* inode is flushing out data */ +#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ +#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { @@ -260,6 +265,11 @@ static inline int NFS_STALE(const struct inode *inode) return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags); } +static inline int NFS_FSCACHE(const struct inode *inode) +{ + return test_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); +} + static inline __u64 NFS_FILEID(const struct inode *inode) { return NFS_I(inode)->fileid; @@ -506,6 +516,8 @@ extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); extern void nfs_readdata_release(void *data); +extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, + struct page *); /* * Allocate nfs_read_data structures @@ -583,6 +595,7 @@ extern void * nfs_root_data(void); #define NFSDBG_CALLBACK 0x0100 #define NFSDBG_CLIENT 0x0200 #define NFSDBG_MOUNT 0x0400 +#define NFSDBG_FSCACHE 0x0800 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 9bb81aec91c..6ad75948cbf 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -64,6 +64,10 @@ struct nfs_client { char cl_ipaddr[48]; unsigned char cl_id_uniquifier; #endif + +#ifdef CONFIG_NFS_FSCACHE + struct fscache_cookie *fscache; /* client index cache cookie */ +#endif }; /* @@ -96,16 +100,28 @@ struct nfs_server { unsigned int acdirmin; unsigned int acdirmax; unsigned int namelen; + unsigned int options; /* extra options enabled by mount */ +#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ struct nfs_fsid fsid; __u64 maxfilesize; /* maximum file size */ unsigned long mount_time; /* when this fs was mounted */ dev_t s_dev; /* superblock dev numbers */ +#ifdef CONFIG_NFS_FSCACHE + struct nfs_fscache_key *fscache_key; /* unique key for superblock */ + struct fscache_cookie *fscache; /* superblock cookie */ +#endif + #ifdef CONFIG_NFS_V4 u32 attr_bitmask[2];/* V4 bitmask representing the set of attributes supported on this filesystem */ + u32 cache_consistency_bitmask[2]; + /* V4 bitmask representing the subset + of change attribute, size, ctime + and mtime attributes supported by + the server */ u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h index 1cb9a3fed2b..68b10f5f890 100644 --- a/include/linux/nfs_iostat.h +++ b/include/linux/nfs_iostat.h @@ -116,4 +116,16 @@ enum nfs_stat_eventcounters { __NFSIOS_COUNTSMAX, }; +/* + * NFS local caching servicing counters + */ +enum nfs_stat_fscachecounters { + NFSIOS_FSCACHE_PAGES_READ_OK, + NFSIOS_FSCACHE_PAGES_READ_FAIL, + NFSIOS_FSCACHE_PAGES_WRITTEN_OK, + NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, + NFSIOS_FSCACHE_PAGES_UNCACHED, + __NFSIOS_FSCACHEMAX, +}; + #endif /* _LINUX_NFS_IOSTAT */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 43a713fce11..b89c34e40bc 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -27,12 +27,8 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid } struct nfs_fattr { - unsigned short valid; /* which fields are valid */ - __u64 pre_size; /* pre_op_attr.size */ - struct timespec pre_mtime; /* pre_op_attr.mtime */ - struct timespec pre_ctime; /* pre_op_attr.ctime */ - enum nfs_ftype type; /* always use NFSv2 types */ - __u32 mode; + unsigned int valid; /* which fields are valid */ + umode_t mode; __u32 nlink; __u32 uid; __u32 gid; @@ -52,19 +48,55 @@ struct nfs_fattr { struct timespec atime; struct timespec mtime; struct timespec ctime; - __u32 bitmap[2]; /* NFSv4 returned attribute bitmap */ __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ + __u64 pre_size; /* pre_op_attr.size */ + struct timespec pre_mtime; /* pre_op_attr.mtime */ + struct timespec pre_ctime; /* pre_op_attr.ctime */ unsigned long time_start; unsigned long gencount; }; -#define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ -#define NFS_ATTR_FATTR 0x0002 /* post-op attributes */ -#define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */ -#define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */ -#define NFS_ATTR_WCC_V4 0x0010 /* pre-op change attribute */ -#define NFS_ATTR_FATTR_V4_REFERRAL 0x0020 /* NFSv4 referral */ +#define NFS_ATTR_FATTR_TYPE (1U << 0) +#define NFS_ATTR_FATTR_MODE (1U << 1) +#define NFS_ATTR_FATTR_NLINK (1U << 2) +#define NFS_ATTR_FATTR_OWNER (1U << 3) +#define NFS_ATTR_FATTR_GROUP (1U << 4) +#define NFS_ATTR_FATTR_RDEV (1U << 5) +#define NFS_ATTR_FATTR_SIZE (1U << 6) +#define NFS_ATTR_FATTR_PRESIZE (1U << 7) +#define NFS_ATTR_FATTR_BLOCKS_USED (1U << 8) +#define NFS_ATTR_FATTR_SPACE_USED (1U << 9) +#define NFS_ATTR_FATTR_FSID (1U << 10) +#define NFS_ATTR_FATTR_FILEID (1U << 11) +#define NFS_ATTR_FATTR_ATIME (1U << 12) +#define NFS_ATTR_FATTR_MTIME (1U << 13) +#define NFS_ATTR_FATTR_CTIME (1U << 14) +#define NFS_ATTR_FATTR_PREMTIME (1U << 15) +#define NFS_ATTR_FATTR_PRECTIME (1U << 16) +#define NFS_ATTR_FATTR_CHANGE (1U << 17) +#define NFS_ATTR_FATTR_PRECHANGE (1U << 18) +#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 19) /* NFSv4 referral */ + +#define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ + | NFS_ATTR_FATTR_MODE \ + | NFS_ATTR_FATTR_NLINK \ + | NFS_ATTR_FATTR_OWNER \ + | NFS_ATTR_FATTR_GROUP \ + | NFS_ATTR_FATTR_RDEV \ + | NFS_ATTR_FATTR_SIZE \ + | NFS_ATTR_FATTR_FSID \ + | NFS_ATTR_FATTR_FILEID \ + | NFS_ATTR_FATTR_ATIME \ + | NFS_ATTR_FATTR_MTIME \ + | NFS_ATTR_FATTR_CTIME) +#define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \ + | NFS_ATTR_FATTR_BLOCKS_USED) +#define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \ + | NFS_ATTR_FATTR_SPACE_USED) +#define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ + | NFS_ATTR_FATTR_SPACE_USED \ + | NFS_ATTR_FATTR_CHANGE) /* * Info on the file system @@ -836,6 +868,7 @@ struct nfs_rpc_ops { int (*lock)(struct file *, int, struct file_lock *); int (*lock_check_bounds)(const struct file_lock *); void (*clear_acl_cache)(struct inode *); + void (*close_context)(struct nfs_open_context *ctx, int); }; /* diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index afad7dec1b3..7b370c7cfef 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -8,6 +8,7 @@ struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; +struct fs_struct; /* * A structure to contain pointers to all per-process diff --git a/include/linux/page-debug-flags.h b/include/linux/page-debug-flags.h new file mode 100644 index 00000000000..b0638fd91e9 --- /dev/null +++ b/include/linux/page-debug-flags.h @@ -0,0 +1,30 @@ +#ifndef LINUX_PAGE_DEBUG_FLAGS_H +#define LINUX_PAGE_DEBUG_FLAGS_H + +/* + * page->debug_flags bits: + * + * PAGE_DEBUG_FLAG_POISON is set for poisoned pages. This is used to + * implement generic debug pagealloc feature. The pages are filled with + * poison patterns and set this flag after free_pages(). The poisoned + * pages are verified whether the patterns are not corrupted and clear + * the flag before alloc_pages(). + */ + +enum page_debug_flags { + PAGE_DEBUG_FLAG_POISON, /* Page is poisoned */ +}; + +/* + * Ensure that CONFIG_WANT_PAGE_DEBUG_FLAGS reliably + * gets turned off when no debug features are enabling it! + */ + +#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS +#if !defined(CONFIG_PAGE_POISONING) \ +/* && !defined(CONFIG_PAGE_DEBUG_SOMETHING_ELSE) && ... */ +#error WANT_PAGE_DEBUG_FLAGS is turned on with no debug features! +#endif +#endif /* CONFIG_WANT_PAGE_DEBUG_FLAGS */ + +#endif /* LINUX_PAGE_DEBUG_FLAGS_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 219a523ecdb..62214c7d2d9 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -82,6 +82,7 @@ enum pageflags { PG_arch_1, PG_reserved, PG_private, /* If pagecache, has fs-private data */ + PG_private_2, /* If pagecache, has fs aux data */ PG_writeback, /* Page is under writeback */ #ifdef CONFIG_PAGEFLAGS_EXTENDED PG_head, /* A head page */ @@ -96,6 +97,8 @@ enum pageflags { PG_swapbacked, /* Page is backed by RAM/swap */ #ifdef CONFIG_UNEVICTABLE_LRU PG_unevictable, /* Page is "unevictable" */ +#endif +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT PG_mlocked, /* Page is vma mlocked */ #endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR @@ -106,6 +109,12 @@ enum pageflags { /* Filesystems */ PG_checked = PG_owner_priv_1, + /* Two page bits are conscripted by FS-Cache to maintain local caching + * state. These bits are set on pages belonging to the netfs's inodes + * when those inodes are being locally cached. + */ + PG_fscache = PG_private_2, /* page backed by cache */ + /* XEN */ PG_pinned = PG_owner_priv_1, PG_savepinned = PG_dirty, @@ -180,7 +189,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } struct page; /* forward declaration */ -TESTPAGEFLAG(Locked, locked) +TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked) PAGEFLAG(Error, error) PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) @@ -192,8 +201,6 @@ PAGEFLAG(Checked, checked) /* Used by some filesystems */ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ PAGEFLAG(SavePinned, savepinned); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) -PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) - __SETPAGEFLAG(Private, private) PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) __PAGEFLAG(SlobPage, slob_page) @@ -203,6 +210,16 @@ __PAGEFLAG(SlubFrozen, slub_frozen) __PAGEFLAG(SlubDebug, slub_debug) /* + * Private page markings that may be used by the filesystem that owns the page + * for its own purposes. + * - PG_private and PG_private_2 cause releasepage() and co to be invoked + */ +PAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) + __CLEARPAGEFLAG(Private, private) +PAGEFLAG(Private2, private_2) TESTSCFLAG(Private2, private_2) +PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) + +/* * Only test-and-set exist for PG_writeback. The unconditional operators are * risky: they bypass page accounting. */ @@ -234,20 +251,20 @@ PAGEFLAG_FALSE(SwapCache) #ifdef CONFIG_UNEVICTABLE_LRU PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) TESTCLEARFLAG(Unevictable, unevictable) +#else +PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) + SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) + __CLEARPAGEFLAG_NOOP(Unevictable) +#endif +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT #define MLOCK_PAGES 1 PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) TESTSCFLAG(Mlocked, mlocked) - #else - #define MLOCK_PAGES 0 PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) - -PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) - SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) - __CLEARPAGEFLAG_NOOP(Unevictable) #endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR @@ -367,9 +384,13 @@ static inline void __ClearPageTail(struct page *page) #ifdef CONFIG_UNEVICTABLE_LRU #define __PG_UNEVICTABLE (1 << PG_unevictable) -#define __PG_MLOCKED (1 << PG_mlocked) #else #define __PG_UNEVICTABLE 0 +#endif + +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT +#define __PG_MLOCKED (1 << PG_mlocked) +#else #define __PG_MLOCKED 0 #endif @@ -378,9 +399,10 @@ static inline void __ClearPageTail(struct page *page) * these flags set. It they are, there is a problem. */ #define PAGE_FLAGS_CHECK_AT_FREE \ - (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ - 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ - 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ + (1 << PG_lru | 1 << PG_locked | \ + 1 << PG_private | 1 << PG_private_2 | \ + 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ + 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ __PG_UNEVICTABLE | __PG_MLOCKED) /* @@ -391,4 +413,16 @@ static inline void __ClearPageTail(struct page *page) #define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) #endif /* !__GENERATING_BOUNDS_H */ + +/** + * page_has_private - Determine if page has private stuff + * @page: The page to be checked + * + * Determine if a page has private stuff, indicating that release routines + * should be invoked upon it. + */ +#define page_has_private(page) \ + ((page)->flags & ((1 << PG_private) | \ + (1 << PG_private_2))) + #endif /* PAGE_FLAGS_H */ diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 602cc1fdee9..7339c7bf733 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -91,24 +91,23 @@ static inline void page_cgroup_init(void) #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP #include <linux/swap.h> -extern struct mem_cgroup * -swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem); -extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent); +extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id); +extern unsigned short lookup_swap_cgroup(swp_entry_t ent); extern int swap_cgroup_swapon(int type, unsigned long max_pages); extern void swap_cgroup_swapoff(int type); #else #include <linux/swap.h> static inline -struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) +unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) { - return NULL; + return 0; } static inline -struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) +unsigned short lookup_swap_cgroup(swp_entry_t ent) { - return NULL; + return 0; } static inline int diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 01ca0856caf..34da5230faa 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -18,9 +18,14 @@ * Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page * allocation mode flags. */ -#define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */ -#define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */ -#define AS_MM_ALL_LOCKS (__GFP_BITS_SHIFT + 2) /* under mm_take_all_locks() */ +enum mapping_flags { + AS_EIO = __GFP_BITS_SHIFT + 0, /* IO error on async write */ + AS_ENOSPC = __GFP_BITS_SHIFT + 1, /* ENOSPC on async write */ + AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */ +#ifdef CONFIG_UNEVICTABLE_LRU + AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */ +#endif +}; static inline void mapping_set_error(struct address_space *mapping, int error) { @@ -33,7 +38,6 @@ static inline void mapping_set_error(struct address_space *mapping, int error) } #ifdef CONFIG_UNEVICTABLE_LRU -#define AS_UNEVICTABLE (__GFP_BITS_SHIFT + 2) /* e.g., ramdisk, SHM_LOCK */ static inline void mapping_set_unevictable(struct address_space *mapping) { @@ -380,6 +384,11 @@ static inline void wait_on_page_writeback(struct page *page) extern void end_page_writeback(struct page *page); /* + * Add an arbitrary waiter to a page's wait queue + */ +extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter); + +/* * Fault a userspace page into pagetables. Return non-zero on a fault. * * This assumes that two userspace pages are always sufficient. That's diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 7b2886fa7fd..bab82f4c571 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -24,7 +24,6 @@ void __pagevec_release(struct pagevec *pvec); void __pagevec_free(struct pagevec *pvec); void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); void pagevec_strip(struct pagevec *pvec); -void pagevec_swap_free(struct pagevec *pvec); unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_pages); unsigned pagevec_lookup_tag(struct pagevec *pvec, diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 042c166f65d..092e82e0048 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -10,72 +10,25 @@ #include <linux/acpi.h> -#define OSC_QUERY_TYPE 0 -#define OSC_SUPPORT_TYPE 1 -#define OSC_CONTROL_TYPE 2 -#define OSC_SUPPORT_MASKS 0x1f - -/* - * _OSC DW0 Definition - */ -#define OSC_QUERY_ENABLE 1 -#define OSC_REQUEST_ERROR 2 -#define OSC_INVALID_UUID_ERROR 4 -#define OSC_INVALID_REVISION_ERROR 8 -#define OSC_CAPABILITIES_MASK_ERROR 16 - -/* - * _OSC DW1 Definition (OS Support Fields) - */ -#define OSC_EXT_PCI_CONFIG_SUPPORT 1 -#define OSC_ACTIVE_STATE_PWR_SUPPORT 2 -#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4 -#define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8 -#define OSC_MSI_SUPPORT 16 - -/* - * _OSC DW1 Definition (OS Control Fields) - */ -#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1 -#define OSC_SHPC_NATIVE_HP_CONTROL 2 -#define OSC_PCI_EXPRESS_PME_CONTROL 4 -#define OSC_PCI_EXPRESS_AER_CONTROL 8 -#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16 - -#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \ - OSC_SHPC_NATIVE_HP_CONTROL | \ - OSC_PCI_EXPRESS_PME_CONTROL | \ - OSC_PCI_EXPRESS_AER_CONTROL | \ - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) - #ifdef CONFIG_ACPI -extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); -int pci_acpi_osc_support(acpi_handle handle, u32 flags); static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { - /* Find root host bridge */ - while (pdev->bus->self) - pdev = pdev->bus->self; - - return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus), - pdev->bus->number); + struct pci_bus *pbus = pdev->bus; + /* Find a PCI root bus */ + while (pbus->parent) + pbus = pbus->parent; + return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus), + pbus->number); } static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) { - int seg = pci_domain_nr(pbus), busnr = pbus->number; - struct pci_dev *bridge = pbus->self; - if (bridge) - return DEVICE_ACPI_HANDLE(&(bridge->dev)); - return acpi_get_pci_rootbridge_handle(seg, busnr); + if (pbus->parent) + return DEVICE_ACPI_HANDLE(&(pbus->self->dev)); + return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus), + pbus->number); } #else -#if !defined(AE_ERROR) -typedef u32 acpi_status; -#define AE_ERROR (acpi_status) (0x0001) -#endif -static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) -{return AE_ERROR;} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { return NULL; } #endif diff --git a/include/linux/pci.h b/include/linux/pci.h index df364413261..a7fe4bbd7ff 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -52,6 +52,7 @@ #include <asm/atomic.h> #include <linux/device.h> #include <linux/io.h> +#include <linux/irqreturn.h> /* Include the ID list */ #include <linux/pci_ids.h> @@ -93,6 +94,12 @@ enum { /* #6: expansion ROM resource */ PCI_ROM_RESOURCE, + /* device specific resources */ +#ifdef CONFIG_PCI_IOV + PCI_IOV_RESOURCES, + PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1, +#endif + /* resources assigned to buses behind the bridge */ #define PCI_BRIDGE_RESOURCE_NUM 4 @@ -180,6 +187,7 @@ struct pci_cap_saved_state { struct pcie_link_state; struct pci_vpd; +struct pci_sriov; /* * The pci_dev structure is used to describe PCI devices. @@ -257,6 +265,8 @@ struct pci_dev { unsigned int is_managed:1; unsigned int is_pcie:1; unsigned int state_saved:1; + unsigned int is_physfn:1; + unsigned int is_virtfn:1; pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ @@ -270,6 +280,12 @@ struct pci_dev { struct list_head msi_list; #endif struct pci_vpd *vpd; +#ifdef CONFIG_PCI_IOV + union { + struct pci_sriov *sriov; /* SR-IOV capability related */ + struct pci_dev *physfn; /* the PF this VF is associated with */ + }; +#endif }; extern struct pci_dev *alloc_pci_dev(void); @@ -341,6 +357,15 @@ struct pci_bus { #define pci_bus_b(n) list_entry(n, struct pci_bus, node) #define to_pci_bus(n) container_of(n, struct pci_bus, dev) +/* + * Returns true if the pci bus is root (behind host-pci bridge), + * false otherwise + */ +static inline bool pci_is_root_bus(struct pci_bus *pbus) +{ + return !(pbus->parent); +} + #ifdef CONFIG_PCI_MSI static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { @@ -528,7 +553,7 @@ void pcibios_update_irq(struct pci_dev *, int irq); /* Generic PCI functions used internally */ extern struct pci_bus *pci_find_bus(int domain, int busnr); -void pci_bus_add_devices(struct pci_bus *bus); +void pci_bus_add_devices(const struct pci_bus *bus); struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, @@ -702,6 +727,9 @@ int pci_back_from_sleep(struct pci_dev *dev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); +#ifdef CONFIG_HOTPLUG +unsigned int pci_rescan_bus(struct pci_bus *bus); +#endif /* Vital product data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); @@ -709,7 +737,7 @@ ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void int pci_vpd_truncate(struct pci_dev *dev, size_t size); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ -void pci_bus_assign_resources(struct pci_bus *bus); +void pci_bus_assign_resources(const struct pci_bus *bus); void pci_bus_size_bridges(struct pci_bus *bus); int pci_claim_resource(struct pci_dev *, int); void pci_assign_unassigned_resources(void); @@ -790,7 +818,7 @@ struct msix_entry { #ifndef CONFIG_PCI_MSI -static inline int pci_enable_msi(struct pci_dev *dev) +static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) { return -1; } @@ -800,6 +828,10 @@ static inline void pci_msi_shutdown(struct pci_dev *dev) static inline void pci_disable_msi(struct pci_dev *dev) { } +static inline int pci_msix_table_size(struct pci_dev *dev) +{ + return 0; +} static inline int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) { @@ -821,9 +853,10 @@ static inline int pci_msi_enabled(void) return 0; } #else -extern int pci_enable_msi(struct pci_dev *dev); +extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); extern void pci_msi_shutdown(struct pci_dev *dev); extern void pci_disable_msi(struct pci_dev *dev); +extern int pci_msix_table_size(struct pci_dev *dev); extern int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec); extern void pci_msix_shutdown(struct pci_dev *dev); @@ -842,6 +875,8 @@ static inline int pcie_aspm_enabled(void) extern int pcie_aspm_enabled(void); #endif +#define pci_enable_msi(pdev) pci_enable_msi_block(pdev, 1) + #ifdef CONFIG_HT_IRQ /* The functions a driver should call */ int ht_create_irq(struct pci_dev *dev, int idx); @@ -1195,5 +1230,23 @@ int pci_ext_cfg_avail(struct pci_dev *dev); void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); +#ifdef CONFIG_PCI_IOV +extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); +extern void pci_disable_sriov(struct pci_dev *dev); +extern irqreturn_t pci_sriov_migration(struct pci_dev *dev); +#else +static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + return -ENODEV; +} +static inline void pci_disable_sriov(struct pci_dev *dev) +{ +} +static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev) +{ + return IRQ_NONE; +} +#endif + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e5816dd3337..170f8b1f22d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -526,6 +526,7 @@ #define PCI_DEVICE_ID_AMD_OPUS_7443 0x7443 #define PCI_DEVICE_ID_AMD_VIPER_7443 0x7443 #define PCI_DEVICE_ID_AMD_OPUS_7445 0x7445 +#define PCI_DEVICE_ID_AMD_8111_PCI 0x7460 #define PCI_DEVICE_ID_AMD_8111_LPC 0x7468 #define PCI_DEVICE_ID_AMD_8111_IDE 0x7469 #define PCI_DEVICE_ID_AMD_8111_SMBUS2 0x746a @@ -2396,6 +2397,7 @@ #define PCI_DEVICE_ID_INTEL_82801CA_12 0x248c #define PCI_DEVICE_ID_INTEL_82801DB_0 0x24c0 #define PCI_DEVICE_ID_INTEL_82801DB_1 0x24c1 +#define PCI_DEVICE_ID_INTEL_82801DB_2 0x24c2 #define PCI_DEVICE_ID_INTEL_82801DB_3 0x24c3 #define PCI_DEVICE_ID_INTEL_82801DB_5 0x24c5 #define PCI_DEVICE_ID_INTEL_82801DB_6 0x24c6 diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 027815b4635..e4d08c1b2e0 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -235,7 +235,7 @@ #define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ #define PCI_PM_CTRL 4 /* PM control and status register */ #define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ -#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */ #define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ #define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ #define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ @@ -375,6 +375,7 @@ #define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ #define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ #define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ +#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ #define PCI_EXP_DEVCAP 4 /* Device capabilities */ @@ -487,6 +488,8 @@ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ +#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ /* Extended Capabilities (PCI-X 2.0 and Express) */ #define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) @@ -498,6 +501,7 @@ #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 #define PCI_EXT_CAP_ID_ARI 14 +#define PCI_EXT_CAP_ID_SRIOV 16 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -615,4 +619,35 @@ #define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ #define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ +/* Single Root I/O Virtualization */ +#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ +#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ +#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ +#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ +#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ +#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ +#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ +#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ +#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ +#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ +#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ +#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ +#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ +#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ +#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ +#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ +#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */ +#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ +#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ +#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ +#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ +#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ +#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ +#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ +#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ +#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ +#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ +#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ +#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ + #endif /* LINUX_PCI_REGS_H */ diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index 6cd91e3f982..b4c79545330 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -16,29 +16,30 @@ #define PCIE_ANY_PORT 7 /* Service Type */ -#define PCIE_PORT_SERVICE_PME 1 /* Power Management Event */ -#define PCIE_PORT_SERVICE_AER 2 /* Advanced Error Reporting */ -#define PCIE_PORT_SERVICE_HP 4 /* Native Hotplug */ -#define PCIE_PORT_SERVICE_VC 8 /* Virtual Channel */ +#define PCIE_PORT_SERVICE_PME_SHIFT 0 /* Power Management Event */ +#define PCIE_PORT_SERVICE_PME (1 << PCIE_PORT_SERVICE_PME_SHIFT) +#define PCIE_PORT_SERVICE_AER_SHIFT 1 /* Advanced Error Reporting */ +#define PCIE_PORT_SERVICE_AER (1 << PCIE_PORT_SERVICE_AER_SHIFT) +#define PCIE_PORT_SERVICE_HP_SHIFT 2 /* Native Hotplug */ +#define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) +#define PCIE_PORT_SERVICE_VC_SHIFT 3 /* Virtual Channel */ +#define PCIE_PORT_SERVICE_VC (1 << PCIE_PORT_SERVICE_VC_SHIFT) /* Root/Upstream/Downstream Port's Interrupt Mode */ +#define PCIE_PORT_NO_IRQ (-1) #define PCIE_PORT_INTx_MODE 0 #define PCIE_PORT_MSI_MODE 1 #define PCIE_PORT_MSIX_MODE 2 -struct pcie_port_service_id { - __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/ - __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */ - __u32 class, class_mask; /* (class,subclass,prog-if) triplet */ - __u32 port_type, service_type; /* Port Entity */ - kernel_ulong_t driver_data; +struct pcie_port_data { + int port_type; /* Type of the port */ + int port_irq_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ }; struct pcie_device { int irq; /* Service IRQ/MSI/MSI-X Vector */ - int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ - struct pcie_port_service_id id; /* Service ID */ - struct pci_dev *port; /* Root/Upstream/Downstream Port */ + struct pci_dev *port; /* Root/Upstream/Downstream Port */ + u32 service; /* Port service this device represents */ void *priv_data; /* Service Private Data */ struct device device; /* Generic Device Interface */ }; @@ -56,10 +57,9 @@ static inline void* get_service_data(struct pcie_device *dev) struct pcie_port_service_driver { const char *name; - int (*probe) (struct pcie_device *dev, - const struct pcie_port_service_id *id); + int (*probe) (struct pcie_device *dev); void (*remove) (struct pcie_device *dev); - int (*suspend) (struct pcie_device *dev, pm_message_t state); + int (*suspend) (struct pcie_device *dev); int (*resume) (struct pcie_device *dev); /* Service Error Recovery Handler */ @@ -68,7 +68,9 @@ struct pcie_port_service_driver { /* Link Reset Capability - AER service driver specific */ pci_ers_result_t (*reset_link) (struct pci_dev *dev); - const struct pcie_port_service_id *id_table; + int port_type; /* Type of the port this driver can handle */ + u32 service; /* Port service this device represents */ + struct device_driver driver; }; #define to_service_driver(d) \ diff --git a/include/linux/poison.h b/include/linux/poison.h index 9f31683728f..6729f7dcd60 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -17,6 +17,9 @@ */ #define TIMER_ENTRY_STATIC ((void *) 0x74737461) +/********** mm/debug-pagealloc.c **********/ +#define PAGE_POISON 0xaa + /********** mm/slab.c **********/ /* * Magic nums for obj red zoning. diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 98b93ca4db0..67c15653fc2 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -94,6 +94,7 @@ extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); +extern void exit_ptrace(struct task_struct *tracer); extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h deleted file mode 100644 index e98900671ca..00000000000 --- a/include/linux/raid/bitmap.h +++ /dev/null @@ -1,288 +0,0 @@ -/* - * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 - * - * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. - */ -#ifndef BITMAP_H -#define BITMAP_H 1 - -#define BITMAP_MAJOR_LO 3 -/* version 4 insists the bitmap is in little-endian order - * with version 3, it is host-endian which is non-portable - */ -#define BITMAP_MAJOR_HI 4 -#define BITMAP_MAJOR_HOSTENDIAN 3 - -#define BITMAP_MINOR 39 - -/* - * in-memory bitmap: - * - * Use 16 bit block counters to track pending writes to each "chunk". - * The 2 high order bits are special-purpose, the first is a flag indicating - * whether a resync is needed. The second is a flag indicating whether a - * resync is active. - * This means that the counter is actually 14 bits: - * - * +--------+--------+------------------------------------------------+ - * | resync | resync | counter | - * | needed | active | | - * | (0-1) | (0-1) | (0-16383) | - * +--------+--------+------------------------------------------------+ - * - * The "resync needed" bit is set when: - * a '1' bit is read from storage at startup. - * a write request fails on some drives - * a resync is aborted on a chunk with 'resync active' set - * It is cleared (and resync-active set) when a resync starts across all drives - * of the chunk. - * - * - * The "resync active" bit is set when: - * a resync is started on all drives, and resync_needed is set. - * resync_needed will be cleared (as long as resync_active wasn't already set). - * It is cleared when a resync completes. - * - * The counter counts pending write requests, plus the on-disk bit. - * When the counter is '1' and the resync bits are clear, the on-disk - * bit can be cleared aswell, thus setting the counter to 0. - * When we set a bit, or in the counter (to start a write), if the fields is - * 0, we first set the disk bit and set the counter to 1. - * - * If the counter is 0, the on-disk bit is clear and the stipe is clean - * Anything that dirties the stipe pushes the counter to 2 (at least) - * and sets the on-disk bit (lazily). - * If a periodic sweep find the counter at 2, it is decremented to 1. - * If the sweep find the counter at 1, the on-disk bit is cleared and the - * counter goes to zero. - * - * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block - * counters as a fallback when "page" memory cannot be allocated: - * - * Normal case (page memory allocated): - * - * page pointer (32-bit) - * - * [ ] ------+ - * | - * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) - * c1 c2 c2048 - * - * Hijacked case (page memory allocation failed): - * - * hijacked page pointer (32-bit) - * - * [ ][ ] (no page memory allocated) - * counter #1 (16-bit) counter #2 (16-bit) - * - */ - -#ifdef __KERNEL__ - -#define PAGE_BITS (PAGE_SIZE << 3) -#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) - -typedef __u16 bitmap_counter_t; -#define COUNTER_BITS 16 -#define COUNTER_BIT_SHIFT 4 -#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8) -#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) - -#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) -#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) -#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) -#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) -#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) -#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) - -/* how many counters per page? */ -#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) -/* same, except a shift value for more efficient bitops */ -#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) -/* same, except a mask value for more efficient bitops */ -#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) - -#define BITMAP_BLOCK_SIZE 512 -#define BITMAP_BLOCK_SHIFT 9 - -/* how many blocks per chunk? (this is variable) */ -#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT) -#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT) -#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1) - -/* when hijacked, the counters and bits represent even larger "chunks" */ -/* there will be 1024 chunks represented by each counter in the page pointers */ -#define PAGEPTR_BLOCK_RATIO(bitmap) \ - (CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1) -#define PAGEPTR_BLOCK_SHIFT(bitmap) \ - (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) -#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) - -/* - * on-disk bitmap: - * - * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap - * file a page at a time. There's a superblock at the start of the file. - */ - -/* map chunks (bits) to file pages - offset by the size of the superblock */ -#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3)) - -#endif - -/* - * bitmap structures: - */ - -#define BITMAP_MAGIC 0x6d746962 - -/* use these for bitmap->flags and bitmap->sb->state bit-fields */ -enum bitmap_state { - BITMAP_STALE = 0x002, /* the bitmap file is out of date or had -EIO */ - BITMAP_WRITE_ERROR = 0x004, /* A write error has occurred */ - BITMAP_HOSTENDIAN = 0x8000, -}; - -/* the superblock at the front of the bitmap file -- little endian */ -typedef struct bitmap_super_s { - __le32 magic; /* 0 BITMAP_MAGIC */ - __le32 version; /* 4 the bitmap major for now, could change... */ - __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ - __le64 events; /* 24 event counter for the bitmap (1)*/ - __le64 events_cleared;/*32 event counter when last bit cleared (2) */ - __le64 sync_size; /* 40 the size of the md device's sync range(3) */ - __le32 state; /* 48 bitmap state information */ - __le32 chunksize; /* 52 the bitmap chunk size in bytes */ - __le32 daemon_sleep; /* 56 seconds between disk flushes */ - __le32 write_behind; /* 60 number of outstanding write-behind writes */ - - __u8 pad[256 - 64]; /* set to zero */ -} bitmap_super_t; - -/* notes: - * (1) This event counter is updated before the eventcounter in the md superblock - * When a bitmap is loaded, it is only accepted if this event counter is equal - * to, or one greater than, the event counter in the superblock. - * (2) This event counter is updated when the other one is *if*and*only*if* the - * array is not degraded. As bits are not cleared when the array is degraded, - * this represents the last time that any bits were cleared. - * If a device is being added that has an event count with this value or - * higher, it is accepted as conforming to the bitmap. - * (3)This is the number of sectors represented by the bitmap, and is the range that - * resync happens across. For raid1 and raid5/6 it is the size of individual - * devices. For raid10 it is the size of the array. - */ - -#ifdef __KERNEL__ - -/* the in-memory bitmap is represented by bitmap_pages */ -struct bitmap_page { - /* - * map points to the actual memory page - */ - char *map; - /* - * in emergencies (when map cannot be alloced), hijack the map - * pointer and use it as two counters itself - */ - unsigned int hijacked:1; - /* - * count of dirty bits on the page - */ - unsigned int count:31; -}; - -/* keep track of bitmap file pages that have pending writes on them */ -struct page_list { - struct list_head list; - struct page *page; -}; - -/* the main bitmap structure - one per mddev */ -struct bitmap { - struct bitmap_page *bp; - unsigned long pages; /* total number of pages in the bitmap */ - unsigned long missing_pages; /* number of pages not yet allocated */ - - mddev_t *mddev; /* the md device that the bitmap is for */ - - int counter_bits; /* how many bits per block counter */ - - /* bitmap chunksize -- how much data does each bit represent? */ - unsigned long chunksize; - unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */ - unsigned long chunks; /* total number of data chunks for the array */ - - /* We hold a count on the chunk currently being synced, and drop - * it when the last block is started. If the resync is aborted - * midway, we need to be able to drop that count, so we remember - * the counted chunk.. - */ - unsigned long syncchunk; - - __u64 events_cleared; - int need_sync; - - /* bitmap spinlock */ - spinlock_t lock; - - long offset; /* offset from superblock if file is NULL */ - struct file *file; /* backing disk file */ - struct page *sb_page; /* cached copy of the bitmap file superblock */ - struct page **filemap; /* list of cache pages for the file */ - unsigned long *filemap_attr; /* attributes associated w/ filemap pages */ - unsigned long file_pages; /* number of pages in the file */ - int last_page_size; /* bytes in the last page */ - - unsigned long flags; - - int allclean; - - unsigned long max_write_behind; /* write-behind mode */ - atomic_t behind_writes; - - /* - * the bitmap daemon - periodically wakes up and sweeps the bitmap - * file, cleaning up bits and flushing out pages to disk as necessary - */ - unsigned long daemon_lastrun; /* jiffies of last run */ - unsigned long daemon_sleep; /* how many seconds between updates? */ - unsigned long last_end_sync; /* when we lasted called end_sync to - * update bitmap with resync progress */ - - atomic_t pending_writes; /* pending writes to the bitmap file */ - wait_queue_head_t write_wait; - wait_queue_head_t overflow_wait; - -}; - -/* the bitmap API */ - -/* these are used only by md/bitmap */ -int bitmap_create(mddev_t *mddev); -void bitmap_flush(mddev_t *mddev); -void bitmap_destroy(mddev_t *mddev); - -void bitmap_print_sb(struct bitmap *bitmap); -void bitmap_update_sb(struct bitmap *bitmap); - -int bitmap_setallbits(struct bitmap *bitmap); -void bitmap_write_all(struct bitmap *bitmap); - -void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e); - -/* these are exported */ -int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int behind); -void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int success, int behind); -int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int degraded); -void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted); -void bitmap_close_sync(struct bitmap *bitmap); -void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector); - -void bitmap_unplug(struct bitmap *bitmap); -void bitmap_daemon_work(struct bitmap *bitmap); -#endif - -#endif diff --git a/include/linux/raid/linear.h b/include/linux/raid/linear.h deleted file mode 100644 index f38b9c586af..00000000000 --- a/include/linux/raid/linear.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef _LINEAR_H -#define _LINEAR_H - -#include <linux/raid/md.h> - -struct dev_info { - mdk_rdev_t *rdev; - sector_t num_sectors; - sector_t start_sector; -}; - -typedef struct dev_info dev_info_t; - -struct linear_private_data -{ - struct linear_private_data *prev; /* earlier version */ - dev_info_t **hash_table; - sector_t spacing; - sector_t array_sectors; - int sector_shift; /* shift before dividing - * by spacing - */ - dev_info_t disks[0]; -}; - - -typedef struct linear_private_data linear_conf_t; - -#define mddev_to_conf(mddev) ((linear_conf_t *) mddev->private) - -#endif diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h deleted file mode 100644 index 82bea14cae1..00000000000 --- a/include/linux/raid/md.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - md.h : Multiple Devices driver for Linux - Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman - Copyright (C) 1994-96 Marc ZYNGIER - <zyngier@ufr-info-p7.ibp.fr> or - <maz@gloups.fdn.fr> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _MD_H -#define _MD_H - -#include <linux/blkdev.h> -#include <linux/seq_file.h> - -/* - * 'md_p.h' holds the 'physical' layout of RAID devices - * 'md_u.h' holds the user <=> kernel API - * - * 'md_k.h' holds kernel internal definitions - */ - -#include <linux/raid/md_p.h> -#include <linux/raid/md_u.h> -#include <linux/raid/md_k.h> - -#ifdef CONFIG_MD - -/* - * Different major versions are not compatible. - * Different minor versions are only downward compatible. - * Different patchlevel versions are downward and upward compatible. - */ -#define MD_MAJOR_VERSION 0 -#define MD_MINOR_VERSION 90 -/* - * MD_PATCHLEVEL_VERSION indicates kernel functionality. - * >=1 means different superblock formats are selectable using SET_ARRAY_INFO - * and major_version/minor_version accordingly - * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT - * in the super status byte - * >=3 means that bitmap superblock version 4 is supported, which uses - * little-ending representation rather than host-endian - */ -#define MD_PATCHLEVEL_VERSION 3 - -extern int mdp_major; - -extern int register_md_personality(struct mdk_personality *p); -extern int unregister_md_personality(struct mdk_personality *p); -extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev), - mddev_t *mddev, const char *name); -extern void md_unregister_thread(mdk_thread_t *thread); -extern void md_wakeup_thread(mdk_thread_t *thread); -extern void md_check_recovery(mddev_t *mddev); -extern void md_write_start(mddev_t *mddev, struct bio *bi); -extern void md_write_end(mddev_t *mddev); -extern void md_done_sync(mddev_t *mddev, int blocks, int ok); -extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); - -extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, - sector_t sector, int size, struct page *page); -extern void md_super_wait(mddev_t *mddev); -extern int sync_page_io(struct block_device *bdev, sector_t sector, int size, - struct page *page, int rw); -extern void md_do_sync(mddev_t *mddev); -extern void md_new_event(mddev_t *mddev); -extern int md_allow_write(mddev_t *mddev); -extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); - -#endif /* CONFIG_MD */ -#endif - diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h deleted file mode 100644 index 9743e4dbc91..00000000000 --- a/include/linux/raid/md_k.h +++ /dev/null @@ -1,402 +0,0 @@ -/* - md_k.h : kernel internal structure of the Linux MD driver - Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _MD_K_H -#define _MD_K_H - -/* and dm-bio-list.h is not under include/linux because.... ??? */ -#include "../../../drivers/md/dm-bio-list.h" - -#ifdef CONFIG_BLOCK - -#define LEVEL_MULTIPATH (-4) -#define LEVEL_LINEAR (-1) -#define LEVEL_FAULTY (-5) - -/* we need a value for 'no level specified' and 0 - * means 'raid0', so we need something else. This is - * for internal use only - */ -#define LEVEL_NONE (-1000000) - -#define MaxSector (~(sector_t)0) - -typedef struct mddev_s mddev_t; -typedef struct mdk_rdev_s mdk_rdev_t; - -/* - * options passed in raidrun: - */ - -/* Currently this must fit in an 'int' */ -#define MAX_CHUNK_SIZE (1<<30) - -/* - * MD's 'extended' device - */ -struct mdk_rdev_s -{ - struct list_head same_set; /* RAID devices within the same set */ - - sector_t size; /* Device size (in blocks) */ - mddev_t *mddev; /* RAID array if running */ - long last_events; /* IO event timestamp */ - - struct block_device *bdev; /* block device handle */ - - struct page *sb_page; - int sb_loaded; - __u64 sb_events; - sector_t data_offset; /* start of data in array */ - sector_t sb_start; /* offset of the super block (in 512byte sectors) */ - int sb_size; /* bytes in the superblock */ - int preferred_minor; /* autorun support */ - - struct kobject kobj; - - /* A device can be in one of three states based on two flags: - * Not working: faulty==1 in_sync==0 - * Fully working: faulty==0 in_sync==1 - * Working, but not - * in sync with array - * faulty==0 in_sync==0 - * - * It can never have faulty==1, in_sync==1 - * This reduces the burden of testing multiple flags in many cases - */ - - unsigned long flags; -#define Faulty 1 /* device is known to have a fault */ -#define In_sync 2 /* device is in_sync with rest of array */ -#define WriteMostly 4 /* Avoid reading if at all possible */ -#define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */ -#define AllReserved 6 /* If whole device is reserved for - * one array */ -#define AutoDetected 7 /* added by auto-detect */ -#define Blocked 8 /* An error occured on an externally - * managed array, don't allow writes - * until it is cleared */ -#define StateChanged 9 /* Faulty or Blocked has changed during - * interrupt, so it needs to be - * notified by the thread */ - wait_queue_head_t blocked_wait; - - int desc_nr; /* descriptor index in the superblock */ - int raid_disk; /* role of device in array */ - int saved_raid_disk; /* role that device used to have in the - * array and could again if we did a partial - * resync from the bitmap - */ - sector_t recovery_offset;/* If this device has been partially - * recovered, this is where we were - * up to. - */ - - atomic_t nr_pending; /* number of pending requests. - * only maintained for arrays that - * support hot removal - */ - atomic_t read_errors; /* number of consecutive read errors that - * we have tried to ignore. - */ - atomic_t corrected_errors; /* number of corrected read errors, - * for reporting to userspace and storing - * in superblock. - */ - struct work_struct del_work; /* used for delayed sysfs removal */ - - struct sysfs_dirent *sysfs_state; /* handle for 'state' - * sysfs entry */ -}; - -struct mddev_s -{ - void *private; - struct mdk_personality *pers; - dev_t unit; - int md_minor; - struct list_head disks; - unsigned long flags; -#define MD_CHANGE_DEVS 0 /* Some device status has changed */ -#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ -#define MD_CHANGE_PENDING 2 /* superblock update in progress */ - - int ro; - - struct gendisk *gendisk; - - struct kobject kobj; - int hold_active; -#define UNTIL_IOCTL 1 -#define UNTIL_STOP 2 - - /* Superblock information */ - int major_version, - minor_version, - patch_version; - int persistent; - int external; /* metadata is - * managed externally */ - char metadata_type[17]; /* externally set*/ - int chunk_size; - time_t ctime, utime; - int level, layout; - char clevel[16]; - int raid_disks; - int max_disks; - sector_t size; /* used size of component devices */ - sector_t array_sectors; /* exported array size */ - __u64 events; - - char uuid[16]; - - /* If the array is being reshaped, we need to record the - * new shape and an indication of where we are up to. - * This is written to the superblock. - * If reshape_position is MaxSector, then no reshape is happening (yet). - */ - sector_t reshape_position; - int delta_disks, new_level, new_layout, new_chunk; - - struct mdk_thread_s *thread; /* management thread */ - struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ - sector_t curr_resync; /* last block scheduled */ - unsigned long resync_mark; /* a recent timestamp */ - sector_t resync_mark_cnt;/* blocks written at resync_mark */ - sector_t curr_mark_cnt; /* blocks scheduled now */ - - sector_t resync_max_sectors; /* may be set by personality */ - - sector_t resync_mismatches; /* count of sectors where - * parity/replica mismatch found - */ - - /* allow user-space to request suspension of IO to regions of the array */ - sector_t suspend_lo; - sector_t suspend_hi; - /* if zero, use the system-wide default */ - int sync_speed_min; - int sync_speed_max; - - /* resync even though the same disks are shared among md-devices */ - int parallel_resync; - - int ok_start_degraded; - /* recovery/resync flags - * NEEDED: we might need to start a resync/recover - * RUNNING: a thread is running, or about to be started - * SYNC: actually doing a resync, not a recovery - * RECOVER: doing recovery, or need to try it. - * INTR: resync needs to be aborted for some reason - * DONE: thread is done and is waiting to be reaped - * REQUEST: user-space has requested a sync (used with SYNC) - * CHECK: user-space request for for check-only, no repair - * RESHAPE: A reshape is happening - * - * If neither SYNC or RESHAPE are set, then it is a recovery. - */ -#define MD_RECOVERY_RUNNING 0 -#define MD_RECOVERY_SYNC 1 -#define MD_RECOVERY_RECOVER 2 -#define MD_RECOVERY_INTR 3 -#define MD_RECOVERY_DONE 4 -#define MD_RECOVERY_NEEDED 5 -#define MD_RECOVERY_REQUESTED 6 -#define MD_RECOVERY_CHECK 7 -#define MD_RECOVERY_RESHAPE 8 -#define MD_RECOVERY_FROZEN 9 - - unsigned long recovery; - int recovery_disabled; /* if we detect that recovery - * will always fail, set this - * so we don't loop trying */ - - int in_sync; /* know to not need resync */ - struct mutex reconfig_mutex; - atomic_t active; /* general refcount */ - atomic_t openers; /* number of active opens */ - - int changed; /* true if we might need to reread partition info */ - int degraded; /* whether md should consider - * adding a spare - */ - int barriers_work; /* initialised to true, cleared as soon - * as a barrier request to slave - * fails. Only supported - */ - struct bio *biolist; /* bios that need to be retried - * because BIO_RW_BARRIER is not supported - */ - - atomic_t recovery_active; /* blocks scheduled, but not written */ - wait_queue_head_t recovery_wait; - sector_t recovery_cp; - sector_t resync_min; /* user requested sync - * starts here */ - sector_t resync_max; /* resync should pause - * when it gets here */ - - struct sysfs_dirent *sysfs_state; /* handle for 'array_state' - * file in sysfs. - */ - struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ - - struct work_struct del_work; /* used for delayed sysfs removal */ - - spinlock_t write_lock; - wait_queue_head_t sb_wait; /* for waiting on superblock updates */ - atomic_t pending_writes; /* number of active superblock writes */ - - unsigned int safemode; /* if set, update "clean" superblock - * when no writes pending. - */ - unsigned int safemode_delay; - struct timer_list safemode_timer; - atomic_t writes_pending; - struct request_queue *queue; /* for plugging ... */ - - atomic_t write_behind; /* outstanding async IO */ - unsigned int max_write_behind; /* 0 = sync */ - - struct bitmap *bitmap; /* the bitmap for the device */ - struct file *bitmap_file; /* the bitmap file */ - long bitmap_offset; /* offset from superblock of - * start of bitmap. May be - * negative, but not '0' - */ - long default_bitmap_offset; /* this is the offset to use when - * hot-adding a bitmap. It should - * eventually be settable by sysfs. - */ - - struct list_head all_mddevs; -}; - - -static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev) -{ - int faulty = test_bit(Faulty, &rdev->flags); - if (atomic_dec_and_test(&rdev->nr_pending) && faulty) - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); -} - -static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) -{ - atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); -} - -struct mdk_personality -{ - char *name; - int level; - struct list_head list; - struct module *owner; - int (*make_request)(struct request_queue *q, struct bio *bio); - int (*run)(mddev_t *mddev); - int (*stop)(mddev_t *mddev); - void (*status)(struct seq_file *seq, mddev_t *mddev); - /* error_handler must set ->faulty and clear ->in_sync - * if appropriate, and should abort recovery if needed - */ - void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev); - int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev); - int (*hot_remove_disk) (mddev_t *mddev, int number); - int (*spare_active) (mddev_t *mddev); - sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster); - int (*resize) (mddev_t *mddev, sector_t sectors); - int (*check_reshape) (mddev_t *mddev); - int (*start_reshape) (mddev_t *mddev); - int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); - /* quiesce moves between quiescence states - * 0 - fully active - * 1 - no new requests allowed - * others - reserved - */ - void (*quiesce) (mddev_t *mddev, int state); -}; - - -struct md_sysfs_entry { - struct attribute attr; - ssize_t (*show)(mddev_t *, char *); - ssize_t (*store)(mddev_t *, const char *, size_t); -}; - - -static inline char * mdname (mddev_t * mddev) -{ - return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; -} - -/* - * iterates through some rdev ringlist. It's safe to remove the - * current 'rdev'. Dont touch 'tmp' though. - */ -#define rdev_for_each_list(rdev, tmp, head) \ - list_for_each_entry_safe(rdev, tmp, head, same_set) - -/* - * iterates through the 'same array disks' ringlist - */ -#define rdev_for_each(rdev, tmp, mddev) \ - list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) - -#define rdev_for_each_rcu(rdev, mddev) \ - list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) - -typedef struct mdk_thread_s { - void (*run) (mddev_t *mddev); - mddev_t *mddev; - wait_queue_head_t wqueue; - unsigned long flags; - struct task_struct *tsk; - unsigned long timeout; -} mdk_thread_t; - -#define THREAD_WAKEUP 0 - -#define __wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - wait_queue_t __wait; \ - init_waitqueue_entry(&__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ - spin_lock_irq(&lock); \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - if (condition) \ - break; \ - __wait_event_lock_irq(wq, condition, lock, cmd); \ -} while (0) - -static inline void safe_put_page(struct page *p) -{ - if (p) put_page(p); -} - -#endif /* CONFIG_BLOCK */ -#endif - diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h index 7192035fc4b..fb1abb3367e 100644 --- a/include/linux/raid/md_u.h +++ b/include/linux/raid/md_u.h @@ -15,6 +15,24 @@ #ifndef _MD_U_H #define _MD_U_H +/* + * Different major versions are not compatible. + * Different minor versions are only downward compatible. + * Different patchlevel versions are downward and upward compatible. + */ +#define MD_MAJOR_VERSION 0 +#define MD_MINOR_VERSION 90 +/* + * MD_PATCHLEVEL_VERSION indicates kernel functionality. + * >=1 means different superblock formats are selectable using SET_ARRAY_INFO + * and major_version/minor_version accordingly + * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT + * in the super status byte + * >=3 means that bitmap superblock version 4 is supported, which uses + * little-ending representation rather than host-endian + */ +#define MD_PATCHLEVEL_VERSION 3 + /* ioctls */ /* status */ @@ -46,6 +64,12 @@ #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) +/* 63 partitions with the alternate major number (mdp) */ +#define MdpMinorShift 6 +#ifdef __KERNEL__ +extern int mdp_major; +#endif + typedef struct mdu_version_s { int major; int minor; @@ -85,6 +109,17 @@ typedef struct mdu_array_info_s { } mdu_array_info_t; +/* non-obvious values for 'level' */ +#define LEVEL_MULTIPATH (-4) +#define LEVEL_LINEAR (-1) +#define LEVEL_FAULTY (-5) + +/* we need a value for 'no level specified' and 0 + * means 'raid0', so we need something else. This is + * for internal use only + */ +#define LEVEL_NONE (-1000000) + typedef struct mdu_disk_info_s { /* * configuration/status of one particular disk diff --git a/include/linux/raid/multipath.h b/include/linux/raid/multipath.h deleted file mode 100644 index 6f53fc177a4..00000000000 --- a/include/linux/raid/multipath.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef _MULTIPATH_H -#define _MULTIPATH_H - -#include <linux/raid/md.h> - -struct multipath_info { - mdk_rdev_t *rdev; -}; - -struct multipath_private_data { - mddev_t *mddev; - struct multipath_info *multipaths; - int raid_disks; - int working_disks; - spinlock_t device_lock; - struct list_head retry_list; - - mempool_t *pool; -}; - -typedef struct multipath_private_data multipath_conf_t; - -/* - * this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((multipath_conf_t *) mddev->private) - -/* - * this is our 'private' 'collective' MULTIPATH buffer head. - * it contains information about what kind of IO operations were started - * for this MULTIPATH operation, and about their status: - */ - -struct multipath_bh { - mddev_t *mddev; - struct bio *master_bio; - struct bio bio; - int path; - struct list_head retry_list; -}; -#endif diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h new file mode 100644 index 00000000000..d92480f8285 --- /dev/null +++ b/include/linux/raid/pq.h @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2003 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +#ifndef LINUX_RAID_RAID6_H +#define LINUX_RAID_RAID6_H + +#ifdef __KERNEL__ + +/* Set to 1 to use kernel-wide empty_zero_page */ +#define RAID6_USE_EMPTY_ZERO_PAGE 0 +#include <linux/blkdev.h> + +/* We need a pre-zeroed page... if we don't want to use the kernel-provided + one define it here */ +#if RAID6_USE_EMPTY_ZERO_PAGE +# define raid6_empty_zero_page empty_zero_page +#else +extern const char raid6_empty_zero_page[PAGE_SIZE]; +#endif + +#else /* ! __KERNEL__ */ +/* Used for testing in user space */ + +#include <errno.h> +#include <inttypes.h> +#include <limits.h> +#include <stddef.h> +#include <sys/mman.h> +#include <sys/types.h> + +/* Not standard, but glibc defines it */ +#define BITS_PER_LONG __WORDSIZE + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +#ifndef PAGE_SIZE +# define PAGE_SIZE 4096 +#endif +extern const char raid6_empty_zero_page[PAGE_SIZE]; + +#define __init +#define __exit +#define __attribute_const__ __attribute__((const)) +#define noinline __attribute__((noinline)) + +#define preempt_enable() +#define preempt_disable() +#define cpu_has_feature(x) 1 +#define enable_kernel_altivec() +#define disable_kernel_altivec() + +#define EXPORT_SYMBOL(sym) +#define MODULE_LICENSE(licence) +#define subsys_initcall(x) +#define module_exit(x) +#endif /* __KERNEL__ */ + +/* Routine choices */ +struct raid6_calls { + void (*gen_syndrome)(int, size_t, void **); + int (*valid)(void); /* Returns 1 if this routine set is usable */ + const char *name; /* Name of this routine set */ + int prefer; /* Has special performance attribute */ +}; + +/* Selected algorithm */ +extern struct raid6_calls raid6_call; + +/* Algorithm list */ +extern const struct raid6_calls * const raid6_algos[]; +int raid6_select_algo(void); + +/* Return values from chk_syndrome */ +#define RAID6_OK 0 +#define RAID6_P_BAD 1 +#define RAID6_Q_BAD 2 +#define RAID6_PQ_BAD 3 + +/* Galois field tables */ +extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256))); +extern const u8 raid6_gfexp[256] __attribute__((aligned(256))); +extern const u8 raid6_gfinv[256] __attribute__((aligned(256))); +extern const u8 raid6_gfexi[256] __attribute__((aligned(256))); + +/* Recovery routines */ +void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + void **ptrs); +void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs); +void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, + void **ptrs); + +/* Some definitions to allow code to be compiled for testing in userspace */ +#ifndef __KERNEL__ + +# define jiffies raid6_jiffies() +# define printk printf +# define GFP_KERNEL 0 +# define __get_free_pages(x, y) ((unsigned long)mmap(NULL, PAGE_SIZE << (y), \ + PROT_READ|PROT_WRITE, \ + MAP_PRIVATE|MAP_ANONYMOUS,\ + 0, 0)) +# define free_pages(x, y) munmap((void *)(x), (y)*PAGE_SIZE) + +static inline void cpu_relax(void) +{ + /* Nothing */ +} + +#undef HZ +#define HZ 1000 +static inline uint32_t raid6_jiffies(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec*1000 + tv.tv_usec/1000; +} + +#endif /* ! __KERNEL__ */ + +#endif /* LINUX_RAID_RAID6_H */ diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h deleted file mode 100644 index fd42aa87c39..00000000000 --- a/include/linux/raid/raid0.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef _RAID0_H -#define _RAID0_H - -#include <linux/raid/md.h> - -struct strip_zone -{ - sector_t zone_start; /* Zone offset in md_dev (in sectors) */ - sector_t dev_start; /* Zone offset in real dev (in sectors) */ - sector_t sectors; /* Zone size in sectors */ - int nb_dev; /* # of devices attached to the zone */ - mdk_rdev_t **dev; /* Devices attached to the zone */ -}; - -struct raid0_private_data -{ - struct strip_zone **hash_table; /* Table of indexes into strip_zone */ - struct strip_zone *strip_zone; - mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ - int nr_strip_zones; - - sector_t spacing; - int sector_shift; /* shift this before divide by spacing */ -}; - -typedef struct raid0_private_data raid0_conf_t; - -#define mddev_to_conf(mddev) ((raid0_conf_t *) mddev->private) - -#endif diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h deleted file mode 100644 index 0a9ba7c3302..00000000000 --- a/include/linux/raid/raid1.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef _RAID1_H -#define _RAID1_H - -#include <linux/raid/md.h> - -typedef struct mirror_info mirror_info_t; - -struct mirror_info { - mdk_rdev_t *rdev; - sector_t head_position; -}; - -/* - * memory pools need a pointer to the mddev, so they can force an unplug - * when memory is tight, and a count of the number of drives that the - * pool was allocated for, so they know how much to allocate and free. - * mddev->raid_disks cannot be used, as it can change while a pool is active - * These two datums are stored in a kmalloced struct. - */ - -struct pool_info { - mddev_t *mddev; - int raid_disks; -}; - - -typedef struct r1bio_s r1bio_t; - -struct r1_private_data_s { - mddev_t *mddev; - mirror_info_t *mirrors; - int raid_disks; - int last_used; - sector_t next_seq_sect; - spinlock_t device_lock; - - struct list_head retry_list; - /* queue pending writes and submit them on unplug */ - struct bio_list pending_bio_list; - /* queue of writes that have been unplugged */ - struct bio_list flushing_bio_list; - - /* for use when syncing mirrors: */ - - spinlock_t resync_lock; - int nr_pending; - int nr_waiting; - int nr_queued; - int barrier; - sector_t next_resync; - int fullsync; /* set to 1 if a full sync is needed, - * (fresh device added). - * Cleared when a sync completes. - */ - - wait_queue_head_t wait_barrier; - - struct pool_info *poolinfo; - - struct page *tmppage; - - mempool_t *r1bio_pool; - mempool_t *r1buf_pool; -}; - -typedef struct r1_private_data_s conf_t; - -/* - * this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((conf_t *) mddev->private) - -/* - * this is our 'private' RAID1 bio. - * - * it contains information about what kind of IO operations were started - * for this RAID1 operation, and about their status: - */ - -struct r1bio_s { - atomic_t remaining; /* 'have we finished' count, - * used from IRQ handlers - */ - atomic_t behind_remaining; /* number of write-behind ios remaining - * in this BehindIO request - */ - sector_t sector; - int sectors; - unsigned long state; - mddev_t *mddev; - /* - * original bio going to /dev/mdx - */ - struct bio *master_bio; - /* - * if the IO is in READ direction, then this is where we read - */ - int read_disk; - - struct list_head retry_list; - struct bitmap_update *bitmap_update; - /* - * if the IO is in WRITE direction, then multiple bios are used. - * We choose the number when they are allocated. - */ - struct bio *bios[0]; - /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ -}; - -/* when we get a read error on a read-only array, we redirect to another - * device without failing the first device, or trying to over-write to - * correct the read error. To keep track of bad blocks on a per-bio - * level, we store IO_BLOCKED in the appropriate 'bios' pointer - */ -#define IO_BLOCKED ((struct bio*)1) - -/* bits for r1bio.state */ -#define R1BIO_Uptodate 0 -#define R1BIO_IsSync 1 -#define R1BIO_Degraded 2 -#define R1BIO_BehindIO 3 -#define R1BIO_Barrier 4 -#define R1BIO_BarrierRetry 5 -/* For write-behind requests, we call bi_end_io when - * the last non-write-behind device completes, providing - * any write was successful. Otherwise we call when - * any write-behind write succeeds, otherwise we call - * with failure when last write completes (and all failed). - * Record that bi_end_io was called with this flag... - */ -#define R1BIO_Returned 6 - -#endif diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h deleted file mode 100644 index e9091cfeb28..00000000000 --- a/include/linux/raid/raid10.h +++ /dev/null @@ -1,123 +0,0 @@ -#ifndef _RAID10_H -#define _RAID10_H - -#include <linux/raid/md.h> - -typedef struct mirror_info mirror_info_t; - -struct mirror_info { - mdk_rdev_t *rdev; - sector_t head_position; -}; - -typedef struct r10bio_s r10bio_t; - -struct r10_private_data_s { - mddev_t *mddev; - mirror_info_t *mirrors; - int raid_disks; - spinlock_t device_lock; - - /* geometry */ - int near_copies; /* number of copies layed out raid0 style */ - int far_copies; /* number of copies layed out - * at large strides across drives - */ - int far_offset; /* far_copies are offset by 1 stripe - * instead of many - */ - int copies; /* near_copies * far_copies. - * must be <= raid_disks - */ - sector_t stride; /* distance between far copies. - * This is size / far_copies unless - * far_offset, in which case it is - * 1 stripe. - */ - - int chunk_shift; /* shift from chunks to sectors */ - sector_t chunk_mask; - - struct list_head retry_list; - /* queue pending writes and submit them on unplug */ - struct bio_list pending_bio_list; - - - spinlock_t resync_lock; - int nr_pending; - int nr_waiting; - int nr_queued; - int barrier; - sector_t next_resync; - int fullsync; /* set to 1 if a full sync is needed, - * (fresh device added). - * Cleared when a sync completes. - */ - - wait_queue_head_t wait_barrier; - - mempool_t *r10bio_pool; - mempool_t *r10buf_pool; - struct page *tmppage; -}; - -typedef struct r10_private_data_s conf_t; - -/* - * this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((conf_t *) mddev->private) - -/* - * this is our 'private' RAID10 bio. - * - * it contains information about what kind of IO operations were started - * for this RAID10 operation, and about their status: - */ - -struct r10bio_s { - atomic_t remaining; /* 'have we finished' count, - * used from IRQ handlers - */ - sector_t sector; /* virtual sector number */ - int sectors; - unsigned long state; - mddev_t *mddev; - /* - * original bio going to /dev/mdx - */ - struct bio *master_bio; - /* - * if the IO is in READ direction, then this is where we read - */ - int read_slot; - - struct list_head retry_list; - /* - * if the IO is in WRITE direction, then multiple bios are used, - * one for each copy. - * When resyncing we also use one for each copy. - * When reconstructing, we use 2 bios, one for read, one for write. - * We choose the number when they are allocated. - */ - struct { - struct bio *bio; - sector_t addr; - int devnum; - } devs[0]; -}; - -/* when we get a read error on a read-only array, we redirect to another - * device without failing the first device, or trying to over-write to - * correct the read error. To keep track of bad blocks on a per-bio - * level, we store IO_BLOCKED in the appropriate 'bios' pointer - */ -#define IO_BLOCKED ((struct bio*)1) - -/* bits for r10bio.state */ -#define R10BIO_Uptodate 0 -#define R10BIO_IsSync 1 -#define R10BIO_IsRecover 2 -#define R10BIO_Degraded 3 -#endif diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h deleted file mode 100644 index 3b267279245..00000000000 --- a/include/linux/raid/raid5.h +++ /dev/null @@ -1,402 +0,0 @@ -#ifndef _RAID5_H -#define _RAID5_H - -#include <linux/raid/md.h> -#include <linux/raid/xor.h> - -/* - * - * Each stripe contains one buffer per disc. Each buffer can be in - * one of a number of states stored in "flags". Changes between - * these states happen *almost* exclusively under a per-stripe - * spinlock. Some very specific changes can happen in bi_end_io, and - * these are not protected by the spin lock. - * - * The flag bits that are used to represent these states are: - * R5_UPTODATE and R5_LOCKED - * - * State Empty == !UPTODATE, !LOCK - * We have no data, and there is no active request - * State Want == !UPTODATE, LOCK - * A read request is being submitted for this block - * State Dirty == UPTODATE, LOCK - * Some new data is in this buffer, and it is being written out - * State Clean == UPTODATE, !LOCK - * We have valid data which is the same as on disc - * - * The possible state transitions are: - * - * Empty -> Want - on read or write to get old data for parity calc - * Empty -> Dirty - on compute_parity to satisfy write/sync request.(RECONSTRUCT_WRITE) - * Empty -> Clean - on compute_block when computing a block for failed drive - * Want -> Empty - on failed read - * Want -> Clean - on successful completion of read request - * Dirty -> Clean - on successful completion of write request - * Dirty -> Clean - on failed write - * Clean -> Dirty - on compute_parity to satisfy write/sync (RECONSTRUCT or RMW) - * - * The Want->Empty, Want->Clean, Dirty->Clean, transitions - * all happen in b_end_io at interrupt time. - * Each sets the Uptodate bit before releasing the Lock bit. - * This leaves one multi-stage transition: - * Want->Dirty->Clean - * This is safe because thinking that a Clean buffer is actually dirty - * will at worst delay some action, and the stripe will be scheduled - * for attention after the transition is complete. - * - * There is one possibility that is not covered by these states. That - * is if one drive has failed and there is a spare being rebuilt. We - * can't distinguish between a clean block that has been generated - * from parity calculations, and a clean block that has been - * successfully written to the spare ( or to parity when resyncing). - * To distingush these states we have a stripe bit STRIPE_INSYNC that - * is set whenever a write is scheduled to the spare, or to the parity - * disc if there is no spare. A sync request clears this bit, and - * when we find it set with no buffers locked, we know the sync is - * complete. - * - * Buffers for the md device that arrive via make_request are attached - * to the appropriate stripe in one of two lists linked on b_reqnext. - * One list (bh_read) for read requests, one (bh_write) for write. - * There should never be more than one buffer on the two lists - * together, but we are not guaranteed of that so we allow for more. - * - * If a buffer is on the read list when the associated cache buffer is - * Uptodate, the data is copied into the read buffer and it's b_end_io - * routine is called. This may happen in the end_request routine only - * if the buffer has just successfully been read. end_request should - * remove the buffers from the list and then set the Uptodate bit on - * the buffer. Other threads may do this only if they first check - * that the Uptodate bit is set. Once they have checked that they may - * take buffers off the read queue. - * - * When a buffer on the write list is committed for write it is copied - * into the cache buffer, which is then marked dirty, and moved onto a - * third list, the written list (bh_written). Once both the parity - * block and the cached buffer are successfully written, any buffer on - * a written list can be returned with b_end_io. - * - * The write list and read list both act as fifos. The read list is - * protected by the device_lock. The write and written lists are - * protected by the stripe lock. The device_lock, which can be - * claimed while the stipe lock is held, is only for list - * manipulations and will only be held for a very short time. It can - * be claimed from interrupts. - * - * - * Stripes in the stripe cache can be on one of two lists (or on - * neither). The "inactive_list" contains stripes which are not - * currently being used for any request. They can freely be reused - * for another stripe. The "handle_list" contains stripes that need - * to be handled in some way. Both of these are fifo queues. Each - * stripe is also (potentially) linked to a hash bucket in the hash - * table so that it can be found by sector number. Stripes that are - * not hashed must be on the inactive_list, and will normally be at - * the front. All stripes start life this way. - * - * The inactive_list, handle_list and hash bucket lists are all protected by the - * device_lock. - * - stripes on the inactive_list never have their stripe_lock held. - * - stripes have a reference counter. If count==0, they are on a list. - * - If a stripe might need handling, STRIPE_HANDLE is set. - * - When refcount reaches zero, then if STRIPE_HANDLE it is put on - * handle_list else inactive_list - * - * This, combined with the fact that STRIPE_HANDLE is only ever - * cleared while a stripe has a non-zero count means that if the - * refcount is 0 and STRIPE_HANDLE is set, then it is on the - * handle_list and if recount is 0 and STRIPE_HANDLE is not set, then - * the stripe is on inactive_list. - * - * The possible transitions are: - * activate an unhashed/inactive stripe (get_active_stripe()) - * lockdev check-hash unlink-stripe cnt++ clean-stripe hash-stripe unlockdev - * activate a hashed, possibly active stripe (get_active_stripe()) - * lockdev check-hash if(!cnt++)unlink-stripe unlockdev - * attach a request to an active stripe (add_stripe_bh()) - * lockdev attach-buffer unlockdev - * handle a stripe (handle_stripe()) - * lockstripe clrSTRIPE_HANDLE ... - * (lockdev check-buffers unlockdev) .. - * change-state .. - * record io/ops needed unlockstripe schedule io/ops - * release an active stripe (release_stripe()) - * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev - * - * The refcount counts each thread that have activated the stripe, - * plus raid5d if it is handling it, plus one for each active request - * on a cached buffer, and plus one if the stripe is undergoing stripe - * operations. - * - * Stripe operations are performed outside the stripe lock, - * the stripe operations are: - * -copying data between the stripe cache and user application buffers - * -computing blocks to save a disk access, or to recover a missing block - * -updating the parity on a write operation (reconstruct write and - * read-modify-write) - * -checking parity correctness - * -running i/o to disk - * These operations are carried out by raid5_run_ops which uses the async_tx - * api to (optionally) offload operations to dedicated hardware engines. - * When requesting an operation handle_stripe sets the pending bit for the - * operation and increments the count. raid5_run_ops is then run whenever - * the count is non-zero. - * There are some critical dependencies between the operations that prevent some - * from being requested while another is in flight. - * 1/ Parity check operations destroy the in cache version of the parity block, - * so we prevent parity dependent operations like writes and compute_blocks - * from starting while a check is in progress. Some dma engines can perform - * the check without damaging the parity block, in these cases the parity - * block is re-marked up to date (assuming the check was successful) and is - * not re-read from disk. - * 2/ When a write operation is requested we immediately lock the affected - * blocks, and mark them as not up to date. This causes new read requests - * to be held off, as well as parity checks and compute block operations. - * 3/ Once a compute block operation has been requested handle_stripe treats - * that block as if it is up to date. raid5_run_ops guaruntees that any - * operation that is dependent on the compute block result is initiated after - * the compute block completes. - */ - -/* - * Operations state - intermediate states that are visible outside of sh->lock - * In general _idle indicates nothing is running, _run indicates a data - * processing operation is active, and _result means the data processing result - * is stable and can be acted upon. For simple operations like biofill and - * compute that only have an _idle and _run state they are indicated with - * sh->state flags (STRIPE_BIOFILL_RUN and STRIPE_COMPUTE_RUN) - */ -/** - * enum check_states - handles syncing / repairing a stripe - * @check_state_idle - check operations are quiesced - * @check_state_run - check operation is running - * @check_state_result - set outside lock when check result is valid - * @check_state_compute_run - check failed and we are repairing - * @check_state_compute_result - set outside lock when compute result is valid - */ -enum check_states { - check_state_idle = 0, - check_state_run, /* parity check */ - check_state_check_result, - check_state_compute_run, /* parity repair */ - check_state_compute_result, -}; - -/** - * enum reconstruct_states - handles writing or expanding a stripe - */ -enum reconstruct_states { - reconstruct_state_idle = 0, - reconstruct_state_prexor_drain_run, /* prexor-write */ - reconstruct_state_drain_run, /* write */ - reconstruct_state_run, /* expand */ - reconstruct_state_prexor_drain_result, - reconstruct_state_drain_result, - reconstruct_state_result, -}; - -struct stripe_head { - struct hlist_node hash; - struct list_head lru; /* inactive_list or handle_list */ - struct raid5_private_data *raid_conf; - sector_t sector; /* sector of this row */ - int pd_idx; /* parity disk index */ - unsigned long state; /* state flags */ - atomic_t count; /* nr of active thread/requests */ - spinlock_t lock; - int bm_seq; /* sequence number for bitmap flushes */ - int disks; /* disks in stripe */ - enum check_states check_state; - enum reconstruct_states reconstruct_state; - /* stripe_operations - * @target - STRIPE_OP_COMPUTE_BLK target - */ - struct stripe_operations { - int target; - u32 zero_sum_result; - } ops; - struct r5dev { - struct bio req; - struct bio_vec vec; - struct page *page; - struct bio *toread, *read, *towrite, *written; - sector_t sector; /* sector of this page */ - unsigned long flags; - } dev[1]; /* allocated with extra space depending of RAID geometry */ -}; - -/* stripe_head_state - collects and tracks the dynamic state of a stripe_head - * for handle_stripe. It is only valid under spin_lock(sh->lock); - */ -struct stripe_head_state { - int syncing, expanding, expanded; - int locked, uptodate, to_read, to_write, failed, written; - int to_fill, compute, req_compute, non_overwrite; - int failed_num; - unsigned long ops_request; -}; - -/* r6_state - extra state data only relevant to r6 */ -struct r6_state { - int p_failed, q_failed, qd_idx, failed_num[2]; -}; - -/* Flags */ -#define R5_UPTODATE 0 /* page contains current data */ -#define R5_LOCKED 1 /* IO has been submitted on "req" */ -#define R5_OVERWRITE 2 /* towrite covers whole page */ -/* and some that are internal to handle_stripe */ -#define R5_Insync 3 /* rdev && rdev->in_sync at start */ -#define R5_Wantread 4 /* want to schedule a read */ -#define R5_Wantwrite 5 -#define R5_Overlap 7 /* There is a pending overlapping request on this block */ -#define R5_ReadError 8 /* seen a read error here recently */ -#define R5_ReWrite 9 /* have tried to over-write the readerror */ - -#define R5_Expanded 10 /* This block now has post-expand data */ -#define R5_Wantcompute 11 /* compute_block in progress treat as - * uptodate - */ -#define R5_Wantfill 12 /* dev->toread contains a bio that needs - * filling - */ -#define R5_Wantdrain 13 /* dev->towrite needs to be drained */ -/* - * Write method - */ -#define RECONSTRUCT_WRITE 1 -#define READ_MODIFY_WRITE 2 -/* not a write method, but a compute_parity mode */ -#define CHECK_PARITY 3 - -/* - * Stripe state - */ -#define STRIPE_HANDLE 2 -#define STRIPE_SYNCING 3 -#define STRIPE_INSYNC 4 -#define STRIPE_PREREAD_ACTIVE 5 -#define STRIPE_DELAYED 6 -#define STRIPE_DEGRADED 7 -#define STRIPE_BIT_DELAY 8 -#define STRIPE_EXPANDING 9 -#define STRIPE_EXPAND_SOURCE 10 -#define STRIPE_EXPAND_READY 11 -#define STRIPE_IO_STARTED 12 /* do not count towards 'bypass_count' */ -#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */ -#define STRIPE_BIOFILL_RUN 14 -#define STRIPE_COMPUTE_RUN 15 -/* - * Operation request flags - */ -#define STRIPE_OP_BIOFILL 0 -#define STRIPE_OP_COMPUTE_BLK 1 -#define STRIPE_OP_PREXOR 2 -#define STRIPE_OP_BIODRAIN 3 -#define STRIPE_OP_POSTXOR 4 -#define STRIPE_OP_CHECK 5 - -/* - * Plugging: - * - * To improve write throughput, we need to delay the handling of some - * stripes until there has been a chance that several write requests - * for the one stripe have all been collected. - * In particular, any write request that would require pre-reading - * is put on a "delayed" queue until there are no stripes currently - * in a pre-read phase. Further, if the "delayed" queue is empty when - * a stripe is put on it then we "plug" the queue and do not process it - * until an unplug call is made. (the unplug_io_fn() is called). - * - * When preread is initiated on a stripe, we set PREREAD_ACTIVE and add - * it to the count of prereading stripes. - * When write is initiated, or the stripe refcnt == 0 (just in case) we - * clear the PREREAD_ACTIVE flag and decrement the count - * Whenever the 'handle' queue is empty and the device is not plugged, we - * move any strips from delayed to handle and clear the DELAYED flag and set - * PREREAD_ACTIVE. - * In stripe_handle, if we find pre-reading is necessary, we do it if - * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue. - * HANDLE gets cleared if stripe_handle leave nothing locked. - */ - - -struct disk_info { - mdk_rdev_t *rdev; -}; - -struct raid5_private_data { - struct hlist_head *stripe_hashtbl; - mddev_t *mddev; - struct disk_info *spare; - int chunk_size, level, algorithm; - int max_degraded; - int raid_disks; - int max_nr_stripes; - - /* used during an expand */ - sector_t expand_progress; /* MaxSector when no expand happening */ - sector_t expand_lo; /* from here up to expand_progress it out-of-bounds - * as we haven't flushed the metadata yet - */ - int previous_raid_disks; - - struct list_head handle_list; /* stripes needing handling */ - struct list_head hold_list; /* preread ready stripes */ - struct list_head delayed_list; /* stripes that have plugged requests */ - struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ - struct bio *retry_read_aligned; /* currently retrying aligned bios */ - struct bio *retry_read_aligned_list; /* aligned bios retry list */ - atomic_t preread_active_stripes; /* stripes with scheduled io */ - atomic_t active_aligned_reads; - atomic_t pending_full_writes; /* full write backlog */ - int bypass_count; /* bypassed prereads */ - int bypass_threshold; /* preread nice */ - struct list_head *last_hold; /* detect hold_list promotions */ - - atomic_t reshape_stripes; /* stripes with pending writes for reshape */ - /* unfortunately we need two cache names as we temporarily have - * two caches. - */ - int active_name; - char cache_name[2][20]; - struct kmem_cache *slab_cache; /* for allocating stripes */ - - int seq_flush, seq_write; - int quiesce; - - int fullsync; /* set to 1 if a full sync is needed, - * (fresh device added). - * Cleared when a sync completes. - */ - - struct page *spare_page; /* Used when checking P/Q in raid6 */ - - /* - * Free stripes pool - */ - atomic_t active_stripes; - struct list_head inactive_list; - wait_queue_head_t wait_for_stripe; - wait_queue_head_t wait_for_overlap; - int inactive_blocked; /* release of inactive stripes blocked, - * waiting for 25% to be free - */ - int pool_size; /* number of disks in stripeheads in pool */ - spinlock_t device_lock; - struct disk_info *disks; -}; - -typedef struct raid5_private_data raid5_conf_t; - -#define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private) - -/* - * Our supported algorithms - */ -#define ALGORITHM_LEFT_ASYMMETRIC 0 -#define ALGORITHM_RIGHT_ASYMMETRIC 1 -#define ALGORITHM_LEFT_SYMMETRIC 2 -#define ALGORITHM_RIGHT_SYMMETRIC 3 - -#endif diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 3e120587ead..5a210959e3f 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -1,8 +1,6 @@ #ifndef _XOR_H #define _XOR_H -#include <linux/raid/md.h> - #define MAX_XOR_BLOCKS 4 extern void xor_blocks(unsigned int count, unsigned int bytes, diff --git a/include/linux/rtc-v3020.h b/include/linux/rtc-v3020.h index bf74e63c98f..8ba646e610d 100644 --- a/include/linux/rtc-v3020.h +++ b/include/linux/rtc-v3020.h @@ -14,6 +14,12 @@ * is used depends on the board. */ struct v3020_platform_data { int leftshift; /* (1<<(leftshift)) & readl() */ + + int use_gpio:1; + unsigned int gpio_cs; + unsigned int gpio_wr; + unsigned int gpio_rd; + unsigned int gpio_io; }; #define V3020_STATUS_0 0x00 diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 4046b75563c..60f88a7fb13 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -99,6 +99,7 @@ struct rtc_pll_info { #ifdef __KERNEL__ +#include <linux/types.h> #include <linux/interrupt.h> extern int rtc_month_days(unsigned int month, unsigned int year); @@ -232,6 +233,11 @@ int rtc_register(rtc_task_t *task); int rtc_unregister(rtc_task_t *task); int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg); +static inline bool is_leap_year(unsigned int year) +{ + return (!(year % 4) && (year % 100)) || !(year % 400); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_RTC_H_ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 29df6374d2d..9da5aa0771e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -68,7 +68,7 @@ struct sched_param { #include <linux/smp.h> #include <linux/sem.h> #include <linux/signal.h> -#include <linux/fs_struct.h> +#include <linux/path.h> #include <linux/compiler.h> #include <linux/completion.h> #include <linux/pid.h> @@ -97,6 +97,7 @@ struct futex_pi_state; struct robust_list_head; struct bio; struct bts_tracer; +struct fs_struct; /* * List of flags we want to share for kernel threads, @@ -391,8 +392,15 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); (mm)->hiwater_vm = (mm)->total_vm; \ } while (0) -#define get_mm_hiwater_rss(mm) max((mm)->hiwater_rss, get_mm_rss(mm)) -#define get_mm_hiwater_vm(mm) max((mm)->hiwater_vm, (mm)->total_vm) +static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) +{ + return max(mm->hiwater_rss, get_mm_rss(mm)); +} + +static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm) +{ + return max(mm->hiwater_vm, mm->total_vm); +} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); @@ -540,25 +548,8 @@ struct signal_struct { struct list_head cpu_timers[3]; - /* job control IDs */ - - /* - * pgrp and session fields are deprecated. - * use the task_session_Xnr and task_pgrp_Xnr routines below - */ - - union { - pid_t pgrp __deprecated; - pid_t __pgrp; - }; - struct pid *tty_old_pgrp; - union { - pid_t session __deprecated; - pid_t __session; - }; - /* boolean value for session group leader */ int leader; @@ -1462,16 +1453,6 @@ static inline int rt_task(struct task_struct *p) return rt_prio(p->prio); } -static inline void set_task_session(struct task_struct *tsk, pid_t session) -{ - tsk->signal->__session = session; -} - -static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp) -{ - tsk->signal->__pgrp = pgrp; -} - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; @@ -1482,6 +1463,11 @@ static inline struct pid *task_tgid(struct task_struct *task) return task->group_leader->pids[PIDTYPE_PID].pid; } +/* + * Without tasklist or rcu lock it is not safe to dereference + * the result of task_pgrp/task_session even if task == current, + * we can race with another thread doing sys_setsid/sys_setpgid. + */ static inline struct pid *task_pgrp(struct task_struct *task) { return task->group_leader->pids[PIDTYPE_PGID].pid; @@ -1507,17 +1493,23 @@ struct pid_namespace; * * see also pid_nr() etc in include/linux/pid.h */ +pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, + struct pid_namespace *ns); static inline pid_t task_pid_nr(struct task_struct *tsk) { return tsk->pid; } -pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); +static inline pid_t task_pid_nr_ns(struct task_struct *tsk, + struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); +} static inline pid_t task_pid_vnr(struct task_struct *tsk) { - return pid_vnr(task_pid(tsk)); + return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); } @@ -1534,31 +1526,34 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk) } -static inline pid_t task_pgrp_nr(struct task_struct *tsk) +static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, + struct pid_namespace *ns) { - return tsk->signal->__pgrp; + return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); } -pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); - static inline pid_t task_pgrp_vnr(struct task_struct *tsk) { - return pid_vnr(task_pgrp(tsk)); + return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); } -static inline pid_t task_session_nr(struct task_struct *tsk) +static inline pid_t task_session_nr_ns(struct task_struct *tsk, + struct pid_namespace *ns) { - return tsk->signal->__session; + return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); } -pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); - static inline pid_t task_session_vnr(struct task_struct *tsk) { - return pid_vnr(task_session(tsk)); + return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } +/* obsolete, do not use */ +static inline pid_t task_pgrp_nr(struct task_struct *tsk) +{ + return task_pgrp_nr_ns(tsk, &init_pid_ns); +} /** * pid_alive - check that a task structure is not stale @@ -1968,7 +1963,8 @@ extern void mm_release(struct task_struct *, struct mm_struct *); /* Allocate a new mm structure and copy contents from tsk->mm */ extern struct mm_struct *dup_mm(struct task_struct *tsk); -extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); +extern int copy_thread(unsigned long, unsigned long, unsigned long, + struct task_struct *, struct pt_regs *); extern void flush_thread(void); extern void exit_thread(void); @@ -2053,6 +2049,11 @@ static inline int thread_group_empty(struct task_struct *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) +static inline int task_detached(struct task_struct *p) +{ + return p->exit_signal == -1; +} + /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h new file mode 100644 index 00000000000..85958277f83 --- /dev/null +++ b/include/linux/slow-work.h @@ -0,0 +1,95 @@ +/* Worker thread pool for slow items, such as filesystem lookups or mkdirs + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + * + * See Documentation/slow-work.txt + */ + +#ifndef _LINUX_SLOW_WORK_H +#define _LINUX_SLOW_WORK_H + +#ifdef CONFIG_SLOW_WORK + +#include <linux/sysctl.h> + +struct slow_work; + +/* + * The operations used to support slow work items + */ +struct slow_work_ops { + /* get a ref on a work item + * - return 0 if successful, -ve if not + */ + int (*get_ref)(struct slow_work *work); + + /* discard a ref to a work item */ + void (*put_ref)(struct slow_work *work); + + /* execute a work item */ + void (*execute)(struct slow_work *work); +}; + +/* + * A slow work item + * - A reference is held on the parent object by the thread pool when it is + * queued + */ +struct slow_work { + unsigned long flags; +#define SLOW_WORK_PENDING 0 /* item pending (further) execution */ +#define SLOW_WORK_EXECUTING 1 /* item currently executing */ +#define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ +#define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ + const struct slow_work_ops *ops; /* operations table for this item */ + struct list_head link; /* link in queue */ +}; + +/** + * slow_work_init - Initialise a slow work item + * @work: The work item to initialise + * @ops: The operations to use to handle the slow work item + * + * Initialise a slow work item. + */ +static inline void slow_work_init(struct slow_work *work, + const struct slow_work_ops *ops) +{ + work->flags = 0; + work->ops = ops; + INIT_LIST_HEAD(&work->link); +} + +/** + * slow_work_init - Initialise a very slow work item + * @work: The work item to initialise + * @ops: The operations to use to handle the slow work item + * + * Initialise a very slow work item. This item will be restricted such that + * only a certain number of the pool threads will be able to execute items of + * this type. + */ +static inline void vslow_work_init(struct slow_work *work, + const struct slow_work_ops *ops) +{ + work->flags = 1 << SLOW_WORK_VERY_SLOW; + work->ops = ops; + INIT_LIST_HEAD(&work->link); +} + +extern int slow_work_enqueue(struct slow_work *work); +extern int slow_work_register_user(void); +extern void slow_work_unregister_user(void); + +#ifdef CONFIG_SYSCTL +extern ctl_table slow_work_sysctls[]; +#endif + +#endif /* CONFIG_SLOW_WORK */ +#endif /* _LINUX_SLOW_WORK_H */ diff --git a/include/linux/spi/eeprom.h b/include/linux/spi/eeprom.h index 1085212c446..306e7b1c69e 100644 --- a/include/linux/spi/eeprom.h +++ b/include/linux/spi/eeprom.h @@ -1,6 +1,8 @@ #ifndef __LINUX_SPI_EEPROM_H #define __LINUX_SPI_EEPROM_H +#include <linux/memory.h> + /* * Put one of these structures in platform_data for SPI EEPROMS handled * by the "at25" driver. On SPI, most EEPROMS understand the same core @@ -17,6 +19,10 @@ struct spi_eeprom { #define EE_ADDR2 0x0002 /* 16 bit addrs */ #define EE_ADDR3 0x0004 /* 24 bit addrs */ #define EE_READONLY 0x0008 /* disallow writes */ + + /* for exporting this chip's data to other kernel code */ + void (*setup)(struct memory_accessor *mem, void *context); + void *context; }; #endif /* __LINUX_SPI_EEPROM_H */ diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h index 0f01a0f1f40..ca6782ee4b9 100644 --- a/include/linux/spi/spi_gpio.h +++ b/include/linux/spi/spi_gpio.h @@ -25,10 +25,16 @@ * ... * }; * + * If chipselect is not used (there's only one device on the bus), assign + * SPI_GPIO_NO_CHIPSELECT to the controller_data: + * .controller_data = (void *) SPI_GPIO_NO_CHIPSELECT; + * * If the bitbanged bus is later switched to a "native" controller, * that platform_device and controller_data should be removed. */ +#define SPI_GPIO_NO_CHIPSELECT ((unsigned long)-1l) + /** * struct spi_gpio_platform_data - parameter for bitbanged SPI master * @sck: number of the GPIO used for clock output diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index a0c66a2e00a..252b245cfcf 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -153,9 +153,11 @@ do { \ extern int _raw_spin_trylock(spinlock_t *lock); extern void _raw_spin_unlock(spinlock_t *lock); extern void _raw_read_lock(rwlock_t *lock); +#define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock) extern int _raw_read_trylock(rwlock_t *lock); extern void _raw_read_unlock(rwlock_t *lock); extern void _raw_write_lock(rwlock_t *lock); +#define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock) extern int _raw_write_trylock(rwlock_t *lock); extern void _raw_write_unlock(rwlock_t *lock); #else @@ -165,9 +167,13 @@ do { \ # define _raw_spin_trylock(lock) __raw_spin_trylock(&(lock)->raw_lock) # define _raw_spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock) # define _raw_read_lock(rwlock) __raw_read_lock(&(rwlock)->raw_lock) +# define _raw_read_lock_flags(lock, flags) \ + __raw_read_lock_flags(&(lock)->raw_lock, *(flags)) # define _raw_read_trylock(rwlock) __raw_read_trylock(&(rwlock)->raw_lock) # define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock) # define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock) +# define _raw_write_lock_flags(lock, flags) \ + __raw_write_lock_flags(&(lock)->raw_lock, *(flags)) # define _raw_write_trylock(rwlock) __raw_write_trylock(&(rwlock)->raw_lock) # define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock) #endif diff --git a/include/linux/string.h b/include/linux/string.h index d18fc198aa2..8852739f36d 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -12,6 +12,7 @@ #include <linux/stddef.h> /* for NULL */ extern char *strndup_user(const char __user *, long); +extern void *memdup_user(const void __user *, size_t); /* * Include machine specific inline routines diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3435d24bfe5..d3a4c023193 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -69,7 +69,6 @@ struct svc_serv { struct list_head sv_tempsocks; /* all temporary sockets */ int sv_tmpcnt; /* count of temporary sockets */ struct timer_list sv_temptimer; /* timer for aging temporary sockets */ - sa_family_t sv_family; /* listener's address family */ char * sv_name; /* service name */ @@ -385,19 +384,19 @@ struct svc_procedure { /* * Function prototypes. */ -struct svc_serv *svc_create(struct svc_program *, unsigned int, sa_family_t, +struct svc_serv *svc_create(struct svc_program *, unsigned int, void (*shutdown)(struct svc_serv *)); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - sa_family_t, void (*shutdown)(struct svc_serv *), + void (*shutdown)(struct svc_serv *), svc_thread_fn, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); void svc_destroy(struct svc_serv *); int svc_process(struct svc_rqst *); -int svc_register(const struct svc_serv *, const unsigned short, - const unsigned short); +int svc_register(const struct svc_serv *, const int, + const unsigned short, const unsigned short); void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0127daca435..0d9cb6ef28b 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -71,7 +71,8 @@ int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_create_xprt(struct svc_serv *, char *, unsigned short, int); +int svc_create_xprt(struct svc_serv *, const char *, const int, + const unsigned short, int); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_received(struct svc_xprt *); void svc_xprt_put(struct svc_xprt *xprt); @@ -80,7 +81,8 @@ void svc_close_xprt(struct svc_xprt *xprt); void svc_delete_xprt(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); -struct svc_xprt *svc_find_xprt(struct svc_serv *, char *, int, int); +struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, + const sa_family_t af, const unsigned short port); int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen); static inline void svc_xprt_get(struct svc_xprt *xprt) @@ -88,29 +90,32 @@ static inline void svc_xprt_get(struct svc_xprt *xprt) kref_get(&xprt->xpt_ref); } static inline void svc_xprt_set_local(struct svc_xprt *xprt, - struct sockaddr *sa, int salen) + const struct sockaddr *sa, + const size_t salen) { memcpy(&xprt->xpt_local, sa, salen); xprt->xpt_locallen = salen; } static inline void svc_xprt_set_remote(struct svc_xprt *xprt, - struct sockaddr *sa, int salen) + const struct sockaddr *sa, + const size_t salen) { memcpy(&xprt->xpt_remote, sa, salen); xprt->xpt_remotelen = salen; } -static inline unsigned short svc_addr_port(struct sockaddr *sa) +static inline unsigned short svc_addr_port(const struct sockaddr *sa) { - unsigned short ret = 0; + const struct sockaddr_in *sin = (const struct sockaddr_in *)sa; + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa; + switch (sa->sa_family) { case AF_INET: - ret = ntohs(((struct sockaddr_in *)sa)->sin_port); - break; + return ntohs(sin->sin_port); case AF_INET6: - ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port); - break; + return ntohs(sin6->sin6_port); } - return ret; + + return 0; } static inline size_t svc_addr_len(struct sockaddr *sa) @@ -124,36 +129,39 @@ static inline size_t svc_addr_len(struct sockaddr *sa) return -EAFNOSUPPORT; } -static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt) +static inline unsigned short svc_xprt_local_port(const struct svc_xprt *xprt) { - return svc_addr_port((struct sockaddr *)&xprt->xpt_local); + return svc_addr_port((const struct sockaddr *)&xprt->xpt_local); } -static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt) +static inline unsigned short svc_xprt_remote_port(const struct svc_xprt *xprt) { - return svc_addr_port((struct sockaddr *)&xprt->xpt_remote); + return svc_addr_port((const struct sockaddr *)&xprt->xpt_remote); } -static inline char *__svc_print_addr(struct sockaddr *addr, - char *buf, size_t len) +static inline char *__svc_print_addr(const struct sockaddr *addr, + char *buf, const size_t len) { + const struct sockaddr_in *sin = (const struct sockaddr_in *)addr; + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)addr; + switch (addr->sa_family) { case AF_INET: - snprintf(buf, len, "%pI4, port=%u", - &((struct sockaddr_in *)addr)->sin_addr, - ntohs(((struct sockaddr_in *) addr)->sin_port)); + snprintf(buf, len, "%pI4, port=%u", &sin->sin_addr, + ntohs(sin->sin_port)); break; case AF_INET6: snprintf(buf, len, "%pI6, port=%u", - &((struct sockaddr_in6 *)addr)->sin6_addr, - ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); + &sin6->sin6_addr, + ntohs(sin6->sin6_port)); break; default: snprintf(buf, len, "unknown address type: %d", addr->sa_family); break; } + return buf; } #endif /* SUNRPC_SVC_XPRT_H */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 11fc71d50c1..1758d9f5b5c 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -235,6 +235,7 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * */ int xprt_register_transport(struct xprt_class *type); int xprt_unregister_transport(struct xprt_class *type); +int xprt_load_transport(const char *); void xprt_set_retrans_timeout_def(struct rpc_task *task); void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); @@ -259,6 +260,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); #define XPRT_BOUND (4) #define XPRT_BINDING (5) #define XPRT_CLOSING (6) +#define XPRT_CONNECTION_ABORT (7) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/include/linux/suspend.h b/include/linux/suspend.h index c7d9bb1832b..3e3a4364cbf 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -1,9 +1,6 @@ #ifndef _LINUX_SUSPEND_H #define _LINUX_SUSPEND_H -#if defined(CONFIG_X86) || defined(CONFIG_FRV) || defined(CONFIG_PPC32) || defined(CONFIG_PPC64) -#include <asm/suspend.h> -#endif #include <linux/swap.h> #include <linux/notifier.h> #include <linux/init.h> diff --git a/include/linux/swap.h b/include/linux/swap.h index d3021557887..62d81435347 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -212,7 +212,7 @@ static inline void lru_cache_add_active_file(struct page *page) /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, - gfp_t gfp_mask); + gfp_t gfp_mask, nodemask_t *mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, unsigned int swappiness); @@ -382,6 +382,11 @@ static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, return NULL; } +static inline int swap_writepage(struct page *p, struct writeback_control *wbc) +{ + return 0; +} + static inline struct page *lookup_swap_cache(swp_entry_t swp) { return NULL; diff --git a/include/linux/synclink.h b/include/linux/synclink.h index 99b8bdb17b2..0ff2779c44d 100644 --- a/include/linux/synclink.h +++ b/include/linux/synclink.h @@ -125,6 +125,7 @@ #define MGSL_MODE_MONOSYNC 3 #define MGSL_MODE_BISYNC 4 #define MGSL_MODE_RAW 6 +#define MGSL_MODE_BASE_CLOCK 7 #define MGSL_BUS_TYPE_ISA 1 #define MGSL_BUS_TYPE_EISA 2 diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f9f900cfd06..b299a82a05e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -461,6 +461,10 @@ asmlinkage long sys_pread64(unsigned int fd, char __user *buf, size_t count, loff_t pos); asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); +asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, u32 pos_high, u32 pos_low); +asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, u32 pos_high, u32 pos_low); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_mkdir(const char __user *pathname, int mode); asmlinkage long sys_chdir(const char __user *filename); diff --git a/include/linux/timeriomem-rng.h b/include/linux/timeriomem-rng.h index dd253177f65..3e08a1c8683 100644 --- a/include/linux/timeriomem-rng.h +++ b/include/linux/timeriomem-rng.h @@ -14,7 +14,7 @@ struct timeriomem_rng_data { struct completion completion; unsigned int present:1; - u32 __iomem *address; + void __iomem *address; /* measures in usecs */ unsigned int period; diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 6186a789d6c..c7aa154f4bf 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -388,17 +388,14 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info, * tracehook_consider_ignored_signal - suppress short-circuit of ignored signal * @task: task receiving the signal * @sig: signal number being sent - * @handler: %SIG_IGN or %SIG_DFL * * Return zero iff tracing doesn't care to examine this ignored signal, * so it can short-circuit normal delivery and never even get queued. - * Either @handler is %SIG_DFL and @sig's default is ignore, or it's %SIG_IGN. * * Called with @task->sighand->siglock held. */ static inline int tracehook_consider_ignored_signal(struct task_struct *task, - int sig, - void __user *handler) + int sig) { return (task_ptrace(task) & PT_PTRACED) != 0; } @@ -407,19 +404,17 @@ static inline int tracehook_consider_ignored_signal(struct task_struct *task, * tracehook_consider_fatal_signal - suppress special handling of fatal signal * @task: task receiving the signal * @sig: signal number being sent - * @handler: %SIG_DFL or %SIG_IGN * * Return nonzero to prevent special handling of this termination signal. - * Normally @handler is %SIG_DFL. It can be %SIG_IGN if @sig is ignored, - * in which case force_sig() is about to reset it to %SIG_DFL. + * Normally handler for signal is %SIG_DFL. It can be %SIG_IGN if @sig is + * ignored, in which case force_sig() is about to reset it to %SIG_DFL. * When this returns zero, this signal might cause a quick termination * that does not give the debugger a chance to intercept the signal. * * Called with or without @task->sighand->siglock held. */ static inline int tracehook_consider_fatal_signal(struct task_struct *task, - int sig, - void __user *handler) + int sig) { return (task_ptrace(task) & PT_PTRACED) != 0; } @@ -507,7 +502,7 @@ static inline int tracehook_notify_jctl(int notify, int why) static inline int tracehook_notify_death(struct task_struct *task, void **death_cookie, int group_dead) { - if (task->exit_signal == -1) + if (task_detached(task)) return task->ptrace ? SIGCHLD : DEATH_REAP; /* diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 08e088334db..8615d661ab6 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -252,8 +252,6 @@ struct tty_operations { void (*set_ldisc)(struct tty_struct *tty); void (*wait_until_sent)(struct tty_struct *tty, int timeout); void (*send_xchar)(struct tty_struct *tty, char ch); - int (*read_proc)(char *page, char **start, off_t off, - int count, int *eof, void *data); int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); @@ -264,6 +262,7 @@ struct tty_operations { int (*poll_get_char)(struct tty_driver *driver, int line); void (*poll_put_char)(struct tty_driver *driver, int line, char ch); #endif + const struct file_operations *proc_fops; }; struct tty_driver { diff --git a/include/linux/wait.h b/include/linux/wait.h index a210ede73b5..5d631c17eae 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -135,8 +135,11 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync, void *key); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode); -extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); +void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, + void *key); +void __wake_up_locked(wait_queue_head_t *q, unsigned int mode); +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); void __wake_up_bit(wait_queue_head_t *, void *, int); int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); @@ -155,21 +158,17 @@ wait_queue_head_t *bit_waitqueue(void *, int); #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) #define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1) -#ifdef CONFIG_DEBUG_LOCK_ALLOC /* - * macro to avoid include hell + * Wakeup macros to be used to report events to the targets. */ -#define wake_up_nested(x, s) \ -do { \ - unsigned long flags; \ - \ - spin_lock_irqsave_nested(&(x)->lock, flags, (s)); \ - wake_up_locked(x); \ - spin_unlock_irqrestore(&(x)->lock, flags); \ -} while (0) -#else -#define wake_up_nested(x, s) wake_up(x) -#endif +#define wake_up_poll(x, m) \ + __wake_up(x, TASK_NORMAL, 1, (void *) (m)) +#define wake_up_locked_poll(x, m) \ + __wake_up_locked_key((x), TASK_NORMAL, (void *) (m)) +#define wake_up_interruptible_poll(x, m) \ + __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m)) +#define wake_up_interruptible_sync_poll(x, m) \ + __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m)) #define __wait_event(wq, condition) \ do { \ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 3cd51e579ab..13e1adf55c4 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -41,6 +41,11 @@ struct delayed_work { struct timer_list timer; }; +static inline struct delayed_work *to_delayed_work(struct work_struct *work) +{ + return container_of(work, struct delayed_work, work); +} + struct execute_work { struct work_struct work; }; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 7300ecdc480..93445477f86 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -109,8 +109,8 @@ extern int dirty_background_ratio; extern unsigned long dirty_background_bytes; extern int vm_dirty_ratio; extern unsigned long vm_dirty_bytes; -extern int dirty_writeback_interval; -extern int dirty_expire_interval; +extern unsigned int dirty_writeback_interval; +extern unsigned int dirty_expire_interval; extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; |