diff options
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/caching/netfs-api.txt | 73 | ||||
-rw-r--r-- | Documentation/filesystems/directory-locking | 31 | ||||
-rw-r--r-- | Documentation/filesystems/f2fs.txt | 7 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 8 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 1 | ||||
-rw-r--r-- | Documentation/filesystems/vfat.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 14 |
7 files changed, 106 insertions, 30 deletions
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index 11a0a40ce44..aed6b94160b 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt @@ -29,15 +29,16 @@ This document contains the following sections: (6) Index registration (7) Data file registration (8) Miscellaneous object registration - (9) Setting the data file size + (9) Setting the data file size (10) Page alloc/read/write (11) Page uncaching (12) Index and data file consistency - (13) Miscellaneous cookie operations - (14) Cookie unregistration - (15) Index invalidation - (16) Data file invalidation - (17) FS-Cache specific page flags. + (13) Cookie enablement + (14) Miscellaneous cookie operations + (15) Cookie unregistration + (16) Index invalidation + (17) Data file invalidation + (18) FS-Cache specific page flags. ============================= @@ -334,7 +335,8 @@ the path to the file: struct fscache_cookie * fscache_acquire_cookie(struct fscache_cookie *parent, const struct fscache_object_def *def, - void *netfs_data); + void *netfs_data, + bool enable); This function creates an index entry in the index represented by parent, filling in the index entry by calling the operations pointed to by def. @@ -350,6 +352,10 @@ object needs to be created somewhere down the hierarchy. Furthermore, an index may be created in several different caches independently at different times. This is all handled transparently, and the netfs doesn't see any of it. +A cookie will be created in the disabled state if enabled is false. A cookie +must be enabled to do anything with it. A disabled cookie can be enabled by +calling fscache_enable_cookie() (see below). + For example, with AFS, a cell would be added to the primary index. This index entry would have a dependent inode containing a volume location index for the volume mappings within this cell: @@ -357,7 +363,7 @@ volume mappings within this cell: cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, &afs_cell_cache_index_def, - cell); + cell, true); Then when a volume location was accessed, it would be entered into the cell's index and an inode would be allocated that acts as a volume type and hash chain @@ -366,7 +372,7 @@ combination: vlocation->cache = fscache_acquire_cookie(cell->cache, &afs_vlocation_cache_index_def, - vlocation); + vlocation, true); And then a particular flavour of volume (R/O for example) could be added to that index, creating another index for vnodes (AFS inode equivalents): @@ -374,7 +380,7 @@ that index, creating another index for vnodes (AFS inode equivalents): volume->cache = fscache_acquire_cookie(vlocation->cache, &afs_volume_cache_index_def, - volume); + volume, true); ====================== @@ -388,7 +394,7 @@ the object definition should be something other than index type. vnode->cache = fscache_acquire_cookie(volume->cache, &afs_vnode_cache_object_def, - vnode); + vnode, true); ================================= @@ -404,7 +410,7 @@ it would be some other type of object such as a data file. xattr->cache = fscache_acquire_cookie(vnode->cache, &afs_xattr_cache_object_def, - xattr); + xattr, true); Miscellaneous objects might be used to store extended attributes or directory entries for example. @@ -733,6 +739,47 @@ Note that partial updates may happen automatically at other times, such as when data blocks are added to a data file object. +================= +COOKIE ENABLEMENT +================= + +Cookies exist in one of two states: enabled and disabled. If a cookie is +disabled, it ignores all attempts to acquire child cookies; check, update or +invalidate its state; allocate, read or write backing pages - though it is +still possible to uncache pages and relinquish the cookie. + +The initial enablement state is set by fscache_acquire_cookie(), but the cookie +can be enabled or disabled later. To disable a cookie, call: + + void fscache_disable_cookie(struct fscache_cookie *cookie, + bool invalidate); + +If the cookie is not already disabled, this locks the cookie against other +enable and disable ops, marks the cookie as being disabled, discards or +invalidates any backing objects and waits for cessation of activity on any +associated object before unlocking the cookie. + +All possible failures are handled internally. The caller should consider +calling fscache_uncache_all_inode_pages() afterwards to make sure all page +markings are cleared up. + +Cookies can be enabled or reenabled with: + + void fscache_enable_cookie(struct fscache_cookie *cookie, + bool (*can_enable)(void *data), + void *data) + +If the cookie is not already enabled, this locks the cookie against other +enable and disable ops, invokes can_enable() and, if the cookie is not an index +cookie, will begin the procedure of acquiring backing objects. + +The optional can_enable() function is passed the data argument and returns a +ruling as to whether or not enablement should actually be permitted to begin. + +All possible failures are handled internally. The cookie will only be marked +as enabled if provisional backing objects are allocated. + + =============================== MISCELLANEOUS COOKIE OPERATIONS =============================== @@ -778,7 +825,7 @@ COOKIE UNREGISTRATION To get rid of a cookie, this function should be called. void fscache_relinquish_cookie(struct fscache_cookie *cookie, - int retire); + bool retire); If retire is non-zero, then the object will be marked for recycling, and all copies of it will be removed from all active caches in which it is present. diff --git a/Documentation/filesystems/directory-locking b/Documentation/filesystems/directory-locking index ff7b611abf3..09bbf9a54f8 100644 --- a/Documentation/filesystems/directory-locking +++ b/Documentation/filesystems/directory-locking @@ -2,6 +2,10 @@ kinds of locks - per-inode (->i_mutex) and per-filesystem (->s_vfs_rename_mutex). + When taking the i_mutex on multiple non-directory objects, we +always acquire the locks in order by increasing address. We'll call +that "inode pointer" order in the following. + For our purposes all operations fall in 5 classes: 1) read access. Locking rules: caller locks directory we are accessing. @@ -12,8 +16,9 @@ kinds of locks - per-inode (->i_mutex) and per-filesystem locks victim and calls the method. 4) rename() that is _not_ cross-directory. Locking rules: caller locks -the parent, finds source and target, if target already exists - locks it -and then calls the method. +the parent and finds source and target. If target already exists, lock +it. If source is a non-directory, lock it. If that means we need to +lock both, lock them in inode pointer order. 5) link creation. Locking rules: * lock parent @@ -30,7 +35,9 @@ rules: fail with -ENOTEMPTY * if new parent is equal to or is a descendent of source fail with -ELOOP - * if target exists - lock it. + * If target exists, lock it. If source is a non-directory, lock + it. In case that means we need to lock both source and target, + do so in inode pointer order. * call the method. @@ -56,9 +63,11 @@ objects - A < B iff A is an ancestor of B. renames will be blocked on filesystem lock and we don't start changing the order until we had acquired all locks). -(3) any operation holds at most one lock on non-directory object and - that lock is acquired after all other locks. (Proof: see descriptions - of operations). +(3) locks on non-directory objects are acquired only after locks on + directory objects, and are acquired in inode pointer order. + (Proof: all operations but renames take lock on at most one + non-directory object, except renames, which take locks on source and + target in inode pointer order in the case they are not directories.) Now consider the minimal deadlock. Each process is blocked on attempt to acquire some lock and already holds at least one lock. Let's @@ -66,9 +75,13 @@ consider the set of contended locks. First of all, filesystem lock is not contended, since any process blocked on it is not holding any locks. Thus all processes are blocked on ->i_mutex. - Non-directory objects are not contended due to (3). Thus link -creation can't be a part of deadlock - it can't be blocked on source -and it means that it doesn't hold any locks. + By (3), any process holding a non-directory lock can only be +waiting on another non-directory lock with a larger address. Therefore +the process holding the "largest" such lock can always make progress, and +non-directory objects are not included in the set of contended locks. + + Thus link creation can't be a part of deadlock - it can't be +blocked on source and it means that it doesn't hold any locks. Any contended object is either held by cross-directory rename or has a child that is also contended. Indeed, suppose that it is held by diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 3cd27bed634..a3fe811bbdb 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -119,6 +119,7 @@ active_logs=%u Support configuring the number of active logs. In the Default number is 6. disable_ext_identify Disable the extension list configured by mkfs, so f2fs does not aware of cold files such as media files. +inline_xattr Enable the inline xattrs feature. ================================================================================ DEBUGFS ENTRIES @@ -164,6 +165,12 @@ Files in /sys/fs/f2fs/<devname> gc_idle = 1 will select the Cost Benefit approach & setting gc_idle = 2 will select the greedy aproach. + reclaim_segments This parameter controls the number of prefree + segments to be reclaimed. If the number of prefree + segments is larger than this number, f2fs tries to + conduct checkpoint to reclaim the prefree segments + to free segments. By default, 100 segments, 200MB. + ================================================================================ USAGE ================================================================================ diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index f0890581f7f..fe2b7ae6f96 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -455,3 +455,11 @@ in your dentry operations instead. vfs_follow_link has been removed. Filesystems must use nd_set_link from ->follow_link for normal symlinks, or nd_jump_link for magic /proc/<pid> style links. +-- +[mandatory] + iget5_locked()/ilookup5()/ilookup5_nowait() test() callback used to be + called with both ->i_lock and inode_hash_lock held; the former is *not* + taken anymore, so verify that your callbacks do not rely on it (none + of the in-tree instances did). inode_hash_lock is still held, + of course, so they are still serialized wrt removal from inode hash, + as well as wrt set() callback of iget5_locked(). diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 823c95faebd..22d89aa3721 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -460,6 +460,7 @@ manner. The codes are the following: nl - non-linear mapping ar - architecture specific flag dd - do not include area into core dump + sd - soft-dirty flag mm - mixed map area hg - huge page advise flag nh - no-huge page advise flag diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index aa1f459fa6c..4a93e98b290 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -307,7 +307,7 @@ the following: <proceeding files...> <slot #3, id = 0x43, characters = "h is long"> - <slot #2, id = 0x02, characters = "xtension which"> + <slot #2, id = 0x02, characters = "xtension whic"> <slot #1, id = 0x01, characters = "My Big File.E"> <directory entry, name = "MYBIGFIL.EXT"> diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index f93a88250a4..deb48b5fd88 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -359,11 +359,9 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*update_time)(struct inode *, struct timespec *, int); - int (*atomic_open)(struct inode *, struct dentry *, + int (*atomic_open)(struct inode *, struct dentry *, struct file *, + unsigned open_flag, umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); -} ____cacheline_aligned; - struct file *, unsigned open_flag, - umode_t create_mode, int *opened); }; Again, all methods are called without any locks being held, unless @@ -470,9 +468,11 @@ otherwise noted. method the filesystem can look up, possibly create and open the file in one atomic operation. If it cannot perform this (e.g. the file type turned out to be wrong) it may signal this by returning 1 instead of - usual 0 or -ve . This method is only called if the last - component is negative or needs lookup. Cached positive dentries are - still handled by f_op->open(). + usual 0 or -ve . This method is only called if the last component is + negative or needs lookup. Cached positive dentries are still handled by + f_op->open(). If the file was created, the FILE_CREATED flag should be + set in "opened". In case of O_EXCL the method must only succeed if the + file didn't exist and hence FILE_CREATED shall always be set on success. tmpfile: called in the end of O_TMPFILE open(). Optional, equivalent to atomically creating, opening and unlinking a file in given directory. |