diff options
59 files changed, 886 insertions, 708 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 2e994efe12c..61b31acb917 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -128,7 +128,7 @@ alloc_inode: destroy_inode: dirty_inode: (must not sleep) write_inode: -drop_inode: !!!inode_lock!!! +drop_inode: !!!inode->i_lock!!! evict_inode: put_super: write write_super: read diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 0c986c9e851..6e29954851a 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -298,11 +298,14 @@ be used instead. It gets called whenever the inode is evicted, whether it has remaining links or not. Caller does *not* evict the pagecache or inode-associated metadata buffers; getting rid of those is responsibility of method, as it had been for ->delete_inode(). - ->drop_inode() returns int now; it's called on final iput() with inode_lock -held and it returns true if filesystems wants the inode to be dropped. As before, -generic_drop_inode() is still the default and it's been updated appropriately. -generic_delete_inode() is also alive and it consists simply of return 1. Note that -all actual eviction work is done by caller after ->drop_inode() returns. + + ->drop_inode() returns int now; it's called on final iput() with +inode->i_lock held and it returns true if filesystems wants the inode to be +dropped. As before, generic_drop_inode() is still the default and it's been +updated appropriately. generic_delete_inode() is also alive and it consists +simply of return 1. Note that all actual eviction work is done by caller after +->drop_inode() returns. + clear_inode() is gone; use end_writeback() instead. As before, it must be called exactly once on each call of ->evict_inode() (as it used to be for each call of ->delete_inode()). Unlike before, if you are using inode-associated @@ -397,6 +400,9 @@ a file off. -- [mandatory] + +-- +[mandatory] ->get_sb() is gone. Switch to use of ->mount(). Typically it's just a matter of switching from calling get_sb_... to mount_... and changing the function type. If you were doing it manually, just switch from setting ->mnt_root diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 306f0ae8df0..80815ed654c 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -254,7 +254,7 @@ or bottom half). should be synchronous or not, not all filesystems check this flag. drop_inode: called when the last access to the inode is dropped, - with the inode_lock spinlock held. + with the inode->i_lock spinlock held. This method should be either NULL (normal UNIX filesystem semantics) or "generic_delete_inode" (for filesystems that do not diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index b3b0f0f5053..e5f6fc42834 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -78,7 +78,7 @@ __tagtable(ATAG_INITRD2, parse_tag_initrd2); */ struct meminfo meminfo; -void show_mem(void) +void show_mem(unsigned int filter) { int free = 0, total = 0, reserved = 0; int shared = 0, cached = 0, slab = 0, i; diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 54bf5405981..9a018cde5d8 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -36,7 +36,7 @@ static unsigned long max_gap; * Shows a simple page count of reserved and used pages in the system. * For discontig machines, it does this on a per-pgdat basis. */ -void show_mem(void) +void show_mem(unsigned int filter) { int i, total_reserved = 0; int total_shared = 0, total_cached = 0; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 61620323bb6..82ab1bc6afb 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -614,7 +614,7 @@ void __cpuinit *per_cpu_init(void) * Shows a simple page count of reserved and used pages in the system. * For discontig machines, it does this on a per-pgdat basis. */ -void show_mem(void) +void show_mem(unsigned int filter) { int i, total_reserved = 0; int total_shared = 0, total_cached = 0; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index f4f4d700833..b7ed8d7a9b3 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -544,7 +544,7 @@ void __init mem_init(void) unsigned long *empty_zero_page __read_mostly; EXPORT_SYMBOL(empty_zero_page); -void show_mem(void) +void show_mem(unsigned int filter) { int i,free = 0,total = 0,reserved = 0; int shared = 0, cached = 0; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d17d04cfb2c..33794c1d92c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -821,7 +821,7 @@ cmds(struct pt_regs *excp) memzcan(); break; case 'i': - show_mem(); + show_mem(0); break; default: termch = cmd; diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 6d0e02c4fe0..4c31e2b6e71 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -75,7 +75,7 @@ void __init kmap_init(void) kmap_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE); } -void show_mem(void) +void show_mem(unsigned int filter) { printk("Mem-info:\n"); show_free_areas(); diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 1a2b36f8866..de7d8e21e01 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -41,7 +41,7 @@ * The normal show_free_areas() is too verbose on Tile, with dozens * of processors and often four NUMA zones each with high and lowmem. */ -void show_mem(void) +void show_mem(unsigned int filter) { struct zone *zone; diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 3dbe3709b69..1fc02633f70 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -55,7 +55,7 @@ early_param("initrd", early_initrd); */ struct meminfo meminfo; -void show_mem(void) +void show_mem(unsigned int filter) { int free = 0, total = 0, reserved = 0; int shared = 0, cached = 0, slab = 0, i; diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 4c95b5fd9df..799e1490cf2 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1073,6 +1073,9 @@ int drm_mode_getresources(struct drm_device *dev, void *data, uint32_t __user *encoder_id; struct drm_mode_group *mode_group; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); /* @@ -1244,6 +1247,9 @@ int drm_mode_getcrtc(struct drm_device *dev, struct drm_mode_object *obj; int ret = 0; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, crtc_resp->crtc_id, @@ -1312,6 +1318,9 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, uint64_t __user *prop_values; uint32_t __user *encoder_ptr; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + memset(&u_mode, 0, sizeof(struct drm_mode_modeinfo)); DRM_DEBUG_KMS("[CONNECTOR:%d:?]\n", out_resp->connector_id); @@ -1431,6 +1440,9 @@ int drm_mode_getencoder(struct drm_device *dev, void *data, struct drm_encoder *encoder; int ret = 0; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, enc_resp->encoder_id, DRM_MODE_OBJECT_ENCODER); @@ -1486,6 +1498,9 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, int ret = 0; int i; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, crtc_req->crtc_id, DRM_MODE_OBJECT_CRTC); @@ -1603,6 +1618,9 @@ int drm_mode_cursor_ioctl(struct drm_device *dev, struct drm_crtc *crtc; int ret = 0; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + if (!req->flags) { DRM_ERROR("no operation set\n"); return -EINVAL; @@ -1667,6 +1685,9 @@ int drm_mode_addfb(struct drm_device *dev, struct drm_framebuffer *fb; int ret = 0; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + if ((config->min_width > r->width) || (r->width > config->max_width)) { DRM_ERROR("mode new framebuffer width not within limits\n"); return -EINVAL; @@ -1724,6 +1745,9 @@ int drm_mode_rmfb(struct drm_device *dev, int ret = 0; int found = 0; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, *id, DRM_MODE_OBJECT_FB); /* TODO check that we realy get a framebuffer back. */ @@ -1780,6 +1804,9 @@ int drm_mode_getfb(struct drm_device *dev, struct drm_framebuffer *fb; int ret = 0; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB); if (!obj) { @@ -1813,6 +1840,9 @@ int drm_mode_dirtyfb_ioctl(struct drm_device *dev, int num_clips; int ret = 0; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB); if (!obj) { @@ -1996,6 +2026,9 @@ int drm_mode_attachmode_ioctl(struct drm_device *dev, struct drm_mode_modeinfo *umode = &mode_cmd->mode; int ret = 0; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR); @@ -2042,6 +2075,9 @@ int drm_mode_detachmode_ioctl(struct drm_device *dev, struct drm_mode_modeinfo *umode = &mode_cmd->mode; int ret = 0; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR); @@ -2211,6 +2247,9 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev, uint64_t __user *values_ptr; uint32_t __user *blob_length_ptr; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, out_resp->prop_id, DRM_MODE_OBJECT_PROPERTY); if (!obj) { @@ -2333,6 +2372,9 @@ int drm_mode_getblob_ioctl(struct drm_device *dev, int ret = 0; void *blob_ptr; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, out_resp->blob_id, DRM_MODE_OBJECT_BLOB); if (!obj) { @@ -2393,6 +2435,9 @@ int drm_mode_connector_property_set_ioctl(struct drm_device *dev, int ret = -EINVAL; int i; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, out_resp->connector_id, DRM_MODE_OBJECT_CONNECTOR); @@ -2509,6 +2554,9 @@ int drm_mode_gamma_set_ioctl(struct drm_device *dev, int size; int ret = 0; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC); if (!obj) { @@ -2560,6 +2608,9 @@ int drm_mode_gamma_get_ioctl(struct drm_device *dev, int size; int ret = 0; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->mode_config.mutex); obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC); if (!obj) { diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 57ce27c9a74..74e4ff57801 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -499,11 +499,12 @@ EXPORT_SYMBOL(drm_gem_vm_open); void drm_gem_vm_close(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; + struct drm_device *dev = obj->dev; - mutex_lock(&obj->dev->struct_mutex); + mutex_lock(&dev->struct_mutex); drm_vm_close_locked(vma); drm_gem_object_unreference(obj); - mutex_unlock(&obj->dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); } EXPORT_SYMBOL(drm_gem_vm_close); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 7f6912a1676..904d7e9c8e4 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -280,6 +280,9 @@ int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_priv) if (dev->driver->dumb_create) req->value = 1; break; + case DRM_CAP_VBLANK_HIGH_CRTC: + req->value = 1; + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index a34ef97d3c8..741457bd1c4 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1125,7 +1125,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data, { union drm_wait_vblank *vblwait = data; int ret = 0; - unsigned int flags, seq, crtc; + unsigned int flags, seq, crtc, high_crtc; if ((!drm_dev_to_irq(dev)) || (!dev->irq_enabled)) return -EINVAL; @@ -1134,16 +1134,21 @@ int drm_wait_vblank(struct drm_device *dev, void *data, return -EINVAL; if (vblwait->request.type & - ~(_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK)) { + ~(_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK | + _DRM_VBLANK_HIGH_CRTC_MASK)) { DRM_ERROR("Unsupported type value 0x%x, supported mask 0x%x\n", vblwait->request.type, - (_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK)); + (_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK | + _DRM_VBLANK_HIGH_CRTC_MASK)); return -EINVAL; } flags = vblwait->request.type & _DRM_VBLANK_FLAGS_MASK; - crtc = flags & _DRM_VBLANK_SECONDARY ? 1 : 0; - + high_crtc = (vblwait->request.type & _DRM_VBLANK_HIGH_CRTC_MASK); + if (high_crtc) + crtc = high_crtc >> _DRM_VBLANK_HIGH_CRTC_SHIFT; + else + crtc = flags & _DRM_VBLANK_SECONDARY ? 1 : 0; if (crtc >= dev->num_crtcs) return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 09e0327fc6c..87c8e29465e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -892,7 +892,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused) seq_printf(m, "Render p-state limit: %d\n", rp_state_limits & 0xff); seq_printf(m, "CAGF: %dMHz\n", ((rpstat & GEN6_CAGF_MASK) >> - GEN6_CAGF_SHIFT) * 100); + GEN6_CAGF_SHIFT) * 50); seq_printf(m, "RP CUR UP EI: %dus\n", rpupei & GEN6_CURICONT_MASK); seq_printf(m, "RP CUR UP: %dus\n", rpcurup & @@ -908,15 +908,15 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused) max_freq = (rp_state_cap & 0xff0000) >> 16; seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", - max_freq * 100); + max_freq * 50); max_freq = (rp_state_cap & 0xff00) >> 8; seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", - max_freq * 100); + max_freq * 50); max_freq = rp_state_cap & 0xff; seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", - max_freq * 100); + max_freq * 50); __gen6_gt_force_wake_put(dev_priv); } else { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c4c2855d002..7ce3f353af3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -224,7 +224,7 @@ i915_gem_dumb_create(struct drm_file *file, struct drm_mode_create_dumb *args) { /* have to work out size/pitch and return them */ - args->pitch = ALIGN(args->width & ((args->bpp + 1) / 8), 64); + args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, args->size, &args->handle); @@ -1356,9 +1356,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) if (!obj->fault_mappable) return; - unmap_mapping_range(obj->base.dev->dev_mapping, - (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT, - obj->base.size, 1); + if (obj->base.dev->dev_mapping) + unmap_mapping_range(obj->base.dev->dev_mapping, + (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT, + obj->base.size, 1); obj->fault_mappable = false; } @@ -1796,8 +1797,10 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) return; spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; + if (request->file_priv) { + list_del(&request->client_list); + request->file_priv = NULL; + } spin_unlock(&file_priv->mm.lock); } @@ -2217,13 +2220,18 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring, { int ret; + if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) + return 0; + trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); ret = ring->flush(ring, invalidate_domains, flush_domains); if (ret) return ret; - i915_gem_process_flushing_list(ring, flush_domains); + if (flush_domains & I915_GEM_GPU_DOMAINS) + i915_gem_process_flushing_list(ring, flush_domains); + return 0; } @@ -2579,8 +2587,23 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, reg = &dev_priv->fence_regs[obj->fence_reg]; list_move_tail(®->lru_list, &dev_priv->mm.fence_list); - if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) - pipelined = NULL; + if (obj->tiling_changed) { + ret = i915_gem_object_flush_fence(obj, pipelined); + if (ret) + return ret; + + if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) + pipelined = NULL; + + if (pipelined) { + reg->setup_seqno = + i915_gem_next_request_seqno(pipelined); + obj->last_fenced_seqno = reg->setup_seqno; + obj->last_fenced_ring = pipelined; + } + + goto update; + } if (!pipelined) { if (reg->setup_seqno) { @@ -2599,31 +2622,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, ret = i915_gem_object_flush_fence(obj, pipelined); if (ret) return ret; - } else if (obj->tiling_changed) { - if (obj->fenced_gpu_access) { - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->ring, - 0, obj->base.write_domain); - if (ret) - return ret; - } - - obj->fenced_gpu_access = false; - } - } - - if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) - pipelined = NULL; - BUG_ON(!pipelined && reg->setup_seqno); - - if (obj->tiling_changed) { - if (pipelined) { - reg->setup_seqno = - i915_gem_next_request_seqno(pipelined); - obj->last_fenced_seqno = reg->setup_seqno; - obj->last_fenced_ring = pipelined; - } - goto update; } return 0; @@ -3606,6 +3604,8 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) return; } + trace_i915_gem_object_destroy(obj); + if (obj->base.map_list.map) i915_gem_free_mmap_offset(obj); @@ -3615,8 +3615,6 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) kfree(obj->page_cpu_valid); kfree(obj->bit_17); kfree(obj); - - trace_i915_gem_object_destroy(obj); } void i915_gem_free_object(struct drm_gem_object *gem_obj) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7ff7f933ddf..20a4cc5b818 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -367,6 +367,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, uint32_t __iomem *reloc_entry; void __iomem *reloc_page; + /* We can't wait for rendering with pagefaults disabled */ + if (obj->active && in_atomic()) + return -EFAULT; + ret = i915_gem_object_set_to_gtt_domain(obj, 1); if (ret) return ret; @@ -440,15 +444,24 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, struct list_head *objects) { struct drm_i915_gem_object *obj; - int ret; - + int ret = 0; + + /* This is the fast path and we cannot handle a pagefault whilst + * holding the struct mutex lest the user pass in the relocations + * contained within a mmaped bo. For in such a case we, the page + * fault handler would call i915_gem_fault() and we would try to + * acquire the struct mutex again. Obviously this is bad and so + * lockdep complains vehemently. + */ + pagefault_disable(); list_for_each_entry(obj, objects, exec_list) { ret = i915_gem_execbuffer_relocate_object(obj, eb); if (ret) - return ret; + break; } + pagefault_enable(); - return 0; + return ret; } static int diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3106c0dc838..432fc04c6bf 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1516,9 +1516,10 @@ static void intel_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, reg = PIPECONF(pipe); val = I915_READ(reg); - val |= PIPECONF_ENABLE; - I915_WRITE(reg, val); - POSTING_READ(reg); + if (val & PIPECONF_ENABLE) + return; + + I915_WRITE(reg, val | PIPECONF_ENABLE); intel_wait_for_vblank(dev_priv->dev, pipe); } @@ -1552,9 +1553,10 @@ static void intel_disable_pipe(struct drm_i915_private *dev_priv, reg = PIPECONF(pipe); val = I915_READ(reg); - val &= ~PIPECONF_ENABLE; - I915_WRITE(reg, val); - POSTING_READ(reg); + if ((val & PIPECONF_ENABLE) == 0) + return; + + I915_WRITE(reg, val & ~PIPECONF_ENABLE); intel_wait_for_pipe_off(dev_priv->dev, pipe); } @@ -1577,9 +1579,10 @@ static void intel_enable_plane(struct drm_i915_private *dev_priv, reg = DSPCNTR(plane); val = I915_READ(reg); - val |= DISPLAY_PLANE_ENABLE; - I915_WRITE(reg, val); - POSTING_READ(reg); + if (val & DISPLAY_PLANE_ENABLE) + return; + + I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE); intel_wait_for_vblank(dev_priv->dev, pipe); } @@ -1610,9 +1613,10 @@ static void intel_disable_plane(struct drm_i915_private *dev_priv, reg = DSPCNTR(plane); val = I915_READ(reg); - val &= ~DISPLAY_PLANE_ENABLE; - I915_WRITE(reg, val); - POSTING_READ(reg); + if ((val & DISPLAY_PLANE_ENABLE) == 0) + return; + + I915_WRITE(reg, val & ~DISPLAY_PLANE_ENABLE); intel_flush_display_plane(dev_priv, plane); intel_wait_for_vblank(dev_priv->dev, pipe); } @@ -1769,7 +1773,6 @@ static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval) return; I915_WRITE(DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN); - POSTING_READ(DPFC_CONTROL); intel_wait_for_vblank(dev, intel_crtc->pipe); } @@ -1861,7 +1864,6 @@ static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval) return; I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN); - POSTING_READ(ILK_DPFC_CONTROL); intel_wait_for_vblank(dev, intel_crtc->pipe); } @@ -3883,10 +3885,7 @@ static bool g4x_compute_srwm(struct drm_device *dev, display, cursor); } -static inline bool single_plane_enabled(unsigned int mask) -{ - return mask && (mask & -mask) == 0; -} +#define single_plane_enabled(mask) is_power_of_2(mask) static void g4x_update_wm(struct drm_device *dev) { @@ -5777,7 +5776,6 @@ static void intel_increase_pllclock(struct drm_crtc *crtc) dpll &= ~DISPLAY_RATE_SELECT_FPA1; I915_WRITE(dpll_reg, dpll); - POSTING_READ(dpll_reg); intel_wait_for_vblank(dev, pipe); dpll = I915_READ(dpll_reg); @@ -5821,7 +5819,6 @@ static void intel_decrease_pllclock(struct drm_crtc *crtc) dpll |= DISPLAY_RATE_SELECT_FPA1; I915_WRITE(dpll_reg, dpll); - dpll = I915_READ(dpll_reg); intel_wait_for_vblank(dev, pipe); dpll = I915_READ(dpll_reg); if (!(dpll & DISPLAY_RATE_SELECT_FPA1)) @@ -6933,7 +6930,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) DRM_ERROR("timeout waiting for pcode mailbox to finish\n"); if (pcu_mbox & (1<<31)) { /* OC supported */ max_freq = pcu_mbox & 0xff; - DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 100); + DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50); } /* In units of 100MHz */ diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d29e33f815d..0daefca5cbb 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1957,9 +1957,9 @@ intel_dp_init(struct drm_device *dev, int output_reg) DP_NO_AUX_HANDSHAKE_LINK_TRAINING; } else { /* if this fails, presume the device is a ghost */ - DRM_ERROR("failed to retrieve link info\n"); - intel_dp_destroy(&intel_connector->base); + DRM_INFO("failed to retrieve link info, disabling eDP\n"); intel_dp_encoder_destroy(&intel_dp->base.base); + intel_dp_destroy(&intel_connector->base); return; } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 789c47801ba..e9e6f71418a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -65,62 +65,60 @@ render_ring_flush(struct intel_ring_buffer *ring, u32 cmd; int ret; - if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) { + /* + * read/write caches: + * + * I915_GEM_DOMAIN_RENDER is always invalidated, but is + * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is + * also flushed at 2d versus 3d pipeline switches. + * + * read-only caches: + * + * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if + * MI_READ_FLUSH is set, and is always flushed on 965. + * + * I915_GEM_DOMAIN_COMMAND may not exist? + * + * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is + * invalidated when MI_EXE_FLUSH is set. + * + * I915_GEM_DOMAIN_VERTEX, which exists on 965, is + * invalidated with every MI_FLUSH. + * + * TLBs: + * + * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND + * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and + * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER + * are flushed at any MI_FLUSH. + */ + + cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; + if ((invalidate_domains|flush_domains) & + I915_GEM_DOMAIN_RENDER) + cmd &= ~MI_NO_WRITE_FLUSH; + if (INTEL_INFO(dev)->gen < 4) { /* - * read/write caches: - * - * I915_GEM_DOMAIN_RENDER is always invalidated, but is - * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is - * also flushed at 2d versus 3d pipeline switches. - * - * read-only caches: - * - * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if - * MI_READ_FLUSH is set, and is always flushed on 965. - * - * I915_GEM_DOMAIN_COMMAND may not exist? - * - * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is - * invalidated when MI_EXE_FLUSH is set. - * - * I915_GEM_DOMAIN_VERTEX, which exists on 965, is - * invalidated with every MI_FLUSH. - * - * TLBs: - * - * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND - * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and - * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER - * are flushed at any MI_FLUSH. + * On the 965, the sampler cache always gets flushed + * and this bit is reserved. */ + if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) + cmd |= MI_READ_FLUSH; + } + if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) + cmd |= MI_EXE_FLUSH; - cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; - if ((invalidate_domains|flush_domains) & - I915_GEM_DOMAIN_RENDER) - cmd &= ~MI_NO_WRITE_FLUSH; - if (INTEL_INFO(dev)->gen < 4) { - /* - * On the 965, the sampler cache always gets flushed - * and this bit is reserved. - */ - if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) - cmd |= MI_READ_FLUSH; - } - if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) - cmd |= MI_EXE_FLUSH; - - if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && - (IS_G4X(dev) || IS_GEN5(dev))) - cmd |= MI_INVALIDATE_ISP; + if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && + (IS_G4X(dev) || IS_GEN5(dev))) + cmd |= MI_INVALIDATE_ISP; - ret = intel_ring_begin(ring, 2); - if (ret) - return ret; + ret = intel_ring_begin(ring, 2); + if (ret) + return ret; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - } + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -568,9 +566,6 @@ bsd_ring_flush(struct intel_ring_buffer *ring, { int ret; - if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0) - return 0; - ret = intel_ring_begin(ring, 2); if (ret) return ret; @@ -1056,9 +1051,6 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring, uint32_t cmd; int ret; - if (((invalidate | flush) & I915_GEM_GPU_DOMAINS) == 0) - return 0; - ret = intel_ring_begin(ring, 4); if (ret) return ret; @@ -1230,9 +1222,6 @@ static int blt_ring_flush(struct intel_ring_buffer *ring, uint32_t cmd; int ret; - if (((invalidate | flush) & I915_GEM_DOMAIN_RENDER) == 0) - return 0; - ret = blt_ring_begin(ring, 4); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 3cd3234ba0a..10e41af6b02 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -957,7 +957,11 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode /* adjust pixel clock as needed */ adjusted_clock = atombios_adjust_pll(crtc, mode, pll, ss_enabled, &ss); - if (ASIC_IS_AVIVO(rdev)) + if (radeon_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT)) + /* TV seems to prefer the legacy algo on some boards */ + radeon_compute_pll_legacy(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div, + &ref_div, &post_div); + else if (ASIC_IS_AVIVO(rdev)) radeon_compute_pll_avivo(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div, &ref_div, &post_div); else diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index cf7c8d5b4ec..cf602e2d071 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -448,7 +448,7 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) { - int edid_info; + int edid_info, size; struct edid *edid; unsigned char *raw; edid_info = combios_get_table_offset(rdev->ddev, COMBIOS_HARDCODED_EDID_TABLE); @@ -456,11 +456,12 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) return false; raw = rdev->bios + edid_info; - edid = kmalloc(EDID_LENGTH * (raw[0x7e] + 1), GFP_KERNEL); + size = EDID_LENGTH * (raw[0x7e] + 1); + edid = kmalloc(size, GFP_KERNEL); if (edid == NULL) return false; - memcpy((unsigned char *)edid, raw, EDID_LENGTH * (raw[0x7e] + 1)); + memcpy((unsigned char *)edid, raw, size); if (!drm_edid_is_valid(edid)) { kfree(edid); @@ -468,6 +469,7 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) } rdev->mode_info.bios_hardcoded_edid = edid; + rdev->mode_info.bios_hardcoded_edid_size = size; return true; } @@ -475,8 +477,17 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) struct edid * radeon_bios_get_hardcoded_edid(struct radeon_device *rdev) { - if (rdev->mode_info.bios_hardcoded_edid) - return rdev->mode_info.bios_hardcoded_edid; + struct edid *edid; + + if (rdev->mode_info.bios_hardcoded_edid) { + edid = kmalloc(rdev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL); + if (edid) { + memcpy((unsigned char *)edid, + (unsigned char *)rdev->mode_info.bios_hardcoded_edid, + rdev->mode_info.bios_hardcoded_edid_size); + return edid; + } + } return NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 28c7961cd19..2ef6d513506 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -633,6 +633,8 @@ static int radeon_vga_mode_valid(struct drm_connector *connector, static enum drm_connector_status radeon_vga_detect(struct drm_connector *connector, bool force) { + struct drm_device *dev = connector->dev; + struct radeon_device *rdev = dev->dev_private; struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder; struct drm_encoder_helper_funcs *encoder_funcs; @@ -683,6 +685,17 @@ radeon_vga_detect(struct drm_connector *connector, bool force) if (ret == connector_status_connected) ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, true); + + /* RN50 and some RV100 asics in servers often have a hardcoded EDID in the + * vbios to deal with KVMs. If we have one and are not able to detect a monitor + * by other means, assume the CRT is connected and use that EDID. + */ + if ((!rdev->is_atom_bios) && + (ret == connector_status_disconnected) && + rdev->mode_info.bios_hardcoded_edid_size) { + ret = connector_status_connected; + } + radeon_connector_update_scratch_regs(connector, ret); return ret; } @@ -794,6 +807,8 @@ static int radeon_dvi_get_modes(struct drm_connector *connector) static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connector, bool force) { + struct drm_device *dev = connector->dev; + struct radeon_device *rdev = dev->dev_private; struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = NULL; struct drm_encoder_helper_funcs *encoder_funcs; @@ -833,8 +848,6 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) * you don't really know what's connected to which port as both are digital. */ if (radeon_connector->shared_ddc && (ret == connector_status_connected)) { - struct drm_device *dev = connector->dev; - struct radeon_device *rdev = dev->dev_private; struct drm_connector *list_connector; struct radeon_connector *list_radeon_connector; list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) { @@ -899,6 +912,19 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, true); } + /* RN50 and some RV100 asics in servers often have a hardcoded EDID in the + * vbios to deal with KVMs. If we have one and are not able to detect a monitor + * by other means, assume the DFP is connected and use that EDID. In most + * cases the DVI port is actually a virtual KVM port connected to the service + * processor. + */ + if ((!rdev->is_atom_bios) && + (ret == connector_status_disconnected) && + rdev->mode_info.bios_hardcoded_edid_size) { + radeon_connector->use_digital = true; + ret = connector_status_connected; + } + out: /* updated in get modes as well since we need to know if it's analog or digital */ radeon_connector_update_scratch_regs(connector, ret); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index e4582814bb7..9c57538231d 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -239,6 +239,7 @@ struct radeon_mode_info { struct drm_property *underscan_vborder_property; /* hardcoded DFP edid from BIOS */ struct edid *bios_hardcoded_edid; + int bios_hardcoded_edid_size; /* pointer to fbdev info structure */ struct radeon_fbdev *rfbdev; diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 2aed03bde4b..08de669e025 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -365,12 +365,14 @@ static ssize_t radeon_set_pm_profile(struct device *dev, else if (strncmp("high", buf, strlen("high")) == 0) rdev->pm.profile = PM_PROFILE_HIGH; else { - DRM_ERROR("invalid power profile!\n"); + count = -EINVAL; goto fail; } radeon_pm_update_profile(rdev); radeon_pm_set_clocks(rdev); - } + } else + count = -EINVAL; + fail: mutex_unlock(&rdev->pm.mutex); @@ -413,7 +415,7 @@ static ssize_t radeon_set_pm_method(struct device *dev, mutex_unlock(&rdev->pm.mutex); cancel_delayed_work_sync(&rdev->pm.dynpm_idle_work); } else { - DRM_ERROR("invalid power method!\n"); + count = -EINVAL; goto fail; } radeon_pm_compute_clocks(rdev); diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c index 9be6a830f1d..ac0e42b47b2 100644 --- a/drivers/media/rc/ite-cir.c +++ b/drivers/media/rc/ite-cir.c @@ -187,7 +187,7 @@ static void ite_decode_bytes(struct ite_dev *dev, const u8 * data, int sample_period = dev->params.sample_period; ldata = (unsigned long *)data; size = length << 3; - next_one = generic_find_next_le_bit(ldata, size, 0); + next_one = find_next_bit_le(ldata, size, 0); if (next_one > 0) { ev.pulse = true; ev.duration = @@ -196,14 +196,14 @@ static void ite_decode_bytes(struct ite_dev *dev, const u8 * data, int } while (next_one < size) { - next_zero = generic_find_next_zero_le_bit(ldata, size, next_one + 1); + next_zero = find_next_zero_bit_le(ldata, size, next_one + 1); ev.pulse = false; ev.duration = ITE_BITS_TO_NS(next_zero - next_one, sample_period); ir_raw_event_store_with_filter(dev->rdev, &ev); if (next_zero < size) { next_one = - generic_find_next_le_bit(ldata, + find_next_bit_le(ldata, size, next_zero + 1); ev.pulse = true; diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 81f13958e75..43db715f150 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -306,7 +306,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = { static void sysrq_handle_showmem(int key) { - show_mem(); + show_mem(0); } static struct sysrq_key_op sysrq_showmem_op = { .handler = sysrq_handle_showmem, diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 6dd3c68c13a..d6b342b5b42 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -600,7 +600,7 @@ static void fn_scroll_back(struct vc_data *vc) static void fn_show_mem(struct vc_data *vc) { - show_mem(); + show_mem(0); } static void fn_show_state(struct vc_data *vc) diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 54f92379272..475f9c597cb 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -61,8 +61,6 @@ do { \ current->pid, __func__, ##args); \ } while (0) -extern spinlock_t autofs4_lock; - /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this structure. It holds a reference to the dentry, so dentries are never diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 1442da4860e..509fe1eb66a 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -372,6 +372,10 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, return -EBUSY; } else { struct file *pipe = fget(pipefd); + if (!pipe) { + err = -EBADF; + goto out; + } if (!pipe->f_op || !pipe->f_op->write) { err = -EPIPE; fput(pipe); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index f43100b9662..450f529a4ea 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -87,18 +87,70 @@ done: } /* + * Calculate and dget next entry in the subdirs list under root. + */ +static struct dentry *get_next_positive_subdir(struct dentry *prev, + struct dentry *root) +{ + struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); + struct list_head *next; + struct dentry *p, *q; + + spin_lock(&sbi->lookup_lock); + + if (prev == NULL) { + spin_lock(&root->d_lock); + prev = dget_dlock(root); + next = prev->d_subdirs.next; + p = prev; + goto start; + } + + p = prev; + spin_lock(&p->d_lock); +again: + next = p->d_u.d_child.next; +start: + if (next == &root->d_subdirs) { + spin_unlock(&p->d_lock); + spin_unlock(&sbi->lookup_lock); + dput(prev); + return NULL; + } + + q = list_entry(next, struct dentry, d_u.d_child); + + spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED); + /* Negative dentry - try next */ + if (!simple_positive(q)) { + spin_unlock(&p->d_lock); + p = q; + goto again; + } + dget_dlock(q); + spin_unlock(&q->d_lock); + spin_unlock(&p->d_lock); + spin_unlock(&sbi->lookup_lock); + + dput(prev); + + return q; +} + +/* * Calculate and dget next entry in top down tree traversal. */ static struct dentry *get_next_positive_dentry(struct dentry *prev, struct dentry *root) { + struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); struct list_head *next; struct dentry *p, *ret; if (prev == NULL) return dget(root); - spin_lock(&autofs4_lock); + spin_lock(&sbi->lookup_lock); relock: p = prev; spin_lock(&p->d_lock); @@ -110,7 +162,7 @@ again: if (p == root) { spin_unlock(&p->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&sbi->lookup_lock); dput(prev); return NULL; } @@ -140,7 +192,7 @@ again: dget_dlock(ret); spin_unlock(&ret->d_lock); spin_unlock(&p->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&sbi->lookup_lock); dput(prev); @@ -290,11 +342,8 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(root); /* No point expiring a pending mount */ - if (ino->flags & AUTOFS_INF_PENDING) { - spin_unlock(&sbi->fs_lock); - return NULL; - } - managed_dentry_set_transit(root); + if (ino->flags & AUTOFS_INF_PENDING) + goto out; if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { struct autofs_info *ino = autofs4_dentry_ino(root); ino->flags |= AUTOFS_INF_EXPIRING; @@ -302,7 +351,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, spin_unlock(&sbi->fs_lock); return root; } - managed_dentry_clear_transit(root); +out: spin_unlock(&sbi->fs_lock); dput(root); @@ -336,13 +385,12 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, timeout = sbi->exp_timeout; dentry = NULL; - while ((dentry = get_next_positive_dentry(dentry, root))) { + while ((dentry = get_next_positive_subdir(dentry, root))) { spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); /* No point expiring a pending mount */ if (ino->flags & AUTOFS_INF_PENDING) - goto cont; - managed_dentry_set_transit(dentry); + goto next; /* * Case 1: (i) indirect mount or top level pseudo direct mount @@ -402,8 +450,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, } } next: - managed_dentry_clear_transit(dentry); -cont: spin_unlock(&sbi->fs_lock); } return NULL; @@ -415,13 +461,13 @@ found: ino->flags |= AUTOFS_INF_EXPIRING; init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); - spin_lock(&autofs4_lock); + spin_lock(&sbi->lookup_lock); spin_lock(&expired->d_parent->d_lock); spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); spin_unlock(&expired->d_lock); spin_unlock(&expired->d_parent->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&sbi->lookup_lock); return expired; } @@ -484,8 +530,6 @@ int autofs4_expire_run(struct super_block *sb, spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); ino->flags &= ~AUTOFS_INF_EXPIRING; - if (!d_unhashed(dentry)) - managed_dentry_clear_transit(dentry); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); @@ -513,9 +557,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, spin_lock(&sbi->fs_lock); ino->flags &= ~AUTOFS_INF_EXPIRING; spin_lock(&dentry->d_lock); - if (ret) - __managed_dentry_clear_transit(dentry); - else { + if (!ret) { if ((IS_ROOT(dentry) || (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent))) && diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index e6f84d26f4c..96804a17bbd 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -23,8 +23,6 @@ #include "autofs_i.h" -DEFINE_SPINLOCK(autofs4_lock); - static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); static int autofs4_dir_unlink(struct inode *,struct dentry *); static int autofs4_dir_rmdir(struct inode *,struct dentry *); @@ -125,15 +123,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) * autofs file system so just let the libfs routines handle * it. */ - spin_lock(&autofs4_lock); + spin_lock(&sbi->lookup_lock); spin_lock(&dentry->d_lock); if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&sbi->lookup_lock); return -ENOENT; } spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&sbi->lookup_lock); out: return dcache_dir_open(inode, file); @@ -171,7 +169,6 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) const unsigned char *str = name->name; struct list_head *p, *head; - spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); head = &sbi->active_list; list_for_each(p, head) { @@ -204,14 +201,12 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) dget_dlock(active); spin_unlock(&active->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&autofs4_lock); return active; } next: spin_unlock(&active->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&autofs4_lock); return NULL; } @@ -226,7 +221,6 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) const unsigned char *str = name->name; struct list_head *p, *head; - spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); head = &sbi->expiring_list; list_for_each(p, head) { @@ -259,14 +253,12 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) dget_dlock(expiring); spin_unlock(&expiring->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&autofs4_lock); return expiring; } next: spin_unlock(&expiring->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&autofs4_lock); return NULL; } @@ -275,17 +267,16 @@ static int autofs4_mount_wait(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); - int status; + int status = 0; if (ino->flags & AUTOFS_INF_PENDING) { DPRINTK("waiting for mount name=%.*s", dentry->d_name.len, dentry->d_name.name); status = autofs4_wait(sbi, dentry, NFY_MOUNT); DPRINTK("mount wait done status=%d", status); - ino->last_used = jiffies; - return status; } - return 0; + ino->last_used = jiffies; + return status; } static int do_expire_wait(struct dentry *dentry) @@ -319,9 +310,12 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) */ if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { struct dentry *parent = dentry->d_parent; + struct autofs_info *ino; struct dentry *new = d_lookup(parent, &dentry->d_name); if (!new) return NULL; + ino = autofs4_dentry_ino(new); + ino->last_used = jiffies; dput(path->dentry); path->dentry = new; } @@ -338,18 +332,6 @@ static struct vfsmount *autofs4_d_automount(struct path *path) DPRINTK("dentry=%p %.*s", dentry, dentry->d_name.len, dentry->d_name.name); - /* - * Someone may have manually umounted this or it was a submount - * that has gone away. - */ - spin_lock(&dentry->d_lock); - if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { - if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) && - (dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) - __managed_dentry_set_transit(path->dentry); - } - spin_unlock(&dentry->d_lock); - /* The daemon never triggers a mount. */ if (autofs4_oz_mode(sbi)) return NULL; @@ -418,18 +400,17 @@ static struct vfsmount *autofs4_d_automount(struct path *path) done: if (!(ino->flags & AUTOFS_INF_EXPIRING)) { /* - * Any needed mounting has been completed and the path updated - * so turn this into a normal dentry so we don't continually - * call ->d_automount() and ->d_manage(). - */ - spin_lock(&dentry->d_lock); - __managed_dentry_clear_transit(dentry); - /* + * Any needed mounting has been completed and the path + * updated so clear DCACHE_NEED_AUTOMOUNT so we don't + * call ->d_automount() on rootless multi-mounts since + * it can lead to an incorrect ELOOP error return. + * * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and * symlinks as in all other cases the dentry will be covered by * an actual mount so ->d_automount() won't be called during * the follow. */ + spin_lock(&dentry->d_lock); if ((!d_mountpoint(dentry) && !list_empty(&dentry->d_subdirs)) || (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))) @@ -455,6 +436,8 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) /* The daemon never waits. */ if (autofs4_oz_mode(sbi)) { + if (rcu_walk) + return 0; if (!d_mountpoint(dentry)) return -EISDIR; return 0; @@ -612,12 +595,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) dir->i_mtime = CURRENT_TIME; - spin_lock(&autofs4_lock); - autofs4_add_expiring(dentry); + spin_lock(&sbi->lookup_lock); + __autofs4_add_expiring(dentry); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&sbi->lookup_lock); return 0; } @@ -686,20 +669,17 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) if (!autofs4_oz_mode(sbi)) return -EACCES; - spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); spin_lock(&dentry->d_lock); if (!list_empty(&dentry->d_subdirs)) { spin_unlock(&dentry->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&autofs4_lock); return -ENOTEMPTY; } __autofs4_add_expiring(dentry); - spin_unlock(&sbi->lookup_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&sbi->lookup_lock); if (sbi->version < 5) autofs_clear_leaf_automount_flags(dentry); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 56010056b2e..25435987d6a 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -197,12 +197,12 @@ rename_retry: seq = read_seqbegin(&rename_lock); rcu_read_lock(); - spin_lock(&autofs4_lock); + spin_lock(&sbi->fs_lock); for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) len += tmp->d_name.len + 1; if (!len || --len > NAME_MAX) { - spin_unlock(&autofs4_lock); + spin_unlock(&sbi->fs_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; @@ -218,7 +218,7 @@ rename_retry: p -= tmp->d_name.len; strncpy(p, tmp->d_name.name, tmp->d_name.len); } - spin_unlock(&autofs4_lock); + spin_unlock(&sbi->fs_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; diff --git a/fs/block_dev.c b/fs/block_dev.c index 7d02afb2b7f..c1511c674f5 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -55,11 +55,13 @@ EXPORT_SYMBOL(I_BDEV); static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); + spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; if (inode->i_state & I_DIRTY) list_move(&inode->i_wb_list, &dst->wb.b_dirty); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_wb_list_lock); } static sector_t max_block(struct block_device *bdev) diff --git a/fs/buffer.c b/fs/buffer.c index 2e6b1a387b7..a08bb8e61c6 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1138,7 +1138,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) * inode list. * * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, - * mapping->tree_lock and the global inode_lock. + * mapping->tree_lock and mapping->host->i_lock. */ void mark_buffer_dirty(struct buffer_head *bh) { diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 816f88e6b9c..98b77c89494 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -8,6 +8,7 @@ #include <linux/writeback.h> #include <linux/sysctl.h> #include <linux/gfp.h> +#include "internal.h" /* A global variable is a bit ugly, but it keeps the code simple */ int sysctl_drop_caches; @@ -16,20 +17,23 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) { struct inode *inode, *toput_inode = NULL; - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) - continue; - if (inode->i_mapping->nrpages == 0) + spin_lock(&inode->i_lock); + if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || + (inode->i_mapping->nrpages == 0)) { + spin_unlock(&inode->i_lock); continue; + } __iget(inode); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_sb_list_lock); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); } - spin_unlock(&inode_lock); + spin_unlock(&inode_sb_list_lock); iput(toput_inode); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 59c6e495678..b5ed541fb13 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -176,6 +176,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) } /* + * Remove the inode from the writeback list it is on. + */ +void inode_wb_list_del(struct inode *inode) +{ + spin_lock(&inode_wb_list_lock); + list_del_init(&inode->i_wb_list); + spin_unlock(&inode_wb_list_lock); +} + + +/* * Redirty an inode: set its when-it-was dirtied timestamp and move it to the * furthest end of its superblock's dirty-inode list. * @@ -188,6 +199,7 @@ static void redirty_tail(struct inode *inode) { struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; + assert_spin_locked(&inode_wb_list_lock); if (!list_empty(&wb->b_dirty)) { struct inode *tail; @@ -205,14 +217,17 @@ static void requeue_io(struct inode *inode) { struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; + assert_spin_locked(&inode_wb_list_lock); list_move(&inode->i_wb_list, &wb->b_more_io); } static void inode_sync_complete(struct inode *inode) { /* - * Prevent speculative execution through spin_unlock(&inode_lock); + * Prevent speculative execution through + * spin_unlock(&inode_wb_list_lock); */ + smp_mb(); wake_up_bit(&inode->i_state, __I_SYNC); } @@ -286,6 +301,7 @@ static void move_expired_inodes(struct list_head *delaying_queue, */ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) { + assert_spin_locked(&inode_wb_list_lock); list_splice_init(&wb->b_more_io, &wb->b_io); move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); } @@ -306,25 +322,25 @@ static void inode_wait_for_writeback(struct inode *inode) wait_queue_head_t *wqh; wqh = bit_waitqueue(&inode->i_state, __I_SYNC); - while (inode->i_state & I_SYNC) { - spin_unlock(&inode_lock); + while (inode->i_state & I_SYNC) { + spin_unlock(&inode->i_lock); + spin_unlock(&inode_wb_list_lock); __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); + spin_lock(&inode->i_lock); } } /* - * Write out an inode's dirty pages. Called under inode_lock. Either the - * caller has ref on the inode (either via __iget or via syscall against an fd) - * or the inode has I_WILL_FREE set (via generic_forget_inode) + * Write out an inode's dirty pages. Called under inode_wb_list_lock and + * inode->i_lock. Either the caller has an active reference on the inode or + * the inode has I_WILL_FREE set. * * If `wait' is set, wait on the writeout. * * The whole writeout design is quite complex and fragile. We want to avoid * starvation of particular inodes when others are being redirtied, prevent * livelocks, etc. - * - * Called under inode_lock. */ static int writeback_single_inode(struct inode *inode, struct writeback_control *wbc) @@ -333,6 +349,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) unsigned dirty; int ret; + assert_spin_locked(&inode_wb_list_lock); + assert_spin_locked(&inode->i_lock); + if (!atomic_read(&inode->i_count)) WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); else @@ -363,7 +382,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) /* Set I_SYNC, reset I_DIRTY_PAGES */ inode->i_state |= I_SYNC; inode->i_state &= ~I_DIRTY_PAGES; - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_wb_list_lock); ret = do_writepages(mapping, wbc); @@ -383,10 +403,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * due to delalloc, clear dirty metadata flags right before * write_inode() */ - spin_lock(&inode_lock); + spin_lock(&inode->i_lock); dirty = inode->i_state & I_DIRTY; inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { int err = write_inode(inode, wbc); @@ -394,7 +414,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ret = err; } - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); + spin_lock(&inode->i_lock); inode->i_state &= ~I_SYNC; if (!(inode->i_state & I_FREEING)) { if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { @@ -506,7 +527,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, * kind does not need peridic writeout yet, and for the latter * kind writeout is handled by the freer. */ + spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { + spin_unlock(&inode->i_lock); requeue_io(inode); continue; } @@ -515,10 +538,13 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, * Was this inode dirtied after sync_sb_inodes was called? * This keeps sync from extra jobs and livelock. */ - if (inode_dirtied_after(inode, wbc->wb_start)) + if (inode_dirtied_after(inode, wbc->wb_start)) { + spin_unlock(&inode->i_lock); return 1; + } __iget(inode); + pages_skipped = wbc->pages_skipped; writeback_single_inode(inode, wbc); if (wbc->pages_skipped != pages_skipped) { @@ -528,10 +554,11 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, */ redirty_tail(inode); } - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_wb_list_lock); iput(inode); cond_resched(); - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); if (wbc->nr_to_write <= 0) { wbc->more_io = 1; return 1; @@ -550,7 +577,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb, if (!wbc->wb_start) wbc->wb_start = jiffies; /* livelock avoidance */ - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); if (!wbc->for_kupdate || list_empty(&wb->b_io)) queue_io(wb, wbc->older_than_this); @@ -568,7 +595,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb, if (ret) break; } - spin_unlock(&inode_lock); + spin_unlock(&inode_wb_list_lock); /* Leave any unwritten inodes on b_io */ } @@ -577,11 +604,11 @@ static void __writeback_inodes_sb(struct super_block *sb, { WARN_ON(!rwsem_is_locked(&sb->s_umount)); - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); if (!wbc->for_kupdate || list_empty(&wb->b_io)) queue_io(wb, wbc->older_than_this); writeback_sb_inodes(sb, wb, wbc, true); - spin_unlock(&inode_lock); + spin_unlock(&inode_wb_list_lock); } /* @@ -720,13 +747,15 @@ static long wb_writeback(struct bdi_writeback *wb, * become available for writeback. Otherwise * we'll just busyloop. */ - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); if (!list_empty(&wb->b_more_io)) { inode = wb_inode(wb->b_more_io.prev); trace_wbc_writeback_wait(&wbc, wb->bdi); + spin_lock(&inode->i_lock); inode_wait_for_writeback(inode); + spin_unlock(&inode->i_lock); } - spin_unlock(&inode_lock); + spin_unlock(&inode_wb_list_lock); } return wrote; @@ -992,7 +1021,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) { struct super_block *sb = inode->i_sb; struct backing_dev_info *bdi = NULL; - bool wakeup_bdi = false; /* * Don't do this for I_DIRTY_PAGES - that doesn't actually @@ -1016,7 +1044,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) if (unlikely(block_dump)) block_dump___mark_inode_dirty(inode); - spin_lock(&inode_lock); + spin_lock(&inode->i_lock); if ((inode->i_state & flags) != flags) { const int was_dirty = inode->i_state & I_DIRTY; @@ -1028,7 +1056,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) * superblock list, based upon its state. */ if (inode->i_state & I_SYNC) - goto out; + goto out_unlock_inode; /* * Only add valid (hashed) inodes to the superblock's @@ -1036,16 +1064,17 @@ void __mark_inode_dirty(struct inode *inode, int flags) */ if (!S_ISBLK(inode->i_mode)) { if (inode_unhashed(inode)) - goto out; + goto out_unlock_inode; } if (inode->i_state & I_FREEING) - goto out; + goto out_unlock_inode; /* * If the inode was already on b_dirty/b_io/b_more_io, don't * reposition it (that would break b_dirty time-ordering). */ if (!was_dirty) { + bool wakeup_bdi = false; bdi = inode_to_bdi(inode); if (bdi_cap_writeback_dirty(bdi)) { @@ -1062,15 +1091,20 @@ void __mark_inode_dirty(struct inode *inode, int flags) wakeup_bdi = true; } + spin_unlock(&inode->i_lock); + spin_lock(&inode_wb_list_lock); inode->dirtied_when = jiffies; list_move(&inode->i_wb_list, &bdi->wb.b_dirty); + spin_unlock(&inode_wb_list_lock); + + if (wakeup_bdi) + bdi_wakeup_thread_delayed(bdi); + return; } } -out: - spin_unlock(&inode_lock); +out_unlock_inode: + spin_unlock(&inode->i_lock); - if (wakeup_bdi) - bdi_wakeup_thread_delayed(bdi); } EXPORT_SYMBOL(__mark_inode_dirty); @@ -1101,7 +1135,7 @@ static void wait_sb_inodes(struct super_block *sb) */ WARN_ON(!rwsem_is_locked(&sb->s_umount)); - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); /* * Data integrity sync. Must wait for all pages under writeback, @@ -1111,22 +1145,25 @@ static void wait_sb_inodes(struct super_block *sb) * we still have to wait for that writeout. */ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - struct address_space *mapping; + struct address_space *mapping = inode->i_mapping; - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) - continue; - mapping = inode->i_mapping; - if (mapping->nrpages == 0) + spin_lock(&inode->i_lock); + if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || + (mapping->nrpages == 0)) { + spin_unlock(&inode->i_lock); continue; + } __iget(inode); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_sb_list_lock); + /* - * We hold a reference to 'inode' so it couldn't have - * been removed from s_inodes list while we dropped the - * inode_lock. We cannot iput the inode now as we can - * be holding the last reference and we cannot iput it - * under inode_lock. So we keep the reference and iput - * it later. + * We hold a reference to 'inode' so it couldn't have been + * removed from s_inodes list while we dropped the + * inode_sb_list_lock. We cannot iput the inode now as we can + * be holding the last reference and we cannot iput it under + * inode_sb_list_lock. So we keep the reference and iput it + * later. */ iput(old_inode); old_inode = inode; @@ -1135,9 +1172,9 @@ static void wait_sb_inodes(struct super_block *sb) cond_resched(); - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); } - spin_unlock(&inode_lock); + spin_unlock(&inode_sb_list_lock); iput(old_inode); } @@ -1271,9 +1308,11 @@ int write_inode_now(struct inode *inode, int sync) wbc.nr_to_write = 0; might_sleep(); - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); + spin_lock(&inode->i_lock); ret = writeback_single_inode(inode, &wbc); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_wb_list_lock); if (sync) inode_sync_wait(inode); return ret; @@ -1295,9 +1334,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) { int ret; - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); + spin_lock(&inode->i_lock); ret = writeback_single_inode(inode, wbc); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_wb_list_lock); return ret; } EXPORT_SYMBOL(sync_inode); diff --git a/fs/inode.c b/fs/inode.c index 0b3da4a7770..05a1f75ae79 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -26,6 +26,38 @@ #include <linux/posix_acl.h> #include <linux/ima.h> #include <linux/cred.h> +#include "internal.h" + +/* + * inode locking rules. + * + * inode->i_lock protects: + * inode->i_state, inode->i_hash, __iget() + * inode_lru_lock protects: + * inode_lru, inode->i_lru + * inode_sb_list_lock protects: + * sb->s_inodes, inode->i_sb_list + * inode_wb_list_lock protects: + * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list + * inode_hash_lock protects: + * inode_hashtable, inode->i_hash + * + * Lock ordering: + * + * inode_sb_list_lock + * inode->i_lock + * inode_lru_lock + * + * inode_wb_list_lock + * inode->i_lock + * + * inode_hash_lock + * inode_sb_list_lock + * inode->i_lock + * + * iunique_lock + * inode_hash_lock + */ /* * This is needed for the following functions: @@ -60,6 +92,8 @@ static unsigned int i_hash_mask __read_mostly; static unsigned int i_hash_shift __read_mostly; +static struct hlist_head *inode_hashtable __read_mostly; +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); /* * Each inode can be on two separate lists. One is @@ -74,15 +108,10 @@ static unsigned int i_hash_shift __read_mostly; */ static LIST_HEAD(inode_lru); -static struct hlist_head *inode_hashtable __read_mostly; +static DEFINE_SPINLOCK(inode_lru_lock); -/* - * A simple spinlock to protect the list manipulations. - * - * NOTE! You also have to own the lock if you change - * the i_state of an inode while it is in use.. - */ -DEFINE_SPINLOCK(inode_lock); +__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); +__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); /* * iprune_sem provides exclusion between the icache shrinking and the @@ -137,15 +166,6 @@ int proc_nr_inodes(ctl_table *table, int write, } #endif -static void wake_up_inode(struct inode *inode) -{ - /* - * Prevent speculative execution through spin_unlock(&inode_lock); - */ - smp_mb(); - wake_up_bit(&inode->i_state, __I_NEW); -} - /** * inode_init_always - perform inode structure intialisation * @sb: superblock inode belongs to @@ -336,7 +356,7 @@ static void init_once(void *foo) } /* - * inode_lock must be held + * inode->i_lock must be held */ void __iget(struct inode *inode) { @@ -354,23 +374,22 @@ EXPORT_SYMBOL(ihold); static void inode_lru_list_add(struct inode *inode) { + spin_lock(&inode_lru_lock); if (list_empty(&inode->i_lru)) { list_add(&inode->i_lru, &inode_lru); inodes_stat.nr_unused++; } + spin_unlock(&inode_lru_lock); } static void inode_lru_list_del(struct inode *inode) { + spin_lock(&inode_lru_lock); if (!list_empty(&inode->i_lru)) { list_del_init(&inode->i_lru); inodes_stat.nr_unused--; } -} - -static inline void __inode_sb_list_add(struct inode *inode) -{ - list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); + spin_unlock(&inode_lru_lock); } /** @@ -379,15 +398,17 @@ static inline void __inode_sb_list_add(struct inode *inode) */ void inode_sb_list_add(struct inode *inode) { - spin_lock(&inode_lock); - __inode_sb_list_add(inode); - spin_unlock(&inode_lock); + spin_lock(&inode_sb_list_lock); + list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); + spin_unlock(&inode_sb_list_lock); } EXPORT_SYMBOL_GPL(inode_sb_list_add); -static inline void __inode_sb_list_del(struct inode *inode) +static inline void inode_sb_list_del(struct inode *inode) { + spin_lock(&inode_sb_list_lock); list_del_init(&inode->i_sb_list); + spin_unlock(&inode_sb_list_lock); } static unsigned long hash(struct super_block *sb, unsigned long hashval) @@ -412,24 +433,15 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) { struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); + spin_lock(&inode->i_lock); hlist_add_head(&inode->i_hash, b); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_hash_lock); } EXPORT_SYMBOL(__insert_inode_hash); /** - * __remove_inode_hash - remove an inode from the hash - * @inode: inode to unhash - * - * Remove an inode from the superblock. - */ -static void __remove_inode_hash(struct inode *inode) -{ - hlist_del_init(&inode->i_hash); -} - -/** * remove_inode_hash - remove an inode from the hash * @inode: inode to unhash * @@ -437,9 +449,11 @@ static void __remove_inode_hash(struct inode *inode) */ void remove_inode_hash(struct inode *inode) { - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); + spin_lock(&inode->i_lock); hlist_del_init(&inode->i_hash); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_hash_lock); } EXPORT_SYMBOL(remove_inode_hash); @@ -456,10 +470,29 @@ void end_writeback(struct inode *inode) } EXPORT_SYMBOL(end_writeback); +/* + * Free the inode passed in, removing it from the lists it is still connected + * to. We remove any pages still attached to the inode and wait for any IO that + * is still in progress before finally destroying the inode. + * + * An inode must already be marked I_FREEING so that we avoid the inode being + * moved back onto lists if we race with other code that manipulates the lists + * (e.g. writeback_single_inode). The caller is responsible for setting this. + * + * An inode must already be removed from the LRU list before being evicted from + * the cache. This should occur atomically with setting the I_FREEING state + * flag, so no inodes here should ever be on the LRU when being evicted. + */ static void evict(struct inode *inode) { const struct super_operations *op = inode->i_sb->s_op; + BUG_ON(!(inode->i_state & I_FREEING)); + BUG_ON(!list_empty(&inode->i_lru)); + + inode_wb_list_del(inode); + inode_sb_list_del(inode); + if (op->evict_inode) { op->evict_inode(inode); } else { @@ -471,6 +504,15 @@ static void evict(struct inode *inode) bd_forget(inode); if (S_ISCHR(inode->i_mode) && inode->i_cdev) cd_forget(inode); + + remove_inode_hash(inode); + + spin_lock(&inode->i_lock); + wake_up_bit(&inode->i_state, __I_NEW); + BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); + spin_unlock(&inode->i_lock); + + destroy_inode(inode); } /* @@ -489,14 +531,6 @@ static void dispose_list(struct list_head *head) list_del_init(&inode->i_lru); evict(inode); - - spin_lock(&inode_lock); - __remove_inode_hash(inode); - __inode_sb_list_del(inode); - spin_unlock(&inode_lock); - - wake_up_inode(inode); - destroy_inode(inode); } } @@ -514,25 +548,23 @@ void evict_inodes(struct super_block *sb) struct inode *inode, *next; LIST_HEAD(dispose); - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { if (atomic_read(&inode->i_count)) continue; - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) + + spin_lock(&inode->i_lock); + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { + spin_unlock(&inode->i_lock); continue; + } inode->i_state |= I_FREEING; - - /* - * Move the inode off the IO lists and LRU once I_FREEING is - * set so that it won't get moved back on there if it is dirty. - */ - list_move(&inode->i_lru, &dispose); - list_del_init(&inode->i_wb_list); - if (!(inode->i_state & (I_DIRTY | I_SYNC))) - inodes_stat.nr_unused--; + inode_lru_list_del(inode); + spin_unlock(&inode->i_lock); + list_add(&inode->i_lru, &dispose); } - spin_unlock(&inode_lock); + spin_unlock(&inode_sb_list_lock); dispose_list(&dispose); @@ -561,31 +593,30 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) struct inode *inode, *next; LIST_HEAD(dispose); - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) + spin_lock(&inode->i_lock); + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { + spin_unlock(&inode->i_lock); continue; + } if (inode->i_state & I_DIRTY && !kill_dirty) { + spin_unlock(&inode->i_lock); busy = 1; continue; } if (atomic_read(&inode->i_count)) { + spin_unlock(&inode->i_lock); busy = 1; continue; } inode->i_state |= I_FREEING; - - /* - * Move the inode off the IO lists and LRU once I_FREEING is - * set so that it won't get moved back on there if it is dirty. - */ - list_move(&inode->i_lru, &dispose); - list_del_init(&inode->i_wb_list); - if (!(inode->i_state & (I_DIRTY | I_SYNC))) - inodes_stat.nr_unused--; + inode_lru_list_del(inode); + spin_unlock(&inode->i_lock); + list_add(&inode->i_lru, &dispose); } - spin_unlock(&inode_lock); + spin_unlock(&inode_sb_list_lock); dispose_list(&dispose); @@ -607,7 +638,7 @@ static int can_unuse(struct inode *inode) /* * Scan `goal' inodes on the unused list for freeable ones. They are moved to a - * temporary list and then are freed outside inode_lock by dispose_list(). + * temporary list and then are freed outside inode_lru_lock by dispose_list(). * * Any inodes which are pinned purely because of attached pagecache have their * pagecache removed. If the inode has metadata buffers attached to @@ -628,7 +659,7 @@ static void prune_icache(int nr_to_scan) unsigned long reap = 0; down_read(&iprune_sem); - spin_lock(&inode_lock); + spin_lock(&inode_lru_lock); for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; @@ -638,53 +669,67 @@ static void prune_icache(int nr_to_scan) inode = list_entry(inode_lru.prev, struct inode, i_lru); /* + * we are inverting the inode_lru_lock/inode->i_lock here, + * so use a trylock. If we fail to get the lock, just move the + * inode to the back of the list so we don't spin on it. + */ + if (!spin_trylock(&inode->i_lock)) { + list_move(&inode->i_lru, &inode_lru); + continue; + } + + /* * Referenced or dirty inodes are still in use. Give them * another pass through the LRU as we canot reclaim them now. */ if (atomic_read(&inode->i_count) || (inode->i_state & ~I_REFERENCED)) { list_del_init(&inode->i_lru); + spin_unlock(&inode->i_lock); inodes_stat.nr_unused--; continue; } /* recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { - list_move(&inode->i_lru, &inode_lru); inode->i_state &= ~I_REFERENCED; + list_move(&inode->i_lru, &inode_lru); + spin_unlock(&inode->i_lock); continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { __iget(inode); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_lru_lock); if (remove_inode_buffers(inode)) reap += invalidate_mapping_pages(&inode->i_data, 0, -1); iput(inode); - spin_lock(&inode_lock); + spin_lock(&inode_lru_lock); if (inode != list_entry(inode_lru.next, struct inode, i_lru)) continue; /* wrong inode or list_empty */ - if (!can_unuse(inode)) + /* avoid lock inversions with trylock */ + if (!spin_trylock(&inode->i_lock)) + continue; + if (!can_unuse(inode)) { + spin_unlock(&inode->i_lock); continue; + } } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; + spin_unlock(&inode->i_lock); - /* - * Move the inode off the IO lists and LRU once I_FREEING is - * set so that it won't get moved back on there if it is dirty. - */ list_move(&inode->i_lru, &freeable); - list_del_init(&inode->i_wb_list); inodes_stat.nr_unused--; } if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); else __count_vm_events(PGINODESTEAL, reap); - spin_unlock(&inode_lock); + spin_unlock(&inode_lru_lock); dispose_list(&freeable); up_read(&iprune_sem); @@ -733,15 +778,21 @@ static struct inode *find_inode(struct super_block *sb, repeat: hlist_for_each_entry(inode, node, head, i_hash) { - if (inode->i_sb != sb) + spin_lock(&inode->i_lock); + if (inode->i_sb != sb) { + spin_unlock(&inode->i_lock); continue; - if (!test(inode, data)) + } + if (!test(inode, data)) { + spin_unlock(&inode->i_lock); continue; + } if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } __iget(inode); + spin_unlock(&inode->i_lock); return inode; } return NULL; @@ -759,15 +810,21 @@ static struct inode *find_inode_fast(struct super_block *sb, repeat: hlist_for_each_entry(inode, node, head, i_hash) { - if (inode->i_ino != ino) + spin_lock(&inode->i_lock); + if (inode->i_ino != ino) { + spin_unlock(&inode->i_lock); continue; - if (inode->i_sb != sb) + } + if (inode->i_sb != sb) { + spin_unlock(&inode->i_lock); continue; + } if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } __iget(inode); + spin_unlock(&inode->i_lock); return inode; } return NULL; @@ -827,19 +884,26 @@ struct inode *new_inode(struct super_block *sb) { struct inode *inode; - spin_lock_prefetch(&inode_lock); + spin_lock_prefetch(&inode_sb_list_lock); inode = alloc_inode(sb); if (inode) { - spin_lock(&inode_lock); - __inode_sb_list_add(inode); + spin_lock(&inode->i_lock); inode->i_state = 0; - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + inode_sb_list_add(inode); } return inode; } EXPORT_SYMBOL(new_inode); +/** + * unlock_new_inode - clear the I_NEW state and wake up any waiters + * @inode: new inode to unlock + * + * Called when the inode is fully initialised to clear the new state of the + * inode and wake up anyone waiting for the inode to finish initialisation. + */ void unlock_new_inode(struct inode *inode) { #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -859,51 +923,67 @@ void unlock_new_inode(struct inode *inode) } } #endif - /* - * This is special! We do not need the spinlock when clearing I_NEW, - * because we're guaranteed that nobody else tries to do anything about - * the state of the inode when it is locked, as we just created it (so - * there can be no old holders that haven't tested I_NEW). - * However we must emit the memory barrier so that other CPUs reliably - * see the clearing of I_NEW after the other inode initialisation has - * completed. - */ - smp_mb(); + spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW; - wake_up_inode(inode); + wake_up_bit(&inode->i_state, __I_NEW); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(unlock_new_inode); -/* - * This is called without the inode lock held.. Be careful. +/** + * iget5_locked - obtain an inode from a mounted file system + * @sb: super block of file system + * @hashval: hash value (usually inode number) to get + * @test: callback used for comparisons between inodes + * @set: callback used to initialize a new struct inode + * @data: opaque data pointer to pass to @test and @set + * + * Search for the inode specified by @hashval and @data in the inode cache, + * and if present it is return it with an increased reference count. This is + * a generalized version of iget_locked() for file systems where the inode + * number is not sufficient for unique identification of an inode. * - * We no longer cache the sb_flags in i_flags - see fs.h - * -- rmk@arm.uk.linux.org + * If the inode is not in cache, allocate a new inode and return it locked, + * hashed, and with the I_NEW flag set. The file system gets to fill it in + * before unlocking it via unlock_new_inode(). + * + * Note both @test and @set are called with the inode_hash_lock held, so can't + * sleep. */ -static struct inode *get_new_inode(struct super_block *sb, - struct hlist_head *head, - int (*test)(struct inode *, void *), - int (*set)(struct inode *, void *), - void *data) +struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *data) { + struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; + spin_lock(&inode_hash_lock); + inode = find_inode(sb, head, test, data); + spin_unlock(&inode_hash_lock); + + if (inode) { + wait_on_inode(inode); + return inode; + } + inode = alloc_inode(sb); if (inode) { struct inode *old; - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); /* We released the lock, so.. */ old = find_inode(sb, head, test, data); if (!old) { if (set(inode, data)) goto set_failed; - hlist_add_head(&inode->i_hash, head); - __inode_sb_list_add(inode); + spin_lock(&inode->i_lock); inode->i_state = I_NEW; - spin_unlock(&inode_lock); + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode->i_lock); + inode_sb_list_add(inode); + spin_unlock(&inode_hash_lock); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents @@ -916,7 +996,7 @@ static struct inode *get_new_inode(struct super_block *sb, * us. Use the old inode instead of the one we just * allocated. */ - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); destroy_inode(inode); inode = old; wait_on_inode(inode); @@ -924,33 +1004,53 @@ static struct inode *get_new_inode(struct super_block *sb, return inode; set_failed: - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); destroy_inode(inode); return NULL; } +EXPORT_SYMBOL(iget5_locked); -/* - * get_new_inode_fast is the fast path version of get_new_inode, see the - * comment at iget_locked for details. +/** + * iget_locked - obtain an inode from a mounted file system + * @sb: super block of file system + * @ino: inode number to get + * + * Search for the inode specified by @ino in the inode cache and if present + * return it with an increased reference count. This is for file systems + * where the inode number is sufficient for unique identification of an inode. + * + * If the inode is not in cache, allocate a new inode and return it locked, + * hashed, and with the I_NEW flag set. The file system gets to fill it in + * before unlocking it via unlock_new_inode(). */ -static struct inode *get_new_inode_fast(struct super_block *sb, - struct hlist_head *head, unsigned long ino) +struct inode *iget_locked(struct super_block *sb, unsigned long ino) { + struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; + spin_lock(&inode_hash_lock); + inode = find_inode_fast(sb, head, ino); + spin_unlock(&inode_hash_lock); + if (inode) { + wait_on_inode(inode); + return inode; + } + inode = alloc_inode(sb); if (inode) { struct inode *old; - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); /* We released the lock, so.. */ old = find_inode_fast(sb, head, ino); if (!old) { inode->i_ino = ino; - hlist_add_head(&inode->i_hash, head); - __inode_sb_list_add(inode); + spin_lock(&inode->i_lock); inode->i_state = I_NEW; - spin_unlock(&inode_lock); + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode->i_lock); + inode_sb_list_add(inode); + spin_unlock(&inode_hash_lock); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents @@ -963,13 +1063,14 @@ static struct inode *get_new_inode_fast(struct super_block *sb, * us. Use the old inode instead of the one we just * allocated. */ - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); destroy_inode(inode); inode = old; wait_on_inode(inode); } return inode; } +EXPORT_SYMBOL(iget_locked); /* * search the inode cache for a matching inode number. @@ -984,10 +1085,14 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino) struct hlist_node *node; struct inode *inode; + spin_lock(&inode_hash_lock); hlist_for_each_entry(inode, node, b, i_hash) { - if (inode->i_ino == ino && inode->i_sb == sb) + if (inode->i_ino == ino && inode->i_sb == sb) { + spin_unlock(&inode_hash_lock); return 0; + } } + spin_unlock(&inode_hash_lock); return 1; } @@ -1017,7 +1122,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) static unsigned int counter; ino_t res; - spin_lock(&inode_lock); spin_lock(&iunique_lock); do { if (counter <= max_reserved) @@ -1025,7 +1129,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) res = counter++; } while (!test_inode_iunique(sb, res)); spin_unlock(&iunique_lock); - spin_unlock(&inode_lock); return res; } @@ -1033,116 +1136,50 @@ EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { - spin_lock(&inode_lock); - if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) + spin_lock(&inode->i_lock); + if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { __iget(inode); - else + spin_unlock(&inode->i_lock); + } else { + spin_unlock(&inode->i_lock); /* * Handle the case where s_op->clear_inode is not been * called yet, and somebody is calling igrab * while the inode is getting freed. */ inode = NULL; - spin_unlock(&inode_lock); + } return inode; } EXPORT_SYMBOL(igrab); /** - * ifind - internal function, you want ilookup5() or iget5(). - * @sb: super block of file system to search - * @head: the head of the list to search - * @test: callback used for comparisons between inodes - * @data: opaque data pointer to pass to @test - * @wait: if true wait for the inode to be unlocked, if false do not - * - * ifind() searches for the inode specified by @data in the inode - * cache. This is a generalized version of ifind_fast() for file systems where - * the inode number is not sufficient for unique identification of an inode. - * - * If the inode is in the cache, the inode is returned with an incremented - * reference count. - * - * Otherwise NULL is returned. - * - * Note, @test is called with the inode_lock held, so can't sleep. - */ -static struct inode *ifind(struct super_block *sb, - struct hlist_head *head, int (*test)(struct inode *, void *), - void *data, const int wait) -{ - struct inode *inode; - - spin_lock(&inode_lock); - inode = find_inode(sb, head, test, data); - if (inode) { - spin_unlock(&inode_lock); - if (likely(wait)) - wait_on_inode(inode); - return inode; - } - spin_unlock(&inode_lock); - return NULL; -} - -/** - * ifind_fast - internal function, you want ilookup() or iget(). - * @sb: super block of file system to search - * @head: head of the list to search - * @ino: inode number to search for - * - * ifind_fast() searches for the inode @ino in the inode cache. This is for - * file systems where the inode number is sufficient for unique identification - * of an inode. - * - * If the inode is in the cache, the inode is returned with an incremented - * reference count. - * - * Otherwise NULL is returned. - */ -static struct inode *ifind_fast(struct super_block *sb, - struct hlist_head *head, unsigned long ino) -{ - struct inode *inode; - - spin_lock(&inode_lock); - inode = find_inode_fast(sb, head, ino); - if (inode) { - spin_unlock(&inode_lock); - wait_on_inode(inode); - return inode; - } - spin_unlock(&inode_lock); - return NULL; -} - -/** * ilookup5_nowait - search for an inode in the inode cache * @sb: super block of file system to search * @hashval: hash value (usually inode number) to search for * @test: callback used for comparisons between inodes * @data: opaque data pointer to pass to @test * - * ilookup5() uses ifind() to search for the inode specified by @hashval and - * @data in the inode cache. This is a generalized version of ilookup() for - * file systems where the inode number is not sufficient for unique - * identification of an inode. - * + * Search for the inode specified by @hashval and @data in the inode cache. * If the inode is in the cache, the inode is returned with an incremented - * reference count. Note, the inode lock is not waited upon so you have to be - * very careful what you do with the returned inode. You probably should be - * using ilookup5() instead. + * reference count. * - * Otherwise NULL is returned. + * Note: I_NEW is not waited upon so you have to be very careful what you do + * with the returned inode. You probably should be using ilookup5() instead. * - * Note, @test is called with the inode_lock held, so can't sleep. + * Note: @test is called with the inode_hash_lock held, so can't sleep. */ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode *inode; + + spin_lock(&inode_hash_lock); + inode = find_inode(sb, head, test, data); + spin_unlock(&inode_hash_lock); - return ifind(sb, head, test, data, 0); + return inode; } EXPORT_SYMBOL(ilookup5_nowait); @@ -1153,24 +1190,24 @@ EXPORT_SYMBOL(ilookup5_nowait); * @test: callback used for comparisons between inodes * @data: opaque data pointer to pass to @test * - * ilookup5() uses ifind() to search for the inode specified by @hashval and - * @data in the inode cache. This is a generalized version of ilookup() for - * file systems where the inode number is not sufficient for unique - * identification of an inode. - * - * If the inode is in the cache, the inode lock is waited upon and the inode is + * Search for the inode specified by @hashval and @data in the inode cache, + * and if the inode is in the cache, return the inode with an incremented + * reference count. Waits on I_NEW before returning the inode. * returned with an incremented reference count. * - * Otherwise NULL is returned. + * This is a generalized version of ilookup() for file systems where the + * inode number is not sufficient for unique identification of an inode. * - * Note, @test is called with the inode_lock held, so can't sleep. + * Note: @test is called with the inode_hash_lock held, so can't sleep. */ struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode *inode = ilookup5_nowait(sb, hashval, test, data); - return ifind(sb, head, test, data, 1); + if (inode) + wait_on_inode(inode); + return inode; } EXPORT_SYMBOL(ilookup5); @@ -1179,91 +1216,23 @@ EXPORT_SYMBOL(ilookup5); * @sb: super block of file system to search * @ino: inode number to search for * - * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache. - * This is for file systems where the inode number is sufficient for unique - * identification of an inode. - * - * If the inode is in the cache, the inode is returned with an incremented - * reference count. - * - * Otherwise NULL is returned. + * Search for the inode @ino in the inode cache, and if the inode is in the + * cache, the inode is returned with an incremented reference count. */ struct inode *ilookup(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); - - return ifind_fast(sb, head, ino); -} -EXPORT_SYMBOL(ilookup); - -/** - * iget5_locked - obtain an inode from a mounted file system - * @sb: super block of file system - * @hashval: hash value (usually inode number) to get - * @test: callback used for comparisons between inodes - * @set: callback used to initialize a new struct inode - * @data: opaque data pointer to pass to @test and @set - * - * iget5_locked() uses ifind() to search for the inode specified by @hashval - * and @data in the inode cache and if present it is returned with an increased - * reference count. This is a generalized version of iget_locked() for file - * systems where the inode number is not sufficient for unique identification - * of an inode. - * - * If the inode is not in cache, get_new_inode() is called to allocate a new - * inode and this is returned locked, hashed, and with the I_NEW flag set. The - * file system gets to fill it in before unlocking it via unlock_new_inode(). - * - * Note both @test and @set are called with the inode_lock held, so can't sleep. - */ -struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, - int (*test)(struct inode *, void *), - int (*set)(struct inode *, void *), void *data) -{ - struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; - inode = ifind(sb, head, test, data, 1); - if (inode) - return inode; - /* - * get_new_inode() will do the right thing, re-trying the search - * in case it had to block at any point. - */ - return get_new_inode(sb, head, test, set, data); -} -EXPORT_SYMBOL(iget5_locked); - -/** - * iget_locked - obtain an inode from a mounted file system - * @sb: super block of file system - * @ino: inode number to get - * - * iget_locked() uses ifind_fast() to search for the inode specified by @ino in - * the inode cache and if present it is returned with an increased reference - * count. This is for file systems where the inode number is sufficient for - * unique identification of an inode. - * - * If the inode is not in cache, get_new_inode_fast() is called to allocate a - * new inode and this is returned locked, hashed, and with the I_NEW flag set. - * The file system gets to fill it in before unlocking it via - * unlock_new_inode(). - */ -struct inode *iget_locked(struct super_block *sb, unsigned long ino) -{ - struct hlist_head *head = inode_hashtable + hash(sb, ino); - struct inode *inode; + spin_lock(&inode_hash_lock); + inode = find_inode_fast(sb, head, ino); + spin_unlock(&inode_hash_lock); - inode = ifind_fast(sb, head, ino); if (inode) - return inode; - /* - * get_new_inode_fast() will do the right thing, re-trying the search - * in case it had to block at any point. - */ - return get_new_inode_fast(sb, head, ino); + wait_on_inode(inode); + return inode; } -EXPORT_SYMBOL(iget_locked); +EXPORT_SYMBOL(ilookup); int insert_inode_locked(struct inode *inode) { @@ -1271,27 +1240,33 @@ int insert_inode_locked(struct inode *inode) ino_t ino = inode->i_ino; struct hlist_head *head = inode_hashtable + hash(sb, ino); - inode->i_state |= I_NEW; while (1) { struct hlist_node *node; struct inode *old = NULL; - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); hlist_for_each_entry(old, node, head, i_hash) { if (old->i_ino != ino) continue; if (old->i_sb != sb) continue; - if (old->i_state & (I_FREEING|I_WILL_FREE)) + spin_lock(&old->i_lock); + if (old->i_state & (I_FREEING|I_WILL_FREE)) { + spin_unlock(&old->i_lock); continue; + } break; } if (likely(!node)) { + spin_lock(&inode->i_lock); + inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_hash_lock); return 0; } __iget(old); - spin_unlock(&inode_lock); + spin_unlock(&old->i_lock); + spin_unlock(&inode_hash_lock); wait_on_inode(old); if (unlikely(!inode_unhashed(old))) { iput(old); @@ -1308,29 +1283,34 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, struct super_block *sb = inode->i_sb; struct hlist_head *head = inode_hashtable + hash(sb, hashval); - inode->i_state |= I_NEW; - while (1) { struct hlist_node *node; struct inode *old = NULL; - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); hlist_for_each_entry(old, node, head, i_hash) { if (old->i_sb != sb) continue; if (!test(old, data)) continue; - if (old->i_state & (I_FREEING|I_WILL_FREE)) + spin_lock(&old->i_lock); + if (old->i_state & (I_FREEING|I_WILL_FREE)) { + spin_unlock(&old->i_lock); continue; + } break; } if (likely(!node)) { + spin_lock(&inode->i_lock); + inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_hash_lock); return 0; } __iget(old); - spin_unlock(&inode_lock); + spin_unlock(&old->i_lock); + spin_unlock(&inode_hash_lock); wait_on_inode(old); if (unlikely(!inode_unhashed(old))) { iput(old); @@ -1375,47 +1355,35 @@ static void iput_final(struct inode *inode) const struct super_operations *op = inode->i_sb->s_op; int drop; + WARN_ON(inode->i_state & I_NEW); + if (op && op->drop_inode) drop = op->drop_inode(inode); else drop = generic_drop_inode(inode); + if (!drop && (sb->s_flags & MS_ACTIVE)) { + inode->i_state |= I_REFERENCED; + if (!(inode->i_state & (I_DIRTY|I_SYNC))) + inode_lru_list_add(inode); + spin_unlock(&inode->i_lock); + return; + } + if (!drop) { - if (sb->s_flags & MS_ACTIVE) { - inode->i_state |= I_REFERENCED; - if (!(inode->i_state & (I_DIRTY|I_SYNC))) { - inode_lru_list_add(inode); - } - spin_unlock(&inode_lock); - return; - } - WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_WILL_FREE; - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); write_inode_now(inode, 1); - spin_lock(&inode_lock); + spin_lock(&inode->i_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; - __remove_inode_hash(inode); } - WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - - /* - * Move the inode off the IO lists and LRU once I_FREEING is - * set so that it won't get moved back on there if it is dirty. - */ inode_lru_list_del(inode); - list_del_init(&inode->i_wb_list); + spin_unlock(&inode->i_lock); - __inode_sb_list_del(inode); - spin_unlock(&inode_lock); evict(inode); - remove_inode_hash(inode); - wake_up_inode(inode); - BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); - destroy_inode(inode); } /** @@ -1432,7 +1400,7 @@ void iput(struct inode *inode) if (inode) { BUG_ON(inode->i_state & I_CLEAR); - if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) + if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) iput_final(inode); } } @@ -1611,9 +1579,8 @@ EXPORT_SYMBOL(inode_wait); * to recheck inode state. * * It doesn't matter if I_NEW is not set initially, a call to - * wake_up_inode() after removing from the hash list will DTRT. - * - * This is called with inode_lock held. + * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list + * will DTRT. */ static void __wait_on_freeing_inode(struct inode *inode) { @@ -1621,10 +1588,11 @@ static void __wait_on_freeing_inode(struct inode *inode) DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); wq = bit_waitqueue(&inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_hash_lock); schedule(); finish_wait(wq, &wait.wait); - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); } static __initdata unsigned long ihash_entries; diff --git a/fs/internal.h b/fs/internal.h index 8318059b42c..b29c46e4e32 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -125,6 +125,13 @@ extern long do_handle_open(int mountdirfd, /* * inode.c */ +extern spinlock_t inode_sb_list_lock; + +/* + * fs-writeback.c + */ +extern void inode_wb_list_del(struct inode *inode); + extern int get_nr_dirty_inodes(void); extern void evict_inodes(struct super_block *); extern int invalidate_inodes(struct super_block *, bool); diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 03b8c240aed..edfea7a3a74 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -293,7 +293,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc) return ret; } -/* called with inode_lock held */ +/* called with inode->i_lock held */ static int logfs_drop_inode(struct inode *inode) { struct logfs_super *super = logfs_super(inode->i_sb); diff --git a/fs/namei.c b/fs/namei.c index d0066e17d45..3cb616d38d9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -992,6 +992,12 @@ int follow_down_one(struct path *path) return 0; } +static inline bool managed_dentry_might_block(struct dentry *dentry) +{ + return (dentry->d_flags & DCACHE_MANAGE_TRANSIT && + dentry->d_op->d_manage(dentry, true) < 0); +} + /* * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we * meet a managed dentry and we're not walking to "..". True is returned to @@ -1000,19 +1006,26 @@ int follow_down_one(struct path *path) static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, struct inode **inode, bool reverse_transit) { - while (d_mountpoint(path->dentry)) { + for (;;) { struct vfsmount *mounted; - if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && - !reverse_transit && - path->dentry->d_op->d_manage(path->dentry, true) < 0) + /* + * Don't forget we might have a non-mountpoint managed dentry + * that wants to block transit. + */ + *inode = path->dentry->d_inode; + if (!reverse_transit && + unlikely(managed_dentry_might_block(path->dentry))) return false; + + if (!d_mountpoint(path->dentry)) + break; + mounted = __lookup_mnt(path->mnt, path->dentry, 1); if (!mounted) break; path->mnt = mounted; path->dentry = mounted->mnt_root; nd->seq = read_seqcount_begin(&path->dentry->d_seq); - *inode = path->dentry->d_inode; } if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 4c29fcf557d..07ea8d3e6ea 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -22,13 +22,14 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/spinlock.h> -#include <linux/writeback.h> /* for inode_lock */ #include <asm/atomic.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" +#include "../internal.h" + /* * Recalculate the mask of events relevant to a given inode locked. */ @@ -237,15 +238,14 @@ out: * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. * @list: list of inodes being unmounted (sb->s_inodes) * - * Called with inode_lock held, protecting the unmounting super block's list - * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. - * We temporarily drop inode_lock, however, and CAN block. + * Called during unmount with no locks held, so needs to be safe against + * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block. */ void fsnotify_unmount_inodes(struct list_head *list) { struct inode *inode, *next_i, *need_iput = NULL; - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); list_for_each_entry_safe(inode, next_i, list, i_sb_list) { struct inode *need_iput_tmp; @@ -254,8 +254,11 @@ void fsnotify_unmount_inodes(struct list_head *list) * I_WILL_FREE, or I_NEW which is fine because by that point * the inode cannot have any associated watches. */ - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); continue; + } /* * If i_count is zero, the inode cannot have any watches and @@ -263,8 +266,10 @@ void fsnotify_unmount_inodes(struct list_head *list) * evict all inodes with zero i_count from icache which is * unnecessarily violent and may in fact be illegal to do. */ - if (!atomic_read(&inode->i_count)) + if (!atomic_read(&inode->i_count)) { + spin_unlock(&inode->i_lock); continue; + } need_iput_tmp = need_iput; need_iput = NULL; @@ -274,22 +279,25 @@ void fsnotify_unmount_inodes(struct list_head *list) __iget(inode); else need_iput_tmp = NULL; + spin_unlock(&inode->i_lock); /* In case the dropping of a reference would nuke next_i. */ if ((&next_i->i_sb_list != list) && - atomic_read(&next_i->i_count) && - !(next_i->i_state & (I_FREEING | I_WILL_FREE))) { - __iget(next_i); - need_iput = next_i; + atomic_read(&next_i->i_count)) { + spin_lock(&next_i->i_lock); + if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) { + __iget(next_i); + need_iput = next_i; + } + spin_unlock(&next_i->i_lock); } /* - * We can safely drop inode_lock here because we hold + * We can safely drop inode_sb_list_lock here because we hold * references on both inode and next_i. Also no new inodes - * will be added since the umount has begun. Finally, - * iprune_mutex keeps shrink_icache_memory() away. + * will be added since the umount has begun. */ - spin_unlock(&inode_lock); + spin_unlock(&inode_sb_list_lock); if (need_iput_tmp) iput(need_iput_tmp); @@ -301,7 +309,7 @@ void fsnotify_unmount_inodes(struct list_head *list) iput(inode); - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); } - spin_unlock(&inode_lock); + spin_unlock(&inode_sb_list_lock); } diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 325185e514b..50c00856f73 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -91,7 +91,6 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/srcu.h> -#include <linux/writeback.h> /* for inode_lock */ #include <asm/atomic.h> diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 85eebff6d0d..e86577d6c5c 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -23,7 +23,6 @@ #include <linux/mount.h> #include <linux/mutex.h> #include <linux/spinlock.h> -#include <linux/writeback.h> /* for inode_lock */ #include <asm/atomic.h> diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index a627ed82c0a..0b56c6b7ec0 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -54,7 +54,7 @@ * * Return 1 if the attributes match and 0 if not. * - * NOTE: This function runs with the inode_lock spin lock held so it is not + * NOTE: This function runs with the inode->i_lock spin lock held so it is not * allowed to sleep. */ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) @@ -98,7 +98,7 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) * * Return 0 on success and -errno on error. * - * NOTE: This function runs with the inode_lock spin lock held so it is not + * NOTE: This function runs with the inode->i_lock spin lock held so it is not * allowed to sleep. (Hence the GFP_ATOMIC allocation.) */ static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index a2a622e079f..fcc8ae75d87 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -76,7 +76,7 @@ #include <linux/buffer_head.h> #include <linux/capability.h> #include <linux/quotaops.h> -#include <linux/writeback.h> /* for inode_lock, oddly enough.. */ +#include "../internal.h" /* ugh */ #include <asm/uaccess.h> @@ -900,33 +900,38 @@ static void add_dquot_ref(struct super_block *sb, int type) int reserved = 0; #endif - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) + spin_lock(&inode->i_lock); + if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || + !atomic_read(&inode->i_writecount) || + !dqinit_needed(inode, type)) { + spin_unlock(&inode->i_lock); continue; + } #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif - if (!atomic_read(&inode->i_writecount)) - continue; - if (!dqinit_needed(inode, type)) - continue; - __iget(inode); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_sb_list_lock); iput(old_inode); __dquot_initialize(inode, type); - /* We hold a reference to 'inode' so it couldn't have been - * removed from s_inodes list while we dropped the inode_lock. - * We cannot iput the inode now as we can be holding the last - * reference and we cannot iput it under inode_lock. So we - * keep the reference and iput it later. */ + + /* + * We hold a reference to 'inode' so it couldn't have been + * removed from s_inodes list while we dropped the + * inode_sb_list_lock We cannot iput the inode now as we can be + * holding the last reference and we cannot iput it under + * inode_sb_list_lock. So we keep the reference and iput it + * later. + */ old_inode = inode; - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); } - spin_unlock(&inode_lock); + spin_unlock(&inode_sb_list_lock); iput(old_inode); #ifdef CONFIG_QUOTA_DEBUG @@ -1007,7 +1012,7 @@ static void remove_dquot_ref(struct super_block *sb, int type, struct inode *inode; int reserved = 0; - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We have to scan also I_NEW inodes because they can already @@ -1021,7 +1026,7 @@ static void remove_dquot_ref(struct super_block *sb, int type, remove_inode_dquot_ref(inode, type, tofree_head); } } - spin_unlock(&inode_lock); + spin_unlock(&inode_sb_list_lock); #ifdef CONFIG_QUOTA_DEBUG if (reserved) { printk(KERN_WARNING "VFS (%s): Writes happened after quota" diff --git a/include/drm/drm.h b/include/drm/drm.h index 9ac43139617..4be33b4ca2f 100644 --- a/include/drm/drm.h +++ b/include/drm/drm.h @@ -463,12 +463,15 @@ struct drm_irq_busid { enum drm_vblank_seq_type { _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */ _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ + /* bits 1-6 are reserved for high crtcs */ + _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e, _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */ _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */ }; +#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1 #define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) #define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \ @@ -753,6 +756,7 @@ struct drm_event_vblank { }; #define DRM_CAP_DUMB_BUFFER 0x1 +#define DRM_CAP_VBLANK_HIGH_CRTC 0x2 /* typedef area */ #ifndef __KERNEL__ diff --git a/include/linux/fs.h b/include/linux/fs.h index ce7e1855519..b677bd77f2d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1636,7 +1636,7 @@ struct super_operations { }; /* - * Inode state bits. Protected by inode_lock. + * Inode state bits. Protected by inode->i_lock * * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, * I_DIRTY_DATASYNC and I_DIRTY_PAGES. diff --git a/include/linux/mm.h b/include/linux/mm.h index f9535b2c955..7606d7db96c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -861,7 +861,7 @@ extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) /* - * Flags passed to __show_mem() and __show_free_areas() to suppress output in + * Flags passed to show_mem() and __show_free_areas() to suppress output in * various contexts. */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* filter disallowed nodes */ @@ -1360,8 +1360,7 @@ extern void setup_per_zone_wmarks(void); extern void calculate_zone_inactive_ratio(struct zone *zone); extern void mem_init(void); extern void __init mmap_init(void); -extern void show_mem(void); -extern void __show_mem(unsigned int flags); +extern void show_mem(unsigned int flags); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern int after_bootmem; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index eb354f6f26b..26f9e3612e0 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -277,7 +277,7 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr) /* * Mark inode fully dirty. Since we are allocating blocks, inode * would become fully dirty soon anyway and it reportedly - * reduces inode_lock contention. + * reduces lock contention. */ mark_inode_dirty(inode); } diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 0ead399e08b..17e7ccc322a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -9,7 +9,7 @@ struct backing_dev_info; -extern spinlock_t inode_lock; +extern spinlock_t inode_wb_list_lock; /* * fs/fs-writeback.c diff --git a/lib/show_mem.c b/lib/show_mem.c index d8d602b58c3..90cbe4bb596 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -9,7 +9,7 @@ #include <linux/nmi.h> #include <linux/quicklist.h> -void __show_mem(unsigned int filter) +void show_mem(unsigned int filter) { pg_data_t *pgdat; unsigned long total = 0, reserved = 0, shared = 0, @@ -61,8 +61,3 @@ void __show_mem(unsigned int filter) quicklist_total_size()); #endif } - -void show_mem(void) -{ - __show_mem(0); -} diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 8fe9d340792..0d9a036ada6 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -67,14 +67,14 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) struct inode *inode; nr_wb = nr_dirty = nr_io = nr_more_io = 0; - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); list_for_each_entry(inode, &wb->b_dirty, i_wb_list) nr_dirty++; list_for_each_entry(inode, &wb->b_io, i_wb_list) nr_io++; list_for_each_entry(inode, &wb->b_more_io, i_wb_list) nr_more_io++; - spin_unlock(&inode_lock); + spin_unlock(&inode_wb_list_lock); global_dirty_limits(&background_thresh, &dirty_thresh); bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); @@ -676,11 +676,11 @@ void bdi_destroy(struct backing_dev_info *bdi) if (bdi_has_dirty_io(bdi)) { struct bdi_writeback *dst = &default_backing_dev_info.wb; - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); list_splice(&bdi->wb.b_dirty, &dst->b_dirty); list_splice(&bdi->wb.b_io, &dst->b_io); list_splice(&bdi->wb.b_more_io, &dst->b_more_io); - spin_unlock(&inode_lock); + spin_unlock(&inode_wb_list_lock); } bdi_unregister(bdi); diff --git a/mm/filemap.c b/mm/filemap.c index 04d1992fd86..c641edf553a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -80,8 +80,8 @@ * ->i_mutex * ->i_alloc_sem (various) * - * ->inode_lock - * ->sb_lock (fs/fs-writeback.c) + * inode_wb_list_lock + * sb_lock (fs/fs-writeback.c) * ->mapping->tree_lock (__sync_single_inode) * * ->i_mmap_lock @@ -98,8 +98,10 @@ * ->zone.lru_lock (check_pte_range->isolate_lru_page) * ->private_lock (page_remove_rmap->set_page_dirty) * ->tree_lock (page_remove_rmap->set_page_dirty) - * ->inode_lock (page_remove_rmap->set_page_dirty) - * ->inode_lock (zap_pte_range->set_page_dirty) + * inode_wb_list_lock (page_remove_rmap->set_page_dirty) + * ->inode->i_lock (page_remove_rmap->set_page_dirty) + * inode_wb_list_lock (zap_pte_range->set_page_dirty) + * ->inode->i_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->__set_page_dirty_buffers) * * (code doesn't rely on that order, so you could switch it around) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 62a5cec08a1..6a819d1b2c7 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -406,7 +406,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, task_unlock(current); dump_stack(); mem_cgroup_print_oom_info(mem, p); - __show_mem(SHOW_MEM_FILTER_NODES); + show_mem(SHOW_MEM_FILTER_NODES); if (sysctl_oom_dump_tasks) dump_tasks(mem, nodemask); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8e5726ab0d8..d6e7ba7373b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2195,7 +2195,7 @@ nopage: current->comm, order, gfp_mask); dump_stack(); if (!should_suppress_show_mem()) - __show_mem(filter); + show_mem(filter); } return page; got_pg: diff --git a/mm/rmap.c b/mm/rmap.c index 4a8e99a0fb9..8da044a1db0 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -31,11 +31,12 @@ * swap_lock (in swap_duplicate, swap_info_get) * mmlist_lock (in mmput, drain_mmlist and others) * mapping->private_lock (in __set_page_dirty_buffers) - * inode_lock (in set_page_dirty's __mark_inode_dirty) + * inode->i_lock (in set_page_dirty's __mark_inode_dirty) + * inode_wb_list_lock (in set_page_dirty's __mark_inode_dirty) * sb_lock (within inode_lock in fs/fs-writeback.c) * mapping->tree_lock (widely used, in set_page_dirty, * in arch-dependent flush_dcache_mmap_lock, - * within inode_lock in __sync_single_inode) + * within inode_wb_list_lock in __sync_single_inode) * * (code doesn't rely on that order so it could be switched around) * ->tasklist_lock diff --git a/mm/slub.c b/mm/slub.c index 93de30db95f..f881874843a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -849,11 +849,11 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) local_irq_save(flags); kmemcheck_slab_free(s, x, s->objsize); debug_check_no_locks_freed(x, s->objsize); - if (!(s->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(x, s->objsize); local_irq_restore(flags); } #endif + if (!(s->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(x, s->objsize); } /* @@ -1604,7 +1604,7 @@ static inline void note_cmpxchg_failure(const char *n, void init_kmem_cache_cpus(struct kmem_cache *s) { -#if defined(CONFIG_CMPXCHG_LOCAL) && defined(CONFIG_PREEMPT) +#ifdef CONFIG_CMPXCHG_LOCAL int cpu; for_each_possible_cpu(cpu) |