From f77d390c9779c496aa5b99ec832996fb76bb1d13 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 2 Feb 2009 16:55:46 +1100 Subject: drm: Split drm_map and drm_local_map Once upon a time, the DRM made the distinction between the drm_map data structure exchanged with user space and the drm_local_map used in the kernel. For some reasons, while the BSD port still has that "feature", the linux part abused drm_map for kernel internal usage as the local map only existed as a typedef of the struct drm_map. This patch fixes it by declaring struct drm_local_map separately (though its content is currently identical to the userspace variant), and changing the kernel code to only use that, except when it's a user<->kernel interface (ie. ioctl). This allows subsequent changes to the in-kernel format I've also replaced the use of drm_local_map_t with struct drm_local_map in a couple of places. Mostly by accident but they are the same (the former is a typedef of the later) and I have some remote plans and half finished patch to completely kill the drm_local_map_t typedef so I left those bits in. Signed-off-by: Benjamin Herrenschmidt Acked-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 37427e4016c..592b24efeb4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -627,7 +627,7 @@ i915_gem_create_mmap_offset(struct drm_gem_object *obj) struct drm_gem_mm *mm = dev->mm_private; struct drm_i915_gem_object *obj_priv = obj->driver_private; struct drm_map_list *list; - struct drm_map *map; + struct drm_local_map *map; int ret = 0; /* Set the object up for mmap'ing */ -- cgit v1.2.3-70-g09d2 From 995e37cafb90f104395e015a9836cc459df1fc39 Mon Sep 17 00:00:00 2001 From: "Owain G. Ainsworth" Date: Fri, 20 Feb 2009 08:30:19 +0000 Subject: i915/drm: Remove two redundant agp_chipset_flushes agp_chipset_flush() is for flushing the intel GMCH write cache via the IFP, these two uses are for when we're getting the object into the cpu READ domain, and thus should not be needed. This confused me when I was getting my head around the code. With thanks to airlied for helping me check my mental picture of how the flushes and clflushes are supposed to be used. Signed-off-by: Owain G. Ainsworth Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 592b24efeb4..8d5ec5fd525 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1913,7 +1913,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) { - struct drm_device *dev = obj->dev; int ret; i915_gem_object_flush_gpu_write_domain(obj); @@ -1932,7 +1931,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) /* Flush the CPU cache if it's still invalid. */ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { i915_gem_clflush_object(obj); - drm_agp_chipset_flush(dev); obj->read_domains |= I915_GEM_DOMAIN_CPU; } @@ -2144,7 +2142,6 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) { - struct drm_device *dev = obj->dev; struct drm_i915_gem_object *obj_priv = obj->driver_private; if (!obj_priv->page_cpu_valid) @@ -2160,7 +2157,6 @@ i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) continue; drm_clflush_pages(obj_priv->page_list + i, 1); } - drm_agp_chipset_flush(dev); } /* Free the page_cpu_valid mappings which are now stale, whether -- cgit v1.2.3-70-g09d2 From 3de09aa3b38910d366f4710ffdf430c9d387d1a3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 9 Mar 2009 09:42:23 -0700 Subject: drm/i915: Fix lock order reversal in GTT pwrite path. Since the pagefault path determines that the lock order we use has to be mmap_sem -> struct_mutex, we can't allow page faults to occur while the struct_mutex is held. To fix this in pwrite, we first try optimistically to see if we can copy from user without faulting. If it fails, fall back to using get_user_pages to pin the user's memory, and map those pages atomically when copying it to the GPU. Signed-off-by: Eric Anholt Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_gem.c | 166 +++++++++++++++++++++++++++++++++------- 1 file changed, 139 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 37427e4016c..35f8c7bd0d3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -223,29 +223,34 @@ fast_user_write(struct io_mapping *mapping, */ static inline int -slow_user_write(struct io_mapping *mapping, - loff_t page_base, int page_offset, - char __user *user_data, - int length) +slow_kernel_write(struct io_mapping *mapping, + loff_t gtt_base, int gtt_offset, + struct page *user_page, int user_offset, + int length) { - char __iomem *vaddr; + char *src_vaddr, *dst_vaddr; unsigned long unwritten; - vaddr = io_mapping_map_wc(mapping, page_base); - if (vaddr == NULL) - return -EFAULT; - unwritten = __copy_from_user(vaddr + page_offset, - user_data, length); - io_mapping_unmap(vaddr); + dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base); + src_vaddr = kmap_atomic(user_page, KM_USER1); + unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset, + src_vaddr + user_offset, + length); + kunmap_atomic(src_vaddr, KM_USER1); + io_mapping_unmap_atomic(dst_vaddr); if (unwritten) return -EFAULT; return 0; } +/** + * This is the fast pwrite path, where we copy the data directly from the + * user into the GTT, uncached. + */ static int -i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, - struct drm_i915_gem_pwrite *args, - struct drm_file *file_priv) +i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj, + struct drm_i915_gem_pwrite *args, + struct drm_file *file_priv) { struct drm_i915_gem_object *obj_priv = obj->driver_private; drm_i915_private_t *dev_priv = dev->dev_private; @@ -273,7 +278,6 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, obj_priv = obj->driver_private; offset = obj_priv->gtt_offset + args->offset; - obj_priv->dirty = 1; while (remain > 0) { /* Operation in this page @@ -292,16 +296,11 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, page_offset, user_data, page_length); /* If we get a fault while copying data, then (presumably) our - * source page isn't available. In this case, use the - * non-atomic function + * source page isn't available. Return the error and we'll + * retry in the slow path. */ - if (ret) { - ret = slow_user_write (dev_priv->mm.gtt_mapping, - page_base, page_offset, - user_data, page_length); - if (ret) - goto fail; - } + if (ret) + goto fail; remain -= page_length; user_data += page_length; @@ -315,6 +314,115 @@ fail: return ret; } +/** + * This is the fallback GTT pwrite path, which uses get_user_pages to pin + * the memory and maps it using kmap_atomic for copying. + * + * This code resulted in x11perf -rgb10text consuming about 10% more CPU + * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit). + */ +static int +i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, + struct drm_i915_gem_pwrite *args, + struct drm_file *file_priv) +{ + struct drm_i915_gem_object *obj_priv = obj->driver_private; + drm_i915_private_t *dev_priv = dev->dev_private; + ssize_t remain; + loff_t gtt_page_base, offset; + loff_t first_data_page, last_data_page, num_pages; + loff_t pinned_pages, i; + struct page **user_pages; + struct mm_struct *mm = current->mm; + int gtt_page_offset, data_page_offset, data_page_index, page_length; + int ret; + uint64_t data_ptr = args->data_ptr; + + remain = args->size; + + /* Pin the user pages containing the data. We can't fault while + * holding the struct mutex, and all of the pwrite implementations + * want to hold it while dereferencing the user data. + */ + first_data_page = data_ptr / PAGE_SIZE; + last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; + num_pages = last_data_page - first_data_page + 1; + + user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + if (user_pages == NULL) + return -ENOMEM; + + down_read(&mm->mmap_sem); + pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, + num_pages, 0, 0, user_pages, NULL); + up_read(&mm->mmap_sem); + if (pinned_pages < num_pages) { + ret = -EFAULT; + goto out_unpin_pages; + } + + mutex_lock(&dev->struct_mutex); + ret = i915_gem_object_pin(obj, 0); + if (ret) + goto out_unlock; + + ret = i915_gem_object_set_to_gtt_domain(obj, 1); + if (ret) + goto out_unpin_object; + + obj_priv = obj->driver_private; + offset = obj_priv->gtt_offset + args->offset; + + while (remain > 0) { + /* Operation in this page + * + * gtt_page_base = page offset within aperture + * gtt_page_offset = offset within page in aperture + * data_page_index = page number in get_user_pages return + * data_page_offset = offset with data_page_index page. + * page_length = bytes to copy for this page + */ + gtt_page_base = offset & PAGE_MASK; + gtt_page_offset = offset & ~PAGE_MASK; + data_page_index = data_ptr / PAGE_SIZE - first_data_page; + data_page_offset = data_ptr & ~PAGE_MASK; + + page_length = remain; + if ((gtt_page_offset + page_length) > PAGE_SIZE) + page_length = PAGE_SIZE - gtt_page_offset; + if ((data_page_offset + page_length) > PAGE_SIZE) + page_length = PAGE_SIZE - data_page_offset; + + ret = slow_kernel_write(dev_priv->mm.gtt_mapping, + gtt_page_base, gtt_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length); + + /* If we get a fault while copying data, then (presumably) our + * source page isn't available. Return the error and we'll + * retry in the slow path. + */ + if (ret) + goto out_unpin_object; + + remain -= page_length; + offset += page_length; + data_ptr += page_length; + } + +out_unpin_object: + i915_gem_object_unpin(obj); +out_unlock: + mutex_unlock(&dev->struct_mutex); +out_unpin_pages: + for (i = 0; i < pinned_pages; i++) + page_cache_release(user_pages[i]); + kfree(user_pages); + + return ret; +} + static int i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, struct drm_i915_gem_pwrite *args, @@ -388,9 +496,13 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (obj_priv->phys_obj) ret = i915_gem_phys_pwrite(dev, obj, args, file_priv); else if (obj_priv->tiling_mode == I915_TILING_NONE && - dev->gtt_total != 0) - ret = i915_gem_gtt_pwrite(dev, obj, args, file_priv); - else + dev->gtt_total != 0) { + ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv); + if (ret == -EFAULT) { + ret = i915_gem_gtt_pwrite_slow(dev, obj, args, + file_priv); + } + } else ret = i915_gem_shmem_pwrite(dev, obj, args, file_priv); #if WATCH_PWRITE -- cgit v1.2.3-70-g09d2 From 856fa1988ea483fc2dab84a16681dcfde821b740 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 19 Mar 2009 14:10:50 -0700 Subject: drm/i915: Make GEM object's page lists refcounted instead of get/free. We've wanted this for a few consumers that touch the pages directly (such as the following commit), which have been doing the refcounting outside of get/put pages. Signed-off-by: Eric Anholt Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem.c | 70 +++++++++++++++++++++-------------------- 2 files changed, 38 insertions(+), 35 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d6cc9861e0a..75e33844146 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -404,7 +404,8 @@ struct drm_i915_gem_object { /** AGP memory structure for our GTT binding. */ DRM_AGP_MEM *agp_mem; - struct page **page_list; + struct page **pages; + int pages_refcount; /** * Current offset of the object in GTT space. diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 35f8c7bd0d3..b998d659fd9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -43,8 +43,8 @@ static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, uint64_t offset, uint64_t size); static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj); -static int i915_gem_object_get_page_list(struct drm_gem_object *obj); -static void i915_gem_object_free_page_list(struct drm_gem_object *obj); +static int i915_gem_object_get_pages(struct drm_gem_object *obj); +static void i915_gem_object_put_pages(struct drm_gem_object *obj); static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment); @@ -928,29 +928,30 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, } static void -i915_gem_object_free_page_list(struct drm_gem_object *obj) +i915_gem_object_put_pages(struct drm_gem_object *obj) { struct drm_i915_gem_object *obj_priv = obj->driver_private; int page_count = obj->size / PAGE_SIZE; int i; - if (obj_priv->page_list == NULL) - return; + BUG_ON(obj_priv->pages_refcount == 0); + if (--obj_priv->pages_refcount != 0) + return; for (i = 0; i < page_count; i++) - if (obj_priv->page_list[i] != NULL) { + if (obj_priv->pages[i] != NULL) { if (obj_priv->dirty) - set_page_dirty(obj_priv->page_list[i]); - mark_page_accessed(obj_priv->page_list[i]); - page_cache_release(obj_priv->page_list[i]); + set_page_dirty(obj_priv->pages[i]); + mark_page_accessed(obj_priv->pages[i]); + page_cache_release(obj_priv->pages[i]); } obj_priv->dirty = 0; - drm_free(obj_priv->page_list, + drm_free(obj_priv->pages, page_count * sizeof(struct page *), DRM_MEM_DRIVER); - obj_priv->page_list = NULL; + obj_priv->pages = NULL; } static void @@ -1402,7 +1403,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj) if (obj_priv->fence_reg != I915_FENCE_REG_NONE) i915_gem_clear_fence_reg(obj); - i915_gem_object_free_page_list(obj); + i915_gem_object_put_pages(obj); if (obj_priv->gtt_space) { atomic_dec(&dev->gtt_count); @@ -1521,7 +1522,7 @@ i915_gem_evict_everything(struct drm_device *dev) } static int -i915_gem_object_get_page_list(struct drm_gem_object *obj) +i915_gem_object_get_pages(struct drm_gem_object *obj) { struct drm_i915_gem_object *obj_priv = obj->driver_private; int page_count, i; @@ -1530,18 +1531,19 @@ i915_gem_object_get_page_list(struct drm_gem_object *obj) struct page *page; int ret; - if (obj_priv->page_list) + if (obj_priv->pages_refcount++ != 0) return 0; /* Get the list of pages out of our struct file. They'll be pinned * at this point until we release them. */ page_count = obj->size / PAGE_SIZE; - BUG_ON(obj_priv->page_list != NULL); - obj_priv->page_list = drm_calloc(page_count, sizeof(struct page *), - DRM_MEM_DRIVER); - if (obj_priv->page_list == NULL) { + BUG_ON(obj_priv->pages != NULL); + obj_priv->pages = drm_calloc(page_count, sizeof(struct page *), + DRM_MEM_DRIVER); + if (obj_priv->pages == NULL) { DRM_ERROR("Faled to allocate page list\n"); + obj_priv->pages_refcount--; return -ENOMEM; } @@ -1552,10 +1554,10 @@ i915_gem_object_get_page_list(struct drm_gem_object *obj) if (IS_ERR(page)) { ret = PTR_ERR(page); DRM_ERROR("read_mapping_page failed: %d\n", ret); - i915_gem_object_free_page_list(obj); + i915_gem_object_put_pages(obj); return ret; } - obj_priv->page_list[i] = page; + obj_priv->pages[i] = page; } return 0; } @@ -1878,7 +1880,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) DRM_INFO("Binding object of size %d at 0x%08x\n", obj->size, obj_priv->gtt_offset); #endif - ret = i915_gem_object_get_page_list(obj); + ret = i915_gem_object_get_pages(obj); if (ret) { drm_mm_put_block(obj_priv->gtt_space); obj_priv->gtt_space = NULL; @@ -1890,12 +1892,12 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) * into the GTT. */ obj_priv->agp_mem = drm_agp_bind_pages(dev, - obj_priv->page_list, + obj_priv->pages, page_count, obj_priv->gtt_offset, obj_priv->agp_type); if (obj_priv->agp_mem == NULL) { - i915_gem_object_free_page_list(obj); + i915_gem_object_put_pages(obj); drm_mm_put_block(obj_priv->gtt_space); obj_priv->gtt_space = NULL; return -ENOMEM; @@ -1922,10 +1924,10 @@ i915_gem_clflush_object(struct drm_gem_object *obj) * to GPU, and we can ignore the cache flush because it'll happen * again at bind time. */ - if (obj_priv->page_list == NULL) + if (obj_priv->pages == NULL) return; - drm_clflush_pages(obj_priv->page_list, obj->size / PAGE_SIZE); + drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); } /** Flushes any GPU write domain for the object if it's dirty. */ @@ -2270,7 +2272,7 @@ i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) { if (obj_priv->page_cpu_valid[i]) continue; - drm_clflush_pages(obj_priv->page_list + i, 1); + drm_clflush_pages(obj_priv->pages + i, 1); } drm_agp_chipset_flush(dev); } @@ -2336,7 +2338,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, if (obj_priv->page_cpu_valid[i]) continue; - drm_clflush_pages(obj_priv->page_list + i, 1); + drm_clflush_pages(obj_priv->pages + i, 1); obj_priv->page_cpu_valid[i] = 1; } @@ -3304,7 +3306,7 @@ i915_gem_init_hws(struct drm_device *dev) dev_priv->status_gfx_addr = obj_priv->gtt_offset; - dev_priv->hw_status_page = kmap(obj_priv->page_list[0]); + dev_priv->hw_status_page = kmap(obj_priv->pages[0]); if (dev_priv->hw_status_page == NULL) { DRM_ERROR("Failed to map status page.\n"); memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); @@ -3334,7 +3336,7 @@ i915_gem_cleanup_hws(struct drm_device *dev) obj = dev_priv->hws_obj; obj_priv = obj->driver_private; - kunmap(obj_priv->page_list[0]); + kunmap(obj_priv->pages[0]); i915_gem_object_unpin(obj); drm_gem_object_unreference(obj); dev_priv->hws_obj = NULL; @@ -3637,20 +3639,20 @@ void i915_gem_detach_phys_object(struct drm_device *dev, if (!obj_priv->phys_obj) return; - ret = i915_gem_object_get_page_list(obj); + ret = i915_gem_object_get_pages(obj); if (ret) goto out; page_count = obj->size / PAGE_SIZE; for (i = 0; i < page_count; i++) { - char *dst = kmap_atomic(obj_priv->page_list[i], KM_USER0); + char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0); char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE); memcpy(dst, src, PAGE_SIZE); kunmap_atomic(dst, KM_USER0); } - drm_clflush_pages(obj_priv->page_list, page_count); + drm_clflush_pages(obj_priv->pages, page_count); drm_agp_chipset_flush(dev); out: obj_priv->phys_obj->cur_obj = NULL; @@ -3693,7 +3695,7 @@ i915_gem_attach_phys_object(struct drm_device *dev, obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1]; obj_priv->phys_obj->cur_obj = obj; - ret = i915_gem_object_get_page_list(obj); + ret = i915_gem_object_get_pages(obj); if (ret) { DRM_ERROR("failed to get page list\n"); goto out; @@ -3702,7 +3704,7 @@ i915_gem_attach_phys_object(struct drm_device *dev, page_count = obj->size / PAGE_SIZE; for (i = 0; i < page_count; i++) { - char *src = kmap_atomic(obj_priv->page_list[i], KM_USER0); + char *src = kmap_atomic(obj_priv->pages[i], KM_USER0); char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE); memcpy(dst, src, PAGE_SIZE); -- cgit v1.2.3-70-g09d2 From 40123c1f8dd920dcff7a42cde5b351d7d0b0422e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 9 Mar 2009 13:42:30 -0700 Subject: drm/i915: Fix lock order reversal in shmem pwrite path. Like the GTT pwrite path fix, this uses an optimistic path and a fallback to get_user_pages. Note that this means we have to stop using vfs_write and roll it ourselves. Signed-off-by: Eric Anholt Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_gem.c | 225 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 205 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b998d659fd9..bdc7326052d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -136,6 +136,33 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, return 0; } +static inline int +slow_shmem_copy(struct page *dst_page, + int dst_offset, + struct page *src_page, + int src_offset, + int length) +{ + char *dst_vaddr, *src_vaddr; + + dst_vaddr = kmap_atomic(dst_page, KM_USER0); + if (dst_vaddr == NULL) + return -ENOMEM; + + src_vaddr = kmap_atomic(src_page, KM_USER1); + if (src_vaddr == NULL) { + kunmap_atomic(dst_vaddr, KM_USER0); + return -ENOMEM; + } + + memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length); + + kunmap_atomic(src_vaddr, KM_USER1); + kunmap_atomic(dst_vaddr, KM_USER0); + + return 0; +} + /** * Reads data from the object referenced by handle. * @@ -243,6 +270,23 @@ slow_kernel_write(struct io_mapping *mapping, return 0; } +static inline int +fast_shmem_write(struct page **pages, + loff_t page_base, int page_offset, + char __user *data, + int length) +{ + char __iomem *vaddr; + + vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0); + if (vaddr == NULL) + return -ENOMEM; + __copy_from_user_inatomic(vaddr + page_offset, data, length); + kunmap_atomic(vaddr, KM_USER0); + + return 0; +} + /** * This is the fast pwrite path, where we copy the data directly from the * user into the GTT, uncached. @@ -423,39 +467,175 @@ out_unpin_pages: return ret; } +/** + * This is the fast shmem pwrite path, which attempts to directly + * copy_from_user into the kmapped pages backing the object. + */ static int -i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, - struct drm_i915_gem_pwrite *args, - struct drm_file *file_priv) +i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj, + struct drm_i915_gem_pwrite *args, + struct drm_file *file_priv) { + struct drm_i915_gem_object *obj_priv = obj->driver_private; + ssize_t remain; + loff_t offset, page_base; + char __user *user_data; + int page_offset, page_length; int ret; - loff_t offset; - ssize_t written; + + user_data = (char __user *) (uintptr_t) args->data_ptr; + remain = args->size; mutex_lock(&dev->struct_mutex); + ret = i915_gem_object_get_pages(obj); + if (ret != 0) + goto fail_unlock; + ret = i915_gem_object_set_to_cpu_domain(obj, 1); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return ret; + if (ret != 0) + goto fail_put_pages; + + obj_priv = obj->driver_private; + offset = args->offset; + obj_priv->dirty = 1; + + while (remain > 0) { + /* Operation in this page + * + * page_base = page offset within aperture + * page_offset = offset within page + * page_length = bytes to copy for this page + */ + page_base = (offset & ~(PAGE_SIZE-1)); + page_offset = offset & (PAGE_SIZE-1); + page_length = remain; + if ((page_offset + remain) > PAGE_SIZE) + page_length = PAGE_SIZE - page_offset; + + ret = fast_shmem_write(obj_priv->pages, + page_base, page_offset, + user_data, page_length); + if (ret) + goto fail_put_pages; + + remain -= page_length; + user_data += page_length; + offset += page_length; } +fail_put_pages: + i915_gem_object_put_pages(obj); +fail_unlock: + mutex_unlock(&dev->struct_mutex); + + return ret; +} + +/** + * This is the fallback shmem pwrite path, which uses get_user_pages to pin + * the memory and maps it using kmap_atomic for copying. + * + * This avoids taking mmap_sem for faulting on the user's address while the + * struct_mutex is held. + */ +static int +i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, + struct drm_i915_gem_pwrite *args, + struct drm_file *file_priv) +{ + struct drm_i915_gem_object *obj_priv = obj->driver_private; + struct mm_struct *mm = current->mm; + struct page **user_pages; + ssize_t remain; + loff_t offset, pinned_pages, i; + loff_t first_data_page, last_data_page, num_pages; + int shmem_page_index, shmem_page_offset; + int data_page_index, data_page_offset; + int page_length; + int ret; + uint64_t data_ptr = args->data_ptr; + + remain = args->size; + + /* Pin the user pages containing the data. We can't fault while + * holding the struct mutex, and all of the pwrite implementations + * want to hold it while dereferencing the user data. + */ + first_data_page = data_ptr / PAGE_SIZE; + last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; + num_pages = last_data_page - first_data_page + 1; + + user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + if (user_pages == NULL) + return -ENOMEM; + + down_read(&mm->mmap_sem); + pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, + num_pages, 0, 0, user_pages, NULL); + up_read(&mm->mmap_sem); + if (pinned_pages < num_pages) { + ret = -EFAULT; + goto fail_put_user_pages; + } + + mutex_lock(&dev->struct_mutex); + + ret = i915_gem_object_get_pages(obj); + if (ret != 0) + goto fail_unlock; + + ret = i915_gem_object_set_to_cpu_domain(obj, 1); + if (ret != 0) + goto fail_put_pages; + + obj_priv = obj->driver_private; offset = args->offset; + obj_priv->dirty = 1; - written = vfs_write(obj->filp, - (char __user *)(uintptr_t) args->data_ptr, - args->size, &offset); - if (written != args->size) { - mutex_unlock(&dev->struct_mutex); - if (written < 0) - return written; - else - return -EINVAL; + while (remain > 0) { + /* Operation in this page + * + * shmem_page_index = page number within shmem file + * shmem_page_offset = offset within page in shmem file + * data_page_index = page number in get_user_pages return + * data_page_offset = offset with data_page_index page. + * page_length = bytes to copy for this page + */ + shmem_page_index = offset / PAGE_SIZE; + shmem_page_offset = offset & ~PAGE_MASK; + data_page_index = data_ptr / PAGE_SIZE - first_data_page; + data_page_offset = data_ptr & ~PAGE_MASK; + + page_length = remain; + if ((shmem_page_offset + page_length) > PAGE_SIZE) + page_length = PAGE_SIZE - shmem_page_offset; + if ((data_page_offset + page_length) > PAGE_SIZE) + page_length = PAGE_SIZE - data_page_offset; + + ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length); + if (ret) + goto fail_put_pages; + + remain -= page_length; + data_ptr += page_length; + offset += page_length; } +fail_put_pages: + i915_gem_object_put_pages(obj); +fail_unlock: mutex_unlock(&dev->struct_mutex); +fail_put_user_pages: + for (i = 0; i < pinned_pages; i++) + page_cache_release(user_pages[i]); + kfree(user_pages); - return 0; + return ret; } /** @@ -502,8 +682,13 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file_priv); } - } else - ret = i915_gem_shmem_pwrite(dev, obj, args, file_priv); + } else { + ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv); + if (ret == -EFAULT) { + ret = i915_gem_shmem_pwrite_slow(dev, obj, args, + file_priv); + } + } #if WATCH_PWRITE if (ret) -- cgit v1.2.3-70-g09d2 From eb01459fbbccb4ca0b879cbfc97e33ac6eabf975 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 10 Mar 2009 11:44:52 -0700 Subject: drm/i915: Fix lock order reversal in shmem pread path. Signed-off-by: Eric Anholt Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_gem.c | 221 +++++++++++++++++++++++++++++++++++----- 1 file changed, 195 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bdc7326052d..010af908bdb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -136,6 +136,24 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, return 0; } +static inline int +fast_shmem_read(struct page **pages, + loff_t page_base, int page_offset, + char __user *data, + int length) +{ + char __iomem *vaddr; + int ret; + + vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0); + if (vaddr == NULL) + return -ENOMEM; + ret = __copy_to_user_inatomic(data, vaddr + page_offset, length); + kunmap_atomic(vaddr, KM_USER0); + + return ret; +} + static inline int slow_shmem_copy(struct page *dst_page, int dst_offset, @@ -163,6 +181,179 @@ slow_shmem_copy(struct page *dst_page, return 0; } +/** + * This is the fast shmem pread path, which attempts to copy_from_user directly + * from the backing pages of the object to the user's address space. On a + * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow(). + */ +static int +i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj, + struct drm_i915_gem_pread *args, + struct drm_file *file_priv) +{ + struct drm_i915_gem_object *obj_priv = obj->driver_private; + ssize_t remain; + loff_t offset, page_base; + char __user *user_data; + int page_offset, page_length; + int ret; + + user_data = (char __user *) (uintptr_t) args->data_ptr; + remain = args->size; + + mutex_lock(&dev->struct_mutex); + + ret = i915_gem_object_get_pages(obj); + if (ret != 0) + goto fail_unlock; + + ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, + args->size); + if (ret != 0) + goto fail_put_pages; + + obj_priv = obj->driver_private; + offset = args->offset; + + while (remain > 0) { + /* Operation in this page + * + * page_base = page offset within aperture + * page_offset = offset within page + * page_length = bytes to copy for this page + */ + page_base = (offset & ~(PAGE_SIZE-1)); + page_offset = offset & (PAGE_SIZE-1); + page_length = remain; + if ((page_offset + remain) > PAGE_SIZE) + page_length = PAGE_SIZE - page_offset; + + ret = fast_shmem_read(obj_priv->pages, + page_base, page_offset, + user_data, page_length); + if (ret) + goto fail_put_pages; + + remain -= page_length; + user_data += page_length; + offset += page_length; + } + +fail_put_pages: + i915_gem_object_put_pages(obj); +fail_unlock: + mutex_unlock(&dev->struct_mutex); + + return ret; +} + +/** + * This is the fallback shmem pread path, which allocates temporary storage + * in kernel space to copy_to_user into outside of the struct_mutex, so we + * can copy out of the object's backing pages while holding the struct mutex + * and not take page faults. + */ +static int +i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, + struct drm_i915_gem_pread *args, + struct drm_file *file_priv) +{ + struct drm_i915_gem_object *obj_priv = obj->driver_private; + struct mm_struct *mm = current->mm; + struct page **user_pages; + ssize_t remain; + loff_t offset, pinned_pages, i; + loff_t first_data_page, last_data_page, num_pages; + int shmem_page_index, shmem_page_offset; + int data_page_index, data_page_offset; + int page_length; + int ret; + uint64_t data_ptr = args->data_ptr; + + remain = args->size; + + /* Pin the user pages containing the data. We can't fault while + * holding the struct mutex, yet we want to hold it while + * dereferencing the user data. + */ + first_data_page = data_ptr / PAGE_SIZE; + last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; + num_pages = last_data_page - first_data_page + 1; + + user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + if (user_pages == NULL) + return -ENOMEM; + + down_read(&mm->mmap_sem); + pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, + num_pages, 0, 0, user_pages, NULL); + up_read(&mm->mmap_sem); + if (pinned_pages < num_pages) { + ret = -EFAULT; + goto fail_put_user_pages; + } + + mutex_lock(&dev->struct_mutex); + + ret = i915_gem_object_get_pages(obj); + if (ret != 0) + goto fail_unlock; + + ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, + args->size); + if (ret != 0) + goto fail_put_pages; + + obj_priv = obj->driver_private; + offset = args->offset; + + while (remain > 0) { + /* Operation in this page + * + * shmem_page_index = page number within shmem file + * shmem_page_offset = offset within page in shmem file + * data_page_index = page number in get_user_pages return + * data_page_offset = offset with data_page_index page. + * page_length = bytes to copy for this page + */ + shmem_page_index = offset / PAGE_SIZE; + shmem_page_offset = offset & ~PAGE_MASK; + data_page_index = data_ptr / PAGE_SIZE - first_data_page; + data_page_offset = data_ptr & ~PAGE_MASK; + + page_length = remain; + if ((shmem_page_offset + page_length) > PAGE_SIZE) + page_length = PAGE_SIZE - shmem_page_offset; + if ((data_page_offset + page_length) > PAGE_SIZE) + page_length = PAGE_SIZE - data_page_offset; + + ret = slow_shmem_copy(user_pages[data_page_index], + data_page_offset, + obj_priv->pages[shmem_page_index], + shmem_page_offset, + page_length); + if (ret) + goto fail_put_pages; + + remain -= page_length; + data_ptr += page_length; + offset += page_length; + } + +fail_put_pages: + i915_gem_object_put_pages(obj); +fail_unlock: + mutex_unlock(&dev->struct_mutex); +fail_put_user_pages: + for (i = 0; i < pinned_pages; i++) { + SetPageDirty(user_pages[i]); + page_cache_release(user_pages[i]); + } + kfree(user_pages); + + return ret; +} + /** * Reads data from the object referenced by handle. * @@ -175,8 +366,6 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_pread *args = data; struct drm_gem_object *obj; struct drm_i915_gem_object *obj_priv; - ssize_t read; - loff_t offset; int ret; obj = drm_gem_object_lookup(dev, file_priv, args->handle); @@ -194,33 +383,13 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - mutex_lock(&dev->struct_mutex); - - ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, - args->size); - if (ret != 0) { - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - return ret; - } - - offset = args->offset; - - read = vfs_read(obj->filp, (char __user *)(uintptr_t)args->data_ptr, - args->size, &offset); - if (read != args->size) { - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - if (read < 0) - return read; - else - return -EINVAL; - } + ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); + if (ret != 0) + ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv); drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - return 0; + return ret; } /* This is the fast write path which cannot handle -- cgit v1.2.3-70-g09d2 From 201361a54ed187d8595a283e3a4ddb213bc8323b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 11 Mar 2009 12:30:04 -0700 Subject: drm/i915: Fix lock order reversal with cliprects and cmdbuf in non-DRI2 paths. This introduces allocation in the batch submission path that wasn't there previously, but these are compatibility paths so we care about simplicity more than performance. kernel.org bug #12419. Signed-off-by: Eric Anholt Reviewed-by: Keith Packard Acked-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_dma.c | 107 +++++++++++++++++++++++++++------------- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 27 ++++++++-- 3 files changed, 97 insertions(+), 39 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 6d21b9e48b8..ae83fe0ab37 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -356,7 +356,7 @@ static int validate_cmd(int cmd) return ret; } -static int i915_emit_cmds(struct drm_device * dev, int __user * buffer, int dwords) +static int i915_emit_cmds(struct drm_device * dev, int *buffer, int dwords) { drm_i915_private_t *dev_priv = dev->dev_private; int i; @@ -370,8 +370,7 @@ static int i915_emit_cmds(struct drm_device * dev, int __user * buffer, int dwor for (i = 0; i < dwords;) { int cmd, sz; - if (DRM_COPY_FROM_USER_UNCHECKED(&cmd, &buffer[i], sizeof(cmd))) - return -EINVAL; + cmd = buffer[i]; if ((sz = validate_cmd(cmd)) == 0 || i + sz > dwords) return -EINVAL; @@ -379,11 +378,7 @@ static int i915_emit_cmds(struct drm_device * dev, int __user * buffer, int dwor OUT_RING(cmd); while (++i, --sz) { - if (DRM_COPY_FROM_USER_UNCHECKED(&cmd, &buffer[i], - sizeof(cmd))) { - return -EINVAL; - } - OUT_RING(cmd); + OUT_RING(buffer[i]); } } @@ -397,17 +392,13 @@ static int i915_emit_cmds(struct drm_device * dev, int __user * buffer, int dwor int i915_emit_box(struct drm_device *dev, - struct drm_clip_rect __user *boxes, + struct drm_clip_rect *boxes, int i, int DR1, int DR4) { drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_clip_rect box; + struct drm_clip_rect box = boxes[i]; RING_LOCALS; - if (DRM_COPY_FROM_USER_UNCHECKED(&box, &boxes[i], sizeof(box))) { - return -EFAULT; - } - if (box.y2 <= box.y1 || box.x2 <= box.x1 || box.y2 <= 0 || box.x2 <= 0) { DRM_ERROR("Bad box %d,%d..%d,%d\n", box.x1, box.y1, box.x2, box.y2); @@ -460,7 +451,9 @@ static void i915_emit_breadcrumb(struct drm_device *dev) } static int i915_dispatch_cmdbuffer(struct drm_device * dev, - drm_i915_cmdbuffer_t * cmd) + drm_i915_cmdbuffer_t *cmd, + struct drm_clip_rect *cliprects, + void *cmdbuf) { int nbox = cmd->num_cliprects; int i = 0, count, ret; @@ -476,13 +469,13 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev, for (i = 0; i < count; i++) { if (i < nbox) { - ret = i915_emit_box(dev, cmd->cliprects, i, + ret = i915_emit_box(dev, cliprects, i, cmd->DR1, cmd->DR4); if (ret) return ret; } - ret = i915_emit_cmds(dev, (int __user *)cmd->buf, cmd->sz / 4); + ret = i915_emit_cmds(dev, cmdbuf, cmd->sz / 4); if (ret) return ret; } @@ -492,10 +485,10 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev, } static int i915_dispatch_batchbuffer(struct drm_device * dev, - drm_i915_batchbuffer_t * batch) + drm_i915_batchbuffer_t * batch, + struct drm_clip_rect *cliprects) { drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_clip_rect __user *boxes = batch->cliprects; int nbox = batch->num_cliprects; int i = 0, count; RING_LOCALS; @@ -511,7 +504,7 @@ static int i915_dispatch_batchbuffer(struct drm_device * dev, for (i = 0; i < count; i++) { if (i < nbox) { - int ret = i915_emit_box(dev, boxes, i, + int ret = i915_emit_box(dev, cliprects, i, batch->DR1, batch->DR4); if (ret) return ret; @@ -626,6 +619,7 @@ static int i915_batchbuffer(struct drm_device *dev, void *data, master_priv->sarea_priv; drm_i915_batchbuffer_t *batch = data; int ret; + struct drm_clip_rect *cliprects = NULL; if (!dev_priv->allow_batchbuffer) { DRM_ERROR("Batchbuffer ioctl disabled\n"); @@ -637,17 +631,35 @@ static int i915_batchbuffer(struct drm_device *dev, void *data, RING_LOCK_TEST_WITH_RETURN(dev, file_priv); - if (batch->num_cliprects && DRM_VERIFYAREA_READ(batch->cliprects, - batch->num_cliprects * - sizeof(struct drm_clip_rect))) - return -EFAULT; + if (batch->num_cliprects < 0) + return -EINVAL; + + if (batch->num_cliprects) { + cliprects = drm_calloc(batch->num_cliprects, + sizeof(struct drm_clip_rect), + DRM_MEM_DRIVER); + if (cliprects == NULL) + return -ENOMEM; + + ret = copy_from_user(cliprects, batch->cliprects, + batch->num_cliprects * + sizeof(struct drm_clip_rect)); + if (ret != 0) + goto fail_free; + } mutex_lock(&dev->struct_mutex); - ret = i915_dispatch_batchbuffer(dev, batch); + ret = i915_dispatch_batchbuffer(dev, batch, cliprects); mutex_unlock(&dev->struct_mutex); if (sarea_priv) sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); + +fail_free: + drm_free(cliprects, + batch->num_cliprects * sizeof(struct drm_clip_rect), + DRM_MEM_DRIVER); + return ret; } @@ -659,6 +671,8 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, drm_i915_sarea_t *sarea_priv = (drm_i915_sarea_t *) master_priv->sarea_priv; drm_i915_cmdbuffer_t *cmdbuf = data; + struct drm_clip_rect *cliprects = NULL; + void *batch_data; int ret; DRM_DEBUG("i915 cmdbuffer, buf %p sz %d cliprects %d\n", @@ -666,25 +680,50 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, RING_LOCK_TEST_WITH_RETURN(dev, file_priv); - if (cmdbuf->num_cliprects && - DRM_VERIFYAREA_READ(cmdbuf->cliprects, - cmdbuf->num_cliprects * - sizeof(struct drm_clip_rect))) { - DRM_ERROR("Fault accessing cliprects\n"); - return -EFAULT; + if (cmdbuf->num_cliprects < 0) + return -EINVAL; + + batch_data = drm_alloc(cmdbuf->sz, DRM_MEM_DRIVER); + if (batch_data == NULL) + return -ENOMEM; + + ret = copy_from_user(batch_data, cmdbuf->buf, cmdbuf->sz); + if (ret != 0) + goto fail_batch_free; + + if (cmdbuf->num_cliprects) { + cliprects = drm_calloc(cmdbuf->num_cliprects, + sizeof(struct drm_clip_rect), + DRM_MEM_DRIVER); + if (cliprects == NULL) + goto fail_batch_free; + + ret = copy_from_user(cliprects, cmdbuf->cliprects, + cmdbuf->num_cliprects * + sizeof(struct drm_clip_rect)); + if (ret != 0) + goto fail_clip_free; } mutex_lock(&dev->struct_mutex); - ret = i915_dispatch_cmdbuffer(dev, cmdbuf); + ret = i915_dispatch_cmdbuffer(dev, cmdbuf, cliprects, batch_data); mutex_unlock(&dev->struct_mutex); if (ret) { DRM_ERROR("i915_dispatch_cmdbuffer failed\n"); - return ret; + goto fail_batch_free; } if (sarea_priv) sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); - return 0; + +fail_batch_free: + drm_free(batch_data, cmdbuf->sz, DRM_MEM_DRIVER); +fail_clip_free: + drm_free(cliprects, + cmdbuf->num_cliprects * sizeof(struct drm_clip_rect), + DRM_MEM_DRIVER); + + return ret; } static int i915_flip_bufs(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 75e33844146..2c02ce6b2b9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -520,7 +520,7 @@ extern int i915_driver_device_is_agp(struct drm_device * dev); extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); extern int i915_emit_box(struct drm_device *dev, - struct drm_clip_rect __user *boxes, + struct drm_clip_rect *boxes, int i, int DR1, int DR4); /* i915_irq.c */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 010af908bdb..2bda15197cc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2891,11 +2891,10 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, static int i915_dispatch_gem_execbuffer(struct drm_device *dev, struct drm_i915_gem_execbuffer *exec, + struct drm_clip_rect *cliprects, uint64_t exec_offset) { drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_clip_rect __user *boxes = (struct drm_clip_rect __user *) - (uintptr_t) exec->cliprects_ptr; int nbox = exec->num_cliprects; int i = 0, count; uint32_t exec_start, exec_len; @@ -2916,7 +2915,7 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev, for (i = 0; i < count; i++) { if (i < nbox) { - int ret = i915_emit_box(dev, boxes, i, + int ret = i915_emit_box(dev, cliprects, i, exec->DR1, exec->DR4); if (ret) return ret; @@ -2983,6 +2982,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_gem_object **object_list = NULL; struct drm_gem_object *batch_obj; struct drm_i915_gem_object *obj_priv; + struct drm_clip_rect *cliprects = NULL; int ret, i, pinned = 0; uint64_t exec_offset; uint32_t seqno, flush_domains; @@ -3019,6 +3019,23 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } + if (args->num_cliprects != 0) { + cliprects = drm_calloc(args->num_cliprects, sizeof(*cliprects), + DRM_MEM_DRIVER); + if (cliprects == NULL) + goto pre_mutex_err; + + ret = copy_from_user(cliprects, + (struct drm_clip_rect __user *) + (uintptr_t) args->cliprects_ptr, + sizeof(*cliprects) * args->num_cliprects); + if (ret != 0) { + DRM_ERROR("copy %d cliprects failed: %d\n", + args->num_cliprects, ret); + goto pre_mutex_err; + } + } + mutex_lock(&dev->struct_mutex); i915_verify_inactive(dev, __FILE__, __LINE__); @@ -3155,7 +3172,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, #endif /* Exec the batchbuffer */ - ret = i915_dispatch_gem_execbuffer(dev, args, exec_offset); + ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset); if (ret) { DRM_ERROR("dispatch failed %d\n", ret); goto err; @@ -3224,6 +3241,8 @@ pre_mutex_err: DRM_MEM_DRIVER); drm_free(exec_list, sizeof(*exec_list) * args->buffer_count, DRM_MEM_DRIVER); + drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects, + DRM_MEM_DRIVER); return ret; } -- cgit v1.2.3-70-g09d2 From 40a5f0decdf050785ebd62b36ad48c869ee4b384 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 12 Mar 2009 11:23:52 -0700 Subject: drm/i915: Fix lock order reversal in GEM relocation entry copying. Signed-off-by: Eric Anholt Reviewed-by: Keith Packard --- drivers/gpu/drm/i915/i915_gem.c | 187 ++++++++++++++++++++++++++++------------ 1 file changed, 133 insertions(+), 54 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2bda15197cc..f135c903305 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2713,12 +2713,11 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, static int i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, struct drm_file *file_priv, - struct drm_i915_gem_exec_object *entry) + struct drm_i915_gem_exec_object *entry, + struct drm_i915_gem_relocation_entry *relocs) { struct drm_device *dev = obj->dev; drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_i915_gem_relocation_entry reloc; - struct drm_i915_gem_relocation_entry __user *relocs; struct drm_i915_gem_object *obj_priv = obj->driver_private; int i, ret; void __iomem *reloc_page; @@ -2730,25 +2729,18 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, entry->offset = obj_priv->gtt_offset; - relocs = (struct drm_i915_gem_relocation_entry __user *) - (uintptr_t) entry->relocs_ptr; /* Apply the relocations, using the GTT aperture to avoid cache * flushing requirements. */ for (i = 0; i < entry->relocation_count; i++) { + struct drm_i915_gem_relocation_entry *reloc= &relocs[i]; struct drm_gem_object *target_obj; struct drm_i915_gem_object *target_obj_priv; uint32_t reloc_val, reloc_offset; uint32_t __iomem *reloc_entry; - ret = copy_from_user(&reloc, relocs + i, sizeof(reloc)); - if (ret != 0) { - i915_gem_object_unpin(obj); - return ret; - } - target_obj = drm_gem_object_lookup(obj->dev, file_priv, - reloc.target_handle); + reloc->target_handle); if (target_obj == NULL) { i915_gem_object_unpin(obj); return -EBADF; @@ -2760,53 +2752,53 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, */ if (target_obj_priv->gtt_space == NULL) { DRM_ERROR("No GTT space found for object %d\n", - reloc.target_handle); + reloc->target_handle); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); return -EINVAL; } - if (reloc.offset > obj->size - 4) { + if (reloc->offset > obj->size - 4) { DRM_ERROR("Relocation beyond object bounds: " "obj %p target %d offset %d size %d.\n", - obj, reloc.target_handle, - (int) reloc.offset, (int) obj->size); + obj, reloc->target_handle, + (int) reloc->offset, (int) obj->size); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); return -EINVAL; } - if (reloc.offset & 3) { + if (reloc->offset & 3) { DRM_ERROR("Relocation not 4-byte aligned: " "obj %p target %d offset %d.\n", - obj, reloc.target_handle, - (int) reloc.offset); + obj, reloc->target_handle, + (int) reloc->offset); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); return -EINVAL; } - if (reloc.write_domain & I915_GEM_DOMAIN_CPU || - reloc.read_domains & I915_GEM_DOMAIN_CPU) { + if (reloc->write_domain & I915_GEM_DOMAIN_CPU || + reloc->read_domains & I915_GEM_DOMAIN_CPU) { DRM_ERROR("reloc with read/write CPU domains: " "obj %p target %d offset %d " "read %08x write %08x", - obj, reloc.target_handle, - (int) reloc.offset, - reloc.read_domains, - reloc.write_domain); + obj, reloc->target_handle, + (int) reloc->offset, + reloc->read_domains, + reloc->write_domain); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); return -EINVAL; } - if (reloc.write_domain && target_obj->pending_write_domain && - reloc.write_domain != target_obj->pending_write_domain) { + if (reloc->write_domain && target_obj->pending_write_domain && + reloc->write_domain != target_obj->pending_write_domain) { DRM_ERROR("Write domain conflict: " "obj %p target %d offset %d " "new %08x old %08x\n", - obj, reloc.target_handle, - (int) reloc.offset, - reloc.write_domain, + obj, reloc->target_handle, + (int) reloc->offset, + reloc->write_domain, target_obj->pending_write_domain); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); @@ -2819,22 +2811,22 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, "presumed %08x delta %08x\n", __func__, obj, - (int) reloc.offset, - (int) reloc.target_handle, - (int) reloc.read_domains, - (int) reloc.write_domain, + (int) reloc->offset, + (int) reloc->target_handle, + (int) reloc->read_domains, + (int) reloc->write_domain, (int) target_obj_priv->gtt_offset, - (int) reloc.presumed_offset, - reloc.delta); + (int) reloc->presumed_offset, + reloc->delta); #endif - target_obj->pending_read_domains |= reloc.read_domains; - target_obj->pending_write_domain |= reloc.write_domain; + target_obj->pending_read_domains |= reloc->read_domains; + target_obj->pending_write_domain |= reloc->write_domain; /* If the relocation already has the right value in it, no * more work needs to be done. */ - if (target_obj_priv->gtt_offset == reloc.presumed_offset) { + if (target_obj_priv->gtt_offset == reloc->presumed_offset) { drm_gem_object_unreference(target_obj); continue; } @@ -2849,32 +2841,26 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, /* Map the page containing the relocation we're going to * perform. */ - reloc_offset = obj_priv->gtt_offset + reloc.offset; + reloc_offset = obj_priv->gtt_offset + reloc->offset; reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, (reloc_offset & ~(PAGE_SIZE - 1))); reloc_entry = (uint32_t __iomem *)(reloc_page + (reloc_offset & (PAGE_SIZE - 1))); - reloc_val = target_obj_priv->gtt_offset + reloc.delta; + reloc_val = target_obj_priv->gtt_offset + reloc->delta; #if WATCH_BUF DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n", - obj, (unsigned int) reloc.offset, + obj, (unsigned int) reloc->offset, readl(reloc_entry), reloc_val); #endif writel(reloc_val, reloc_entry); io_mapping_unmap_atomic(reloc_page); - /* Write the updated presumed offset for this entry back out - * to the user. + /* The updated presumed offset for this entry will be + * copied back out to the user. */ - reloc.presumed_offset = target_obj_priv->gtt_offset; - ret = copy_to_user(relocs + i, &reloc, sizeof(reloc)); - if (ret != 0) { - drm_gem_object_unreference(target_obj); - i915_gem_object_unpin(obj); - return ret; - } + reloc->presumed_offset = target_obj_priv->gtt_offset; drm_gem_object_unreference(target_obj); } @@ -2971,6 +2957,75 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) return ret; } +static int +i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, + uint32_t buffer_count, + struct drm_i915_gem_relocation_entry **relocs) +{ + uint32_t reloc_count = 0, reloc_index = 0, i; + int ret; + + *relocs = NULL; + for (i = 0; i < buffer_count; i++) { + if (reloc_count + exec_list[i].relocation_count < reloc_count) + return -EINVAL; + reloc_count += exec_list[i].relocation_count; + } + + *relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER); + if (*relocs == NULL) + return -ENOMEM; + + for (i = 0; i < buffer_count; i++) { + struct drm_i915_gem_relocation_entry __user *user_relocs; + + user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; + + ret = copy_from_user(&(*relocs)[reloc_index], + user_relocs, + exec_list[i].relocation_count * + sizeof(**relocs)); + if (ret != 0) { + drm_free(*relocs, reloc_count * sizeof(**relocs), + DRM_MEM_DRIVER); + *relocs = NULL; + return ret; + } + + reloc_index += exec_list[i].relocation_count; + } + + return ret; +} + +static int +i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, + uint32_t buffer_count, + struct drm_i915_gem_relocation_entry *relocs) +{ + uint32_t reloc_count = 0, i; + int ret; + + for (i = 0; i < buffer_count; i++) { + struct drm_i915_gem_relocation_entry __user *user_relocs; + + user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; + + if (ret == 0) { + ret = copy_to_user(user_relocs, + &relocs[reloc_count], + exec_list[i].relocation_count * + sizeof(*relocs)); + } + + reloc_count += exec_list[i].relocation_count; + } + + drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER); + + return ret; +} + int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -2983,9 +3038,10 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_gem_object *batch_obj; struct drm_i915_gem_object *obj_priv; struct drm_clip_rect *cliprects = NULL; - int ret, i, pinned = 0; + struct drm_i915_gem_relocation_entry *relocs; + int ret, ret2, i, pinned = 0; uint64_t exec_offset; - uint32_t seqno, flush_domains; + uint32_t seqno, flush_domains, reloc_index; int pin_tries; #if WATCH_EXEC @@ -3036,6 +3092,11 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, } } + ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count, + &relocs); + if (ret != 0) + goto pre_mutex_err; + mutex_lock(&dev->struct_mutex); i915_verify_inactive(dev, __FILE__, __LINE__); @@ -3078,15 +3139,19 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, /* Pin and relocate */ for (pin_tries = 0; ; pin_tries++) { ret = 0; + reloc_index = 0; + for (i = 0; i < args->buffer_count; i++) { object_list[i]->pending_read_domains = 0; object_list[i]->pending_write_domain = 0; ret = i915_gem_object_pin_and_relocate(object_list[i], file_priv, - &exec_list[i]); + &exec_list[i], + &relocs[reloc_index]); if (ret) break; pinned = i + 1; + reloc_index += exec_list[i].relocation_count; } /* success */ if (ret == 0) @@ -3236,6 +3301,20 @@ err: args->buffer_count, ret); } + /* Copy the updated relocations out regardless of current error + * state. Failure to update the relocs would mean that the next + * time userland calls execbuf, it would do so with presumed offset + * state that didn't match the actual object state. + */ + ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count, + relocs); + if (ret2 != 0) { + DRM_ERROR("Failed to copy relocations back out: %d\n", ret2); + + if (ret == 0) + ret = ret2; + } + pre_mutex_err: drm_free(object_list, sizeof(*object_list) * args->buffer_count, DRM_MEM_DRIVER); -- cgit v1.2.3-70-g09d2 From ad086c833d00ef3be56ec554b1061f19e87a6210 Mon Sep 17 00:00:00 2001 From: "Owain G. Ainsworth" Date: Fri, 20 Feb 2009 08:30:19 +0000 Subject: i915/drm: Remove two redundant agp_chipset_flushes agp_chipset_flush() is for flushing the intel GMCH write cache via the IFP, these two uses are for when we're getting the object into the cpu READ domain, and thus should not be needed. This confused me when I was getting my head around the code. With thanks to airlied for helping me check my mental picture of how the flushes and clflushes are supposed to be used. Signed-off-by: Owain G. Ainsworth Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f135c903305..b52cba0f16d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2381,7 +2381,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) { - struct drm_device *dev = obj->dev; int ret; i915_gem_object_flush_gpu_write_domain(obj); @@ -2400,7 +2399,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) /* Flush the CPU cache if it's still invalid. */ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { i915_gem_clflush_object(obj); - drm_agp_chipset_flush(dev); obj->read_domains |= I915_GEM_DOMAIN_CPU; } @@ -2612,7 +2610,6 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) { - struct drm_device *dev = obj->dev; struct drm_i915_gem_object *obj_priv = obj->driver_private; if (!obj_priv->page_cpu_valid) @@ -2628,7 +2625,6 @@ i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) continue; drm_clflush_pages(obj_priv->pages + i, 1); } - drm_agp_chipset_flush(dev); } /* Free the page_cpu_valid mappings which are now stale, whether -- cgit v1.2.3-70-g09d2 From d008877550d8ca8c6878dd494e50c1b9209f38d4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 28 Mar 2009 20:29:48 -0400 Subject: drm/i915: check the return value from the copy from user This produced a warning on my build, not sure why super-warning-man didn't notice this one, its much worse than the %z one. Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5d2bdf2cc9..e0389ad1477 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -446,13 +446,16 @@ fast_shmem_write(struct page **pages, int length) { char __iomem *vaddr; + unsigned long unwritten; vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0); if (vaddr == NULL) return -ENOMEM; - __copy_from_user_inatomic(vaddr + page_offset, data, length); + unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length); kunmap_atomic(vaddr, KM_USER0); + if (unwritten) + return -EFAULT; return 0; } -- cgit v1.2.3-70-g09d2