diff options
Diffstat (limited to 'drivers/gpu/drm')
145 files changed, 5991 insertions, 2606 deletions
diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c index 3f65dd6676b..a28640f47c2 100644 --- a/drivers/gpu/drm/ast/ast_fb.c +++ b/drivers/gpu/drm/ast/ast_fb.c @@ -65,7 +65,7 @@ static void ast_dirty_update(struct ast_fbdev *afbdev, * then the BO is being moved and we should * store up the damage until later. */ - if (!drm_can_sleep()) + if (drm_can_sleep()) ret = ast_bo_reserve(bo, true); if (ret) { if (ret != -EBUSY) diff --git a/drivers/gpu/drm/cirrus/cirrus_fbdev.c b/drivers/gpu/drm/cirrus/cirrus_fbdev.c index 2fd4a92162c..32bbba0a787 100644 --- a/drivers/gpu/drm/cirrus/cirrus_fbdev.c +++ b/drivers/gpu/drm/cirrus/cirrus_fbdev.c @@ -39,7 +39,7 @@ static void cirrus_dirty_update(struct cirrus_fbdev *afbdev, * then the BO is being moved and we should * store up the damage until later. */ - if (!drm_can_sleep()) + if (drm_can_sleep()) ret = cirrus_bo_reserve(bo, true); if (ret) { if (ret != -EBUSY) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 35ea15d5fff..b1c2b278005 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -3784,9 +3784,26 @@ int drm_mode_create_dumb_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_create_dumb *args = data; + u32 cpp, stride, size; if (!dev->driver->dumb_create) return -ENOSYS; + if (!args->width || !args->height || !args->bpp) + return -EINVAL; + + /* overflow checks for 32bit size calculations */ + cpp = DIV_ROUND_UP(args->bpp, 8); + if (cpp > 0xffffffffU / args->width) + return -EINVAL; + stride = cpp * args->width; + if (args->height > 0xffffffffU / stride) + return -EINVAL; + + /* test for wrap-around */ + size = args->height * stride; + if (PAGE_ALIGN(size) == 0) + return -EINVAL; + return dev->driver->dumb_create(file_priv, dev, args); } diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 9e978aae897..35251af3b14 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -346,3 +346,398 @@ int drm_dp_bw_code_to_link_rate(u8 link_bw) } } EXPORT_SYMBOL(drm_dp_bw_code_to_link_rate); + +/** + * DOC: dp helpers + * + * The DisplayPort AUX channel is an abstraction to allow generic, driver- + * independent access to AUX functionality. Drivers can take advantage of + * this by filling in the fields of the drm_dp_aux structure. + * + * Transactions are described using a hardware-independent drm_dp_aux_msg + * structure, which is passed into a driver's .transfer() implementation. + * Both native and I2C-over-AUX transactions are supported. + */ + +static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request, + unsigned int offset, void *buffer, size_t size) +{ + struct drm_dp_aux_msg msg; + unsigned int retry; + int err; + + memset(&msg, 0, sizeof(msg)); + msg.address = offset; + msg.request = request; + msg.buffer = buffer; + msg.size = size; + + /* + * The specification doesn't give any recommendation on how often to + * retry native transactions, so retry 7 times like for I2C-over-AUX + * transactions. + */ + for (retry = 0; retry < 7; retry++) { + err = aux->transfer(aux, &msg); + if (err < 0) { + if (err == -EBUSY) + continue; + + return err; + } + + if (err < size) + return -EPROTO; + + switch (msg.reply & DP_AUX_NATIVE_REPLY_MASK) { + case DP_AUX_NATIVE_REPLY_ACK: + return err; + + case DP_AUX_NATIVE_REPLY_NACK: + return -EIO; + + case DP_AUX_NATIVE_REPLY_DEFER: + usleep_range(400, 500); + break; + } + } + + DRM_ERROR("too many retries, giving up\n"); + return -EIO; +} + +/** + * drm_dp_dpcd_read() - read a series of bytes from the DPCD + * @aux: DisplayPort AUX channel + * @offset: address of the (first) register to read + * @buffer: buffer to store the register values + * @size: number of bytes in @buffer + * + * Returns the number of bytes transferred on success, or a negative error + * code on failure. -EIO is returned if the request was NAKed by the sink or + * if the retry count was exceeded. If not all bytes were transferred, this + * function returns -EPROTO. Errors from the underlying AUX channel transfer + * function, with the exception of -EBUSY (which causes the transaction to + * be retried), are propagated to the caller. + */ +ssize_t drm_dp_dpcd_read(struct drm_dp_aux *aux, unsigned int offset, + void *buffer, size_t size) +{ + return drm_dp_dpcd_access(aux, DP_AUX_NATIVE_READ, offset, buffer, + size); +} +EXPORT_SYMBOL(drm_dp_dpcd_read); + +/** + * drm_dp_dpcd_write() - write a series of bytes to the DPCD + * @aux: DisplayPort AUX channel + * @offset: address of the (first) register to write + * @buffer: buffer containing the values to write + * @size: number of bytes in @buffer + * + * Returns the number of bytes transferred on success, or a negative error + * code on failure. -EIO is returned if the request was NAKed by the sink or + * if the retry count was exceeded. If not all bytes were transferred, this + * function returns -EPROTO. Errors from the underlying AUX channel transfer + * function, with the exception of -EBUSY (which causes the transaction to + * be retried), are propagated to the caller. + */ +ssize_t drm_dp_dpcd_write(struct drm_dp_aux *aux, unsigned int offset, + void *buffer, size_t size) +{ + return drm_dp_dpcd_access(aux, DP_AUX_NATIVE_WRITE, offset, buffer, + size); +} +EXPORT_SYMBOL(drm_dp_dpcd_write); + +/** + * drm_dp_dpcd_read_link_status() - read DPCD link status (bytes 0x202-0x207) + * @aux: DisplayPort AUX channel + * @status: buffer to store the link status in (must be at least 6 bytes) + * + * Returns the number of bytes transferred on success or a negative error + * code on failure. + */ +int drm_dp_dpcd_read_link_status(struct drm_dp_aux *aux, + u8 status[DP_LINK_STATUS_SIZE]) +{ + return drm_dp_dpcd_read(aux, DP_LANE0_1_STATUS, status, + DP_LINK_STATUS_SIZE); +} +EXPORT_SYMBOL(drm_dp_dpcd_read_link_status); + +/** + * drm_dp_link_probe() - probe a DisplayPort link for capabilities + * @aux: DisplayPort AUX channel + * @link: pointer to structure in which to return link capabilities + * + * The structure filled in by this function can usually be passed directly + * into drm_dp_link_power_up() and drm_dp_link_configure() to power up and + * configure the link based on the link's capabilities. + * + * Returns 0 on success or a negative error code on failure. + */ +int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link) +{ + u8 values[3]; + int err; + + memset(link, 0, sizeof(*link)); + + err = drm_dp_dpcd_read(aux, DP_DPCD_REV, values, sizeof(values)); + if (err < 0) + return err; + + link->revision = values[0]; + link->rate = drm_dp_bw_code_to_link_rate(values[1]); + link->num_lanes = values[2] & DP_MAX_LANE_COUNT_MASK; + + if (values[2] & DP_ENHANCED_FRAME_CAP) + link->capabilities |= DP_LINK_CAP_ENHANCED_FRAMING; + + return 0; +} +EXPORT_SYMBOL(drm_dp_link_probe); + +/** + * drm_dp_link_power_up() - power up a DisplayPort link + * @aux: DisplayPort AUX channel + * @link: pointer to a structure containing the link configuration + * + * Returns 0 on success or a negative error code on failure. + */ +int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link) +{ + u8 value; + int err; + + /* DP_SET_POWER register is only available on DPCD v1.1 and later */ + if (link->revision < 0x11) + return 0; + + err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value); + if (err < 0) + return err; + + value &= ~DP_SET_POWER_MASK; + value |= DP_SET_POWER_D0; + + err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value); + if (err < 0) + return err; + + /* + * According to the DP 1.1 specification, a "Sink Device must exit the + * power saving state within 1 ms" (Section 2.5.3.1, Table 5-52, "Sink + * Control Field" (register 0x600). + */ + usleep_range(1000, 2000); + + return 0; +} +EXPORT_SYMBOL(drm_dp_link_power_up); + +/** + * drm_dp_link_configure() - configure a DisplayPort link + * @aux: DisplayPort AUX channel + * @link: pointer to a structure containing the link configuration + * + * Returns 0 on success or a negative error code on failure. + */ +int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link) +{ + u8 values[2]; + int err; + + values[0] = drm_dp_link_rate_to_bw_code(link->rate); + values[1] = link->num_lanes; + + if (link->capabilities & DP_LINK_CAP_ENHANCED_FRAMING) + values[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; + + err = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, values, sizeof(values)); + if (err < 0) + return err; + + return 0; +} +EXPORT_SYMBOL(drm_dp_link_configure); + +/* + * I2C-over-AUX implementation + */ + +static u32 drm_dp_i2c_functionality(struct i2c_adapter *adapter) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | + I2C_FUNC_SMBUS_READ_BLOCK_DATA | + I2C_FUNC_SMBUS_BLOCK_PROC_CALL | + I2C_FUNC_10BIT_ADDR; +} + +/* + * Transfer a single I2C-over-AUX message and handle various error conditions, + * retrying the transaction as appropriate. + */ +static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) +{ + unsigned int retry; + int err; + + /* + * DP1.2 sections 2.7.7.1.5.6.1 and 2.7.7.1.6.6.1: A DP Source device + * is required to retry at least seven times upon receiving AUX_DEFER + * before giving up the AUX transaction. + */ + for (retry = 0; retry < 7; retry++) { + err = aux->transfer(aux, msg); + if (err < 0) { + if (err == -EBUSY) + continue; + + DRM_DEBUG_KMS("transaction failed: %d\n", err); + return err; + } + + if (err < msg->size) + return -EPROTO; + + switch (msg->reply & DP_AUX_NATIVE_REPLY_MASK) { + case DP_AUX_NATIVE_REPLY_ACK: + /* + * For I2C-over-AUX transactions this isn't enough, we + * need to check for the I2C ACK reply. + */ + break; + + case DP_AUX_NATIVE_REPLY_NACK: + DRM_DEBUG_KMS("native nack\n"); + return -EREMOTEIO; + + case DP_AUX_NATIVE_REPLY_DEFER: + DRM_DEBUG_KMS("native defer"); + /* + * We could check for I2C bit rate capabilities and if + * available adjust this interval. We could also be + * more careful with DP-to-legacy adapters where a + * long legacy cable may force very low I2C bit rates. + * + * For now just defer for long enough to hopefully be + * safe for all use-cases. + */ + usleep_range(500, 600); + continue; + + default: + DRM_ERROR("invalid native reply %#04x\n", msg->reply); + return -EREMOTEIO; + } + + switch (msg->reply & DP_AUX_I2C_REPLY_MASK) { + case DP_AUX_I2C_REPLY_ACK: + /* + * Both native ACK and I2C ACK replies received. We + * can assume the transfer was successful. + */ + return 0; + + case DP_AUX_I2C_REPLY_NACK: + DRM_DEBUG_KMS("I2C nack\n"); + return -EREMOTEIO; + + case DP_AUX_I2C_REPLY_DEFER: + DRM_DEBUG_KMS("I2C defer\n"); + usleep_range(400, 500); + continue; + + default: + DRM_ERROR("invalid I2C reply %#04x\n", msg->reply); + return -EREMOTEIO; + } + } + + DRM_ERROR("too many retries, giving up\n"); + return -EREMOTEIO; +} + +static int drm_dp_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, + int num) +{ + struct drm_dp_aux *aux = adapter->algo_data; + unsigned int i, j; + + for (i = 0; i < num; i++) { + struct drm_dp_aux_msg msg; + int err; + + /* + * Many hardware implementations support FIFOs larger than a + * single byte, but it has been empirically determined that + * transferring data in larger chunks can actually lead to + * decreased performance. Therefore each message is simply + * transferred byte-by-byte. + */ + for (j = 0; j < msgs[i].len; j++) { + memset(&msg, 0, sizeof(msg)); + msg.address = msgs[i].addr; + + msg.request = (msgs[i].flags & I2C_M_RD) ? + DP_AUX_I2C_READ : + DP_AUX_I2C_WRITE; + + /* + * All messages except the last one are middle-of- + * transfer messages. + */ + if ((i < num - 1) || (j < msgs[i].len - 1)) + msg.request |= DP_AUX_I2C_MOT; + + msg.buffer = msgs[i].buf + j; + msg.size = 1; + + err = drm_dp_i2c_do_msg(aux, &msg); + if (err < 0) + return err; + } + } + + return num; +} + +static const struct i2c_algorithm drm_dp_i2c_algo = { + .functionality = drm_dp_i2c_functionality, + .master_xfer = drm_dp_i2c_xfer, +}; + +/** + * drm_dp_aux_register_i2c_bus() - register an I2C adapter for I2C-over-AUX + * @aux: DisplayPort AUX channel + * + * Returns 0 on success or a negative error code on failure. + */ +int drm_dp_aux_register_i2c_bus(struct drm_dp_aux *aux) +{ + aux->ddc.algo = &drm_dp_i2c_algo; + aux->ddc.algo_data = aux; + aux->ddc.retries = 3; + + aux->ddc.class = I2C_CLASS_DDC; + aux->ddc.owner = THIS_MODULE; + aux->ddc.dev.parent = aux->dev; + aux->ddc.dev.of_node = aux->dev->of_node; + + strncpy(aux->ddc.name, dev_name(aux->dev), sizeof(aux->ddc.name)); + + return i2c_add_adapter(&aux->ddc); +} +EXPORT_SYMBOL(drm_dp_aux_register_i2c_bus); + +/** + * drm_dp_aux_unregister_i2c_bus() - unregister an I2C-over-AUX adapter + * @aux: DisplayPort AUX channel + */ +void drm_dp_aux_unregister_i2c_bus(struct drm_dp_aux *aux) +{ + i2c_del_adapter(&aux->ddc); +} +EXPORT_SYMBOL(drm_dp_aux_unregister_i2c_bus); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 345be03c23d..ec651be2f3c 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -344,7 +344,7 @@ long drm_ioctl(struct file *filp, DRM_DEBUG("pid=%d, dev=0x%lx, auth=%d, %s\n", task_pid_nr(current), - (long)old_encode_dev(file_priv->minor->device), + (long)old_encode_dev(file_priv->minor->kdev->devt), file_priv->authenticated, ioctl->name); /* Do not trust userspace, use our own definition */ @@ -402,7 +402,7 @@ long drm_ioctl(struct file *filp, if (!ioctl) DRM_DEBUG("invalid ioctl: pid=%d, dev=0x%lx, auth=%d, cmd=0x%02x, nr=0x%02x\n", task_pid_nr(current), - (long)old_encode_dev(file_priv->minor->device), + (long)old_encode_dev(file_priv->minor->kdev->devt), file_priv->authenticated, cmd, nr); if (kdata != stack_kdata) diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c index 7f2af9aca03..8f46fe273ba 100644 --- a/drivers/gpu/drm/drm_fops.c +++ b/drivers/gpu/drm/drm_fops.c @@ -39,12 +39,12 @@ #include <linux/slab.h> #include <linux/module.h> -/* from BKL pushdown: note that nothing else serializes idr_find() */ +/* from BKL pushdown */ DEFINE_MUTEX(drm_global_mutex); EXPORT_SYMBOL(drm_global_mutex); static int drm_open_helper(struct inode *inode, struct file *filp, - struct drm_device * dev); + struct drm_minor *minor); static int drm_setup(struct drm_device * dev) { @@ -79,24 +79,18 @@ static int drm_setup(struct drm_device * dev) */ int drm_open(struct inode *inode, struct file *filp) { - struct drm_device *dev = NULL; - int minor_id = iminor(inode); + struct drm_device *dev; struct drm_minor *minor; - int retcode = 0; + int retcode; int need_setup = 0; struct address_space *old_mapping; struct address_space *old_imapping; - minor = idr_find(&drm_minors_idr, minor_id); - if (!minor) - return -ENODEV; - - if (!(dev = minor->dev)) - return -ENODEV; - - if (drm_device_is_unplugged(dev)) - return -ENODEV; + minor = drm_minor_acquire(iminor(inode)); + if (IS_ERR(minor)) + return PTR_ERR(minor); + dev = minor->dev; if (!dev->open_count++) need_setup = 1; mutex_lock(&dev->struct_mutex); @@ -110,7 +104,7 @@ int drm_open(struct inode *inode, struct file *filp) filp->f_mapping = dev->dev_mapping; mutex_unlock(&dev->struct_mutex); - retcode = drm_open_helper(inode, filp, dev); + retcode = drm_open_helper(inode, filp, minor); if (retcode) goto err_undo; if (need_setup) { @@ -128,6 +122,7 @@ err_undo: dev->dev_mapping = old_mapping; mutex_unlock(&dev->struct_mutex); dev->open_count--; + drm_minor_release(minor); return retcode; } EXPORT_SYMBOL(drm_open); @@ -143,33 +138,30 @@ EXPORT_SYMBOL(drm_open); */ int drm_stub_open(struct inode *inode, struct file *filp) { - struct drm_device *dev = NULL; + struct drm_device *dev; struct drm_minor *minor; - int minor_id = iminor(inode); int err = -ENODEV; const struct file_operations *new_fops; DRM_DEBUG("\n"); mutex_lock(&drm_global_mutex); - minor = idr_find(&drm_minors_idr, minor_id); - if (!minor) - goto out; - - if (!(dev = minor->dev)) - goto out; - - if (drm_device_is_unplugged(dev)) - goto out; + minor = drm_minor_acquire(iminor(inode)); + if (IS_ERR(minor)) + goto out_unlock; + dev = minor->dev; new_fops = fops_get(dev->driver->fops); if (!new_fops) - goto out; + goto out_release; replace_fops(filp, new_fops); if (filp->f_op->open) err = filp->f_op->open(inode, filp); -out: + +out_release: + drm_minor_release(minor); +out_unlock: mutex_unlock(&drm_global_mutex); return err; } @@ -196,16 +188,16 @@ static int drm_cpu_valid(void) * * \param inode device inode. * \param filp file pointer. - * \param dev device. + * \param minor acquired minor-object. * \return zero on success or a negative number on failure. * * Creates and initializes a drm_file structure for the file private data in \p * filp and add it into the double linked list in \p dev. */ static int drm_open_helper(struct inode *inode, struct file *filp, - struct drm_device * dev) + struct drm_minor *minor) { - int minor_id = iminor(inode); + struct drm_device *dev = minor->dev; struct drm_file *priv; int ret; @@ -216,7 +208,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp, if (dev->switch_power_state != DRM_SWITCH_POWER_ON && dev->switch_power_state != DRM_SWITCH_POWER_DYNAMIC_OFF) return -EINVAL; - DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor_id); + DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor->index); priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) @@ -226,11 +218,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp, priv->filp = filp; priv->uid = current_euid(); priv->pid = get_pid(task_pid(current)); - priv->minor = idr_find(&drm_minors_idr, minor_id); - if (!priv->minor) { - ret = -ENODEV; - goto out_put_pid; - } + priv->minor = minor; /* for compatibility root is always authenticated */ priv->always_authenticated = capable(CAP_SYS_ADMIN); @@ -336,7 +324,6 @@ out_prime_destroy: drm_prime_destroy_file_private(&priv->prime); if (dev->driver->driver_features & DRIVER_GEM) drm_gem_release(dev, priv); -out_put_pid: put_pid(priv->pid); kfree(priv); filp->private_data = NULL; @@ -458,7 +445,8 @@ int drm_lastclose(struct drm_device * dev) int drm_release(struct inode *inode, struct file *filp) { struct drm_file *file_priv = filp->private_data; - struct drm_device *dev = file_priv->minor->dev; + struct drm_minor *minor = file_priv->minor; + struct drm_device *dev = minor->dev; int retcode = 0; mutex_lock(&drm_global_mutex); @@ -474,7 +462,7 @@ int drm_release(struct inode *inode, struct file *filp) DRM_DEBUG("pid = %d, device = 0x%lx, open_count = %d\n", task_pid_nr(current), - (long)old_encode_dev(file_priv->minor->device), + (long)old_encode_dev(file_priv->minor->kdev->devt), dev->open_count); /* Release any auth tokens that might point to this file_priv, @@ -580,6 +568,8 @@ int drm_release(struct inode *inode, struct file *filp) } mutex_unlock(&drm_global_mutex); + drm_minor_release(minor); + return retcode; } EXPORT_SYMBOL(drm_release); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 5bbad873c79..154d6c6955c 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -692,7 +692,9 @@ drm_gem_object_release(struct drm_gem_object *obj) WARN_ON(obj->dma_buf); if (obj->filp) - fput(obj->filp); + fput(obj->filp); + + drm_gem_free_mmap_offset(obj); } EXPORT_SYMBOL(drm_gem_object_release); @@ -782,7 +784,7 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = dev->driver->gem_vm_ops; vma->vm_private_data = obj; - vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); /* Take a ref for this mapping of the object, so that the fault * handler can dereference the mmap offset's pointer to the object. @@ -818,7 +820,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) struct drm_device *dev = priv->minor->dev; struct drm_gem_object *obj; struct drm_vma_offset_node *node; - int ret = 0; + int ret; if (drm_device_is_unplugged(dev)) return -ENODEV; diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index dffc836144c..f4dc9b7a383 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -296,6 +296,18 @@ int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_priv) case DRM_CAP_ASYNC_PAGE_FLIP: req->value = dev->mode_config.async_page_flip; break; + case DRM_CAP_CURSOR_WIDTH: + if (dev->mode_config.cursor_width) + req->value = dev->mode_config.cursor_width; + else + req->value = 64; + break; + case DRM_CAP_CURSOR_HEIGHT: + if (dev->mode_config.cursor_height) + req->value = dev->mode_config.cursor_height; + else + req->value = 64; + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 5736aaa7e86..9ded847b05b 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -351,7 +351,7 @@ err_agp: drm_pci_agp_destroy(dev); pci_disable_device(pdev); err_free: - drm_dev_free(dev); + drm_dev_unref(dev); return ret; } EXPORT_SYMBOL(drm_get_pci_dev); diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c index 21fc82006b7..319ff538560 100644 --- a/drivers/gpu/drm/drm_platform.c +++ b/drivers/gpu/drm/drm_platform.c @@ -64,7 +64,7 @@ static int drm_get_platform_dev(struct platform_device *platdev, return 0; err_free: - drm_dev_free(dev); + drm_dev_unref(dev); return ret; } diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index 98a33c580ca..c23eaf6442f 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -70,6 +70,7 @@ module_param_named(vblankoffdelay, drm_vblank_offdelay, int, 0600); module_param_named(timestamp_precision_usec, drm_timestamp_precision, int, 0600); module_param_named(timestamp_monotonic, drm_timestamp_monotonic, int, 0600); +static DEFINE_SPINLOCK(drm_minor_lock); struct idr drm_minors_idr; struct class *drm_class; @@ -117,26 +118,6 @@ void drm_ut_debug_printk(unsigned int request_level, } EXPORT_SYMBOL(drm_ut_debug_printk); -static int drm_minor_get_id(struct drm_device *dev, int type) -{ - int ret; - int base = 0, limit = 63; - - if (type == DRM_MINOR_CONTROL) { - base += 64; - limit = base + 63; - } else if (type == DRM_MINOR_RENDER) { - base += 128; - limit = base + 63; - } - - mutex_lock(&dev->struct_mutex); - ret = idr_alloc(&drm_minors_idr, NULL, base, limit, GFP_KERNEL); - mutex_unlock(&dev->struct_mutex); - - return ret == -ENOSPC ? -EINVAL : ret; -} - struct drm_master *drm_master_create(struct drm_minor *minor) { struct drm_master *master; @@ -260,119 +241,183 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data, return 0; } -/** - * drm_get_minor - Allocate and register new DRM minor - * @dev: DRM device - * @minor: Pointer to where new minor is stored - * @type: Type of minor - * - * Allocate a new minor of the given type and register it. A pointer to the new - * minor is returned in @minor. - * Caller must hold the global DRM mutex. +/* + * DRM Minors + * A DRM device can provide several char-dev interfaces on the DRM-Major. Each + * of them is represented by a drm_minor object. Depending on the capabilities + * of the device-driver, different interfaces are registered. * - * RETURNS: - * 0 on success, negative error code on failure. + * Minors can be accessed via dev->$minor_name. This pointer is either + * NULL or a valid drm_minor pointer and stays valid as long as the device is + * valid. This means, DRM minors have the same life-time as the underlying + * device. However, this doesn't mean that the minor is active. Minors are + * registered and unregistered dynamically according to device-state. */ -static int drm_get_minor(struct drm_device *dev, struct drm_minor **minor, - int type) + +static struct drm_minor **drm_minor_get_slot(struct drm_device *dev, + unsigned int type) +{ + switch (type) { + case DRM_MINOR_LEGACY: + return &dev->primary; + case DRM_MINOR_RENDER: + return &dev->render; + case DRM_MINOR_CONTROL: + return &dev->control; + default: + return NULL; + } +} + +static int drm_minor_alloc(struct drm_device *dev, unsigned int type) +{ + struct drm_minor *minor; + + minor = kzalloc(sizeof(*minor), GFP_KERNEL); + if (!minor) + return -ENOMEM; + + minor->type = type; + minor->dev = dev; + INIT_LIST_HEAD(&minor->master_list); + + *drm_minor_get_slot(dev, type) = minor; + return 0; +} + +static void drm_minor_free(struct drm_device *dev, unsigned int type) +{ + struct drm_minor **slot; + + slot = drm_minor_get_slot(dev, type); + if (*slot) { + kfree(*slot); + *slot = NULL; + } +} + +static int drm_minor_register(struct drm_device *dev, unsigned int type) { struct drm_minor *new_minor; + unsigned long flags; int ret; int minor_id; DRM_DEBUG("\n"); - minor_id = drm_minor_get_id(dev, type); + new_minor = *drm_minor_get_slot(dev, type); + if (!new_minor) + return 0; + + idr_preload(GFP_KERNEL); + spin_lock_irqsave(&drm_minor_lock, flags); + minor_id = idr_alloc(&drm_minors_idr, + NULL, + 64 * type, + 64 * (type + 1), + GFP_NOWAIT); + spin_unlock_irqrestore(&drm_minor_lock, flags); + idr_preload_end(); + if (minor_id < 0) return minor_id; - new_minor = kzalloc(sizeof(struct drm_minor), GFP_KERNEL); - if (!new_minor) { - ret = -ENOMEM; - goto err_idr; - } - - new_minor->type = type; - new_minor->device = MKDEV(DRM_MAJOR, minor_id); - new_minor->dev = dev; new_minor->index = minor_id; - INIT_LIST_HEAD(&new_minor->master_list); - - idr_replace(&drm_minors_idr, new_minor, minor_id); -#if defined(CONFIG_DEBUG_FS) ret = drm_debugfs_init(new_minor, minor_id, drm_debugfs_root); if (ret) { DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n"); - goto err_mem; + goto err_id; } -#endif ret = drm_sysfs_device_add(new_minor); if (ret) { - printk(KERN_ERR - "DRM: Error sysfs_device_add.\n"); + DRM_ERROR("DRM: Error sysfs_device_add.\n"); goto err_debugfs; } - *minor = new_minor; + + /* replace NULL with @minor so lookups will succeed from now on */ + spin_lock_irqsave(&drm_minor_lock, flags); + idr_replace(&drm_minors_idr, new_minor, new_minor->index); + spin_unlock_irqrestore(&drm_minor_lock, flags); DRM_DEBUG("new minor assigned %d\n", minor_id); return 0; - err_debugfs: -#if defined(CONFIG_DEBUG_FS) drm_debugfs_cleanup(new_minor); -err_mem: -#endif - kfree(new_minor); -err_idr: +err_id: + spin_lock_irqsave(&drm_minor_lock, flags); idr_remove(&drm_minors_idr, minor_id); - *minor = NULL; + spin_unlock_irqrestore(&drm_minor_lock, flags); + new_minor->index = 0; return ret; } -/** - * drm_unplug_minor - Unplug DRM minor - * @minor: Minor to unplug - * - * Unplugs the given DRM minor but keeps the object. So after this returns, - * minor->dev is still valid so existing open-files can still access it to get - * device information from their drm_file ojects. - * If the minor is already unplugged or if @minor is NULL, nothing is done. - * The global DRM mutex must be held by the caller. - */ -static void drm_unplug_minor(struct drm_minor *minor) +static void drm_minor_unregister(struct drm_device *dev, unsigned int type) { + struct drm_minor *minor; + unsigned long flags; + + minor = *drm_minor_get_slot(dev, type); if (!minor || !minor->kdev) return; -#if defined(CONFIG_DEBUG_FS) - drm_debugfs_cleanup(minor); -#endif + spin_lock_irqsave(&drm_minor_lock, flags); + idr_remove(&drm_minors_idr, minor->index); + spin_unlock_irqrestore(&drm_minor_lock, flags); + minor->index = 0; + drm_debugfs_cleanup(minor); drm_sysfs_device_remove(minor); - idr_remove(&drm_minors_idr, minor->index); } /** - * drm_put_minor - Destroy DRM minor - * @minor: Minor to destroy + * drm_minor_acquire - Acquire a DRM minor + * @minor_id: Minor ID of the DRM-minor + * + * Looks up the given minor-ID and returns the respective DRM-minor object. The + * refence-count of the underlying device is increased so you must release this + * object with drm_minor_release(). + * + * As long as you hold this minor, it is guaranteed that the object and the + * minor->dev pointer will stay valid! However, the device may get unplugged and + * unregistered while you hold the minor. * - * This calls drm_unplug_minor() on the given minor and then frees it. Nothing - * is done if @minor is NULL. It is fine to call this on already unplugged - * minors. - * The global DRM mutex must be held by the caller. + * Returns: + * Pointer to minor-object with increased device-refcount, or PTR_ERR on + * failure. */ -static void drm_put_minor(struct drm_minor *minor) +struct drm_minor *drm_minor_acquire(unsigned int minor_id) { - if (!minor) - return; + struct drm_minor *minor; + unsigned long flags; + + spin_lock_irqsave(&drm_minor_lock, flags); + minor = idr_find(&drm_minors_idr, minor_id); + if (minor) + drm_dev_ref(minor->dev); + spin_unlock_irqrestore(&drm_minor_lock, flags); + + if (!minor) { + return ERR_PTR(-ENODEV); + } else if (drm_device_is_unplugged(minor->dev)) { + drm_dev_unref(minor->dev); + return ERR_PTR(-ENODEV); + } - DRM_DEBUG("release secondary minor %d\n", minor->index); + return minor; +} - drm_unplug_minor(minor); - kfree(minor); +/** + * drm_minor_release - Release DRM minor + * @minor: Pointer to DRM minor object + * + * Release a minor that was previously acquired via drm_minor_acquire(). + */ +void drm_minor_release(struct drm_minor *minor) +{ + drm_dev_unref(minor->dev); } /** @@ -392,18 +437,16 @@ void drm_put_dev(struct drm_device *dev) } drm_dev_unregister(dev); - drm_dev_free(dev); + drm_dev_unref(dev); } EXPORT_SYMBOL(drm_put_dev); void drm_unplug_dev(struct drm_device *dev) { /* for a USB device */ - if (drm_core_check_feature(dev, DRIVER_MODESET)) - drm_unplug_minor(dev->control); - if (dev->render) - drm_unplug_minor(dev->render); - drm_unplug_minor(dev->primary); + drm_minor_unregister(dev, DRM_MINOR_LEGACY); + drm_minor_unregister(dev, DRM_MINOR_RENDER); + drm_minor_unregister(dev, DRM_MINOR_CONTROL); mutex_lock(&drm_global_mutex); @@ -425,6 +468,9 @@ EXPORT_SYMBOL(drm_unplug_dev); * Call drm_dev_register() to advertice the device to user space and register it * with other core subsystems. * + * The initial ref-count of the object is 1. Use drm_dev_ref() and + * drm_dev_unref() to take and drop further ref-counts. + * * RETURNS: * Pointer to new DRM device, or NULL if out of memory. */ @@ -438,6 +484,7 @@ struct drm_device *drm_dev_alloc(struct drm_driver *driver, if (!dev) return NULL; + kref_init(&dev->ref); dev->dev = parent; dev->driver = driver; @@ -452,8 +499,24 @@ struct drm_device *drm_dev_alloc(struct drm_driver *driver, mutex_init(&dev->struct_mutex); mutex_init(&dev->ctxlist_mutex); + if (drm_core_check_feature(dev, DRIVER_MODESET)) { + ret = drm_minor_alloc(dev, DRM_MINOR_CONTROL); + if (ret) + goto err_minors; + } + + if (drm_core_check_feature(dev, DRIVER_RENDER) && drm_rnodes) { + ret = drm_minor_alloc(dev, DRM_MINOR_RENDER); + if (ret) + goto err_minors; + } + + ret = drm_minor_alloc(dev, DRM_MINOR_LEGACY); + if (ret) + goto err_minors; + if (drm_ht_create(&dev->map_hash, 12)) - goto err_free; + goto err_minors; ret = drm_ctxbitmap_init(dev); if (ret) { @@ -475,27 +538,18 @@ err_ctxbitmap: drm_ctxbitmap_cleanup(dev); err_ht: drm_ht_remove(&dev->map_hash); -err_free: +err_minors: + drm_minor_free(dev, DRM_MINOR_LEGACY); + drm_minor_free(dev, DRM_MINOR_RENDER); + drm_minor_free(dev, DRM_MINOR_CONTROL); kfree(dev); return NULL; } EXPORT_SYMBOL(drm_dev_alloc); -/** - * drm_dev_free - Free DRM device - * @dev: DRM device to free - * - * Free a DRM device that has previously been allocated via drm_dev_alloc(). - * You must not use kfree() instead or you will leak memory. - * - * This must not be called once the device got registered. Use drm_put_dev() - * instead, which then calls drm_dev_free(). - */ -void drm_dev_free(struct drm_device *dev) +static void drm_dev_release(struct kref *ref) { - drm_put_minor(dev->control); - drm_put_minor(dev->render); - drm_put_minor(dev->primary); + struct drm_device *dev = container_of(ref, struct drm_device, ref); if (dev->driver->driver_features & DRIVER_GEM) drm_gem_destroy(dev); @@ -503,10 +557,46 @@ void drm_dev_free(struct drm_device *dev) drm_ctxbitmap_cleanup(dev); drm_ht_remove(&dev->map_hash); + drm_minor_free(dev, DRM_MINOR_LEGACY); + drm_minor_free(dev, DRM_MINOR_RENDER); + drm_minor_free(dev, DRM_MINOR_CONTROL); + kfree(dev->devname); kfree(dev); } -EXPORT_SYMBOL(drm_dev_free); + +/** + * drm_dev_ref - Take reference of a DRM device + * @dev: device to take reference of or NULL + * + * This increases the ref-count of @dev by one. You *must* already own a + * reference when calling this. Use drm_dev_unref() to drop this reference + * again. + * + * This function never fails. However, this function does not provide *any* + * guarantee whether the device is alive or running. It only provides a + * reference to the object and the memory associated with it. + */ +void drm_dev_ref(struct drm_device *dev) +{ + if (dev) + kref_get(&dev->ref); +} +EXPORT_SYMBOL(drm_dev_ref); + +/** + * drm_dev_unref - Drop reference of a DRM device + * @dev: device to drop reference of or NULL + * + * This decreases the ref-count of @dev by one. The device is destroyed if the + * ref-count drops to zero. + */ +void drm_dev_unref(struct drm_device *dev) +{ + if (dev) + kref_put(&dev->ref, drm_dev_release); +} +EXPORT_SYMBOL(drm_dev_unref); /** * drm_dev_register - Register DRM device @@ -527,26 +617,22 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) mutex_lock(&drm_global_mutex); - if (drm_core_check_feature(dev, DRIVER_MODESET)) { - ret = drm_get_minor(dev, &dev->control, DRM_MINOR_CONTROL); - if (ret) - goto out_unlock; - } + ret = drm_minor_register(dev, DRM_MINOR_CONTROL); + if (ret) + goto err_minors; - if (drm_core_check_feature(dev, DRIVER_RENDER) && drm_rnodes) { - ret = drm_get_minor(dev, &dev->render, DRM_MINOR_RENDER); - if (ret) - goto err_control_node; - } + ret = drm_minor_register(dev, DRM_MINOR_RENDER); + if (ret) + goto err_minors; - ret = drm_get_minor(dev, &dev->primary, DRM_MINOR_LEGACY); + ret = drm_minor_register(dev, DRM_MINOR_LEGACY); if (ret) - goto err_render_node; + goto err_minors; if (dev->driver->load) { ret = dev->driver->load(dev, flags); if (ret) - goto err_primary_node; + goto err_minors; } /* setup grouping for legacy outputs */ @@ -563,12 +649,10 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) err_unload: if (dev->driver->unload) dev->driver->unload(dev); -err_primary_node: - drm_unplug_minor(dev->primary); -err_render_node: - drm_unplug_minor(dev->render); -err_control_node: - drm_unplug_minor(dev->control); +err_minors: + drm_minor_unregister(dev, DRM_MINOR_LEGACY); + drm_minor_unregister(dev, DRM_MINOR_RENDER); + drm_minor_unregister(dev, DRM_MINOR_CONTROL); out_unlock: mutex_unlock(&drm_global_mutex); return ret; @@ -581,7 +665,7 @@ EXPORT_SYMBOL(drm_dev_register); * * Unregister the DRM device from the system. This does the reverse of * drm_dev_register() but does not deallocate the device. The caller must call - * drm_dev_free() to free all resources. + * drm_dev_unref() to drop their final reference. */ void drm_dev_unregister(struct drm_device *dev) { @@ -600,8 +684,8 @@ void drm_dev_unregister(struct drm_device *dev) list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head) drm_rmmap(dev, r_list->map); - drm_unplug_minor(dev->control); - drm_unplug_minor(dev->render); - drm_unplug_minor(dev->primary); + drm_minor_unregister(dev, DRM_MINOR_LEGACY); + drm_minor_unregister(dev, DRM_MINOR_RENDER); + drm_minor_unregister(dev, DRM_MINOR_CONTROL); } EXPORT_SYMBOL(drm_dev_unregister); diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c index 0f8cb1ae760..c3406aad294 100644 --- a/drivers/gpu/drm/drm_usb.c +++ b/drivers/gpu/drm/drm_usb.c @@ -30,7 +30,7 @@ int drm_get_usb_dev(struct usb_interface *interface, return 0; err_free: - drm_dev_free(dev); + drm_dev_unref(dev); return ret; } diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index f227f544aa3..6e1a1a20cf6 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -51,7 +51,7 @@ config DRM_EXYNOS_G2D config DRM_EXYNOS_IPP bool "Exynos DRM IPP" - depends on DRM_EXYNOS && !ARCH_MULTIPLATFORM + depends on DRM_EXYNOS help Choose this option if you want to use IPP feature for DRM. @@ -69,6 +69,6 @@ config DRM_EXYNOS_ROTATOR config DRM_EXYNOS_GSC bool "Exynos DRM GSC" - depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 + depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 && !ARCH_MULTIPLATFORM help Choose this option if you want to use Exynos GSC for DRM. diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index ebc01503d50..6f3400f3978 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -36,9 +36,9 @@ enum exynos_crtc_mode { * @pipe: a crtc index created at load() with a new crtc object creation * and the crtc object would be set to private->crtc array * to get a crtc object corresponding to this pipe from private->crtc - * array when irq interrupt occured. the reason of using this pipe is that + * array when irq interrupt occurred. the reason of using this pipe is that * drm framework doesn't support multiple irq yet. - * we can refer to the crtc to current hardware interrupt occured through + * we can refer to the crtc to current hardware interrupt occurred through * this pipe value. * @dpms: store the crtc dpms value * @mode: store the crtc mode value diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 9d096a0c5f8..215131ab1dd 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -171,22 +171,24 @@ static int exynos_drm_open(struct drm_device *dev, struct drm_file *file) file->driver_priv = file_priv; ret = exynos_drm_subdrv_open(dev, file); - if (ret) { - kfree(file_priv); - file->driver_priv = NULL; - } + if (ret) + goto out; anon_filp = anon_inode_getfile("exynos_gem", &exynos_drm_gem_fops, NULL, 0); if (IS_ERR(anon_filp)) { - kfree(file_priv); - return PTR_ERR(anon_filp); + ret = PTR_ERR(anon_filp); + goto out; } anon_filp->f_mode = FMODE_READ | FMODE_WRITE; file_priv->anon_filp = anon_filp; return ret; +out: + kfree(file_priv); + file->driver_priv = NULL; + return ret; } static void exynos_drm_preclose(struct drm_device *dev, diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 8adfc8f1e08..30d76b2ff9c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -345,7 +345,7 @@ static bool fimc_check_ovf(struct fimc_context *ctx) fimc_write(cfg, EXYNOS_CIWDOFST); - dev_err(ippdrv->dev, "occured overflow at %d, status 0x%x.\n", + dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n", ctx->id, status); return true; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 7bccedca487..6c1885eedfd 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -607,7 +607,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset) reg_type = REG_TYPE_NONE; DRM_ERROR("Unknown register offset![%d]\n", reg_offset); break; - }; + } return reg_type; } @@ -1126,7 +1126,7 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data, * G2D interrupt event once current command list execution is * finished. * Otherwise only ACF bit should be set to INTEN register so - * that one interrupt is occured after all command lists + * that one interrupt is occurred after all command lists * have been completed. */ if (node->event) { diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 49b8c9b2290..42d2904d88c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -604,7 +604,7 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, int ret; /* - * alocate memory to be used for framebuffer. + * allocate memory to be used for framebuffer. * - this callback would be called by user application * with DRM_IOCTL_MODE_CREATE_DUMB command. */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index fde860c7eba..1592c0ba7de 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -60,7 +60,7 @@ struct exynos_drm_gem_buf { * @vma: a pointer to vm_area. * @flags: indicate memory type to allocated buffer and cache attruibute. * - * P.S. this object would be transfered to user as kms_bo.handle so + * P.S. this object would be transferred to user as kms_bo.handle so * user can access the buffer through kms_bo.handle. */ struct exynos_drm_gem_obj { diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index cd6aebd53bd..fa75059a610 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1301,13 +1301,13 @@ static irqreturn_t gsc_irq_handler(int irq, void *dev_id) status = gsc_read(GSC_IRQ); if (status & GSC_IRQ_STATUS_OR_IRQ) { - dev_err(ippdrv->dev, "occured overflow at %d, status 0x%x.\n", + dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n", ctx->id, status); return IRQ_NONE; } if (status & GSC_IRQ_STATUS_OR_FRM_DONE) { - dev_dbg(ippdrv->dev, "occured frame done at %d, status 0x%x.\n", + dev_dbg(ippdrv->dev, "occurred frame done at %d, status 0x%x.\n", ctx->id, status); buf_id[EXYNOS_DRM_OPS_SRC] = gsc_get_src_buf_index(ctx); diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index 824e0705c8d..09312b87747 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -16,7 +16,6 @@ #include <linux/types.h> #include <linux/clk.h> #include <linux/pm_runtime.h> -#include <plat/map-base.h> #include <drm/drmP.h> #include <drm/exynos_drm.h> @@ -335,7 +334,7 @@ int exynos_drm_ipp_get_property(struct drm_device *drm_dev, void *data, } else { /* * Getting ippdrv capability by ipp_id. - * some deivce not supported wb, output interface. + * some device not supported wb, output interface. * so, user application detect correct ipp driver * using this ioctl. */ @@ -826,7 +825,7 @@ static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node, DRM_DEBUG_KMS("count[%d]e[0x%x]\n", count++, (int)e); /* - * quf == NULL condition means all event deletion. + * qbuf == NULL condition means all event deletion. * stop operations want to delete all event list. * another case delete only same buf id. */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h index 4cadbea7dbd..ab1634befc0 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.h +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.h @@ -48,7 +48,7 @@ struct drm_exynos_ipp_cmd_work { /* * A structure of command node. * - * @priv: IPP private infomation. + * @priv: IPP private information. * @list: list head to command queue information. * @event_list: list head of event. * @mem_list: list head to source,destination memory queue information. @@ -92,7 +92,7 @@ struct drm_exynos_ipp_buf_info { }; /* - * A structure of wb setting infomation. + * A structure of wb setting information. * * @enable: enable flag for wb. * @refresh: HZ of the refresh rate. diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index a0e10aeb0e6..c021ddc1ffb 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -34,6 +34,7 @@ #include <linux/io.h> #include <linux/of.h> #include <linux/of_gpio.h> +#include <linux/hdmi.h> #include <drm/exynos_drm.h> @@ -59,19 +60,6 @@ #define HDMI_AUI_VERSION 0x01 #define HDMI_AUI_LENGTH 0x0A -/* HDMI infoframe to configure HDMI out packet header, AUI and AVI */ -enum HDMI_PACKET_TYPE { - /* refer to Table 5-8 Packet Type in HDMI specification v1.4a */ - /* InfoFrame packet type */ - HDMI_PACKET_TYPE_INFOFRAME = 0x80, - /* Vendor-Specific InfoFrame */ - HDMI_PACKET_TYPE_VSI = HDMI_PACKET_TYPE_INFOFRAME + 1, - /* Auxiliary Video information InfoFrame */ - HDMI_PACKET_TYPE_AVI = HDMI_PACKET_TYPE_INFOFRAME + 2, - /* Audio information InfoFrame */ - HDMI_PACKET_TYPE_AUI = HDMI_PACKET_TYPE_INFOFRAME + 4 -}; - enum hdmi_type { HDMI_TYPE13, HDMI_TYPE14, @@ -379,12 +367,6 @@ static const struct hdmiphy_config hdmiphy_v14_configs[] = { }, }; -struct hdmi_infoframe { - enum HDMI_PACKET_TYPE type; - u8 ver; - u8 len; -}; - static inline u32 hdmi_reg_read(struct hdmi_context *hdata, u32 reg_id) { return readl(hdata->regs + reg_id); @@ -682,7 +664,7 @@ static u8 hdmi_chksum(struct hdmi_context *hdata, } static void hdmi_reg_infoframe(struct hdmi_context *hdata, - struct hdmi_infoframe *infoframe) + union hdmi_infoframe *infoframe) { u32 hdr_sum; u8 chksum; @@ -700,13 +682,15 @@ static void hdmi_reg_infoframe(struct hdmi_context *hdata, return; } - switch (infoframe->type) { - case HDMI_PACKET_TYPE_AVI: + switch (infoframe->any.type) { + case HDMI_INFOFRAME_TYPE_AVI: hdmi_reg_writeb(hdata, HDMI_AVI_CON, HDMI_AVI_CON_EVERY_VSYNC); - hdmi_reg_writeb(hdata, HDMI_AVI_HEADER0, infoframe->type); - hdmi_reg_writeb(hdata, HDMI_AVI_HEADER1, infoframe->ver); - hdmi_reg_writeb(hdata, HDMI_AVI_HEADER2, infoframe->len); - hdr_sum = infoframe->type + infoframe->ver + infoframe->len; + hdmi_reg_writeb(hdata, HDMI_AVI_HEADER0, infoframe->any.type); + hdmi_reg_writeb(hdata, HDMI_AVI_HEADER1, + infoframe->any.version); + hdmi_reg_writeb(hdata, HDMI_AVI_HEADER2, infoframe->any.length); + hdr_sum = infoframe->any.type + infoframe->any.version + + infoframe->any.length; /* Output format zero hardcoded ,RGB YBCR selection */ hdmi_reg_writeb(hdata, HDMI_AVI_BYTE(1), 0 << 5 | @@ -722,18 +706,20 @@ static void hdmi_reg_infoframe(struct hdmi_context *hdata, hdmi_reg_writeb(hdata, HDMI_AVI_BYTE(4), vic); chksum = hdmi_chksum(hdata, HDMI_AVI_BYTE(1), - infoframe->len, hdr_sum); + infoframe->any.length, hdr_sum); DRM_DEBUG_KMS("AVI checksum = 0x%x\n", chksum); hdmi_reg_writeb(hdata, HDMI_AVI_CHECK_SUM, chksum); break; - case HDMI_PACKET_TYPE_AUI: + case HDMI_INFOFRAME_TYPE_AUDIO: hdmi_reg_writeb(hdata, HDMI_AUI_CON, 0x02); - hdmi_reg_writeb(hdata, HDMI_AUI_HEADER0, infoframe->type); - hdmi_reg_writeb(hdata, HDMI_AUI_HEADER1, infoframe->ver); - hdmi_reg_writeb(hdata, HDMI_AUI_HEADER2, infoframe->len); - hdr_sum = infoframe->type + infoframe->ver + infoframe->len; + hdmi_reg_writeb(hdata, HDMI_AUI_HEADER0, infoframe->any.type); + hdmi_reg_writeb(hdata, HDMI_AUI_HEADER1, + infoframe->any.version); + hdmi_reg_writeb(hdata, HDMI_AUI_HEADER2, infoframe->any.length); + hdr_sum = infoframe->any.type + infoframe->any.version + + infoframe->any.length; chksum = hdmi_chksum(hdata, HDMI_AUI_BYTE(1), - infoframe->len, hdr_sum); + infoframe->any.length, hdr_sum); DRM_DEBUG_KMS("AUI checksum = 0x%x\n", chksum); hdmi_reg_writeb(hdata, HDMI_AUI_CHECK_SUM, chksum); break; @@ -985,7 +971,7 @@ static void hdmi_conf_reset(struct hdmi_context *hdata) static void hdmi_conf_init(struct hdmi_context *hdata) { - struct hdmi_infoframe infoframe; + union hdmi_infoframe infoframe; /* disable HPD interrupts from HDMI IP block, use GPIO instead */ hdmi_reg_writemask(hdata, HDMI_INTC_CON, 0, HDMI_INTC_EN_GLOBAL | @@ -1021,14 +1007,14 @@ static void hdmi_conf_init(struct hdmi_context *hdata) hdmi_reg_writeb(hdata, HDMI_V13_AUI_CON, 0x02); hdmi_reg_writeb(hdata, HDMI_V13_ACR_CON, 0x04); } else { - infoframe.type = HDMI_PACKET_TYPE_AVI; - infoframe.ver = HDMI_AVI_VERSION; - infoframe.len = HDMI_AVI_LENGTH; + infoframe.any.type = HDMI_INFOFRAME_TYPE_AVI; + infoframe.any.version = HDMI_AVI_VERSION; + infoframe.any.length = HDMI_AVI_LENGTH; hdmi_reg_infoframe(hdata, &infoframe); - infoframe.type = HDMI_PACKET_TYPE_AUI; - infoframe.ver = HDMI_AUI_VERSION; - infoframe.len = HDMI_AUI_LENGTH; + infoframe.any.type = HDMI_INFOFRAME_TYPE_AUDIO; + infoframe.any.version = HDMI_AUI_VERSION; + infoframe.any.length = HDMI_AUI_LENGTH; hdmi_reg_infoframe(hdata, &infoframe); /* enable AVI packet every vsync, fixes purple line problem */ diff --git a/drivers/gpu/drm/gma500/backlight.c b/drivers/gpu/drm/gma500/backlight.c index 143eba3309c..ea7dfc59d79 100644 --- a/drivers/gpu/drm/gma500/backlight.c +++ b/drivers/gpu/drm/gma500/backlight.c @@ -26,13 +26,13 @@ #include "intel_bios.h" #include "power.h" +#ifdef CONFIG_BACKLIGHT_CLASS_DEVICE static void do_gma_backlight_set(struct drm_device *dev) { -#ifdef CONFIG_BACKLIGHT_CLASS_DEVICE struct drm_psb_private *dev_priv = dev->dev_private; backlight_update_status(dev_priv->backlight_device); -#endif } +#endif void gma_backlight_enable(struct drm_device *dev) { diff --git a/drivers/gpu/drm/gma500/opregion.c b/drivers/gpu/drm/gma500/opregion.c index ad0d6de938f..13ec6283bf5 100644 --- a/drivers/gpu/drm/gma500/opregion.c +++ b/drivers/gpu/drm/gma500/opregion.c @@ -22,7 +22,6 @@ * */ #include <linux/acpi.h> -#include <linux/acpi_io.h> #include "psb_drv.h" #include "psb_intel_reg.h" diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c index 400b0c4a10f..48af5cac190 100644 --- a/drivers/gpu/drm/i2c/tda998x_drv.c +++ b/drivers/gpu/drm/i2c/tda998x_drv.c @@ -19,6 +19,8 @@ #include <linux/hdmi.h> #include <linux/module.h> +#include <linux/irq.h> +#include <sound/asoundef.h> #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> @@ -30,6 +32,7 @@ struct tda998x_priv { struct i2c_client *cec; + struct i2c_client *hdmi; uint16_t rev; uint8_t current_page; int dpms; @@ -38,6 +41,10 @@ struct tda998x_priv { u8 vip_cntrl_1; u8 vip_cntrl_2; struct tda998x_encoder_params params; + + wait_queue_head_t wq_edid; + volatile int wq_edid_wait; + struct drm_encoder *encoder; }; #define to_tda998x_priv(x) ((struct tda998x_priv *)to_encoder_slave(x)->slave_priv) @@ -120,6 +127,8 @@ struct tda998x_priv { # define VIP_CNTRL_5_CKCASE (1 << 0) # define VIP_CNTRL_5_SP_CNT(x) (((x) & 3) << 1) #define REG_MUX_AP REG(0x00, 0x26) /* read/write */ +# define MUX_AP_SELECT_I2S 0x64 +# define MUX_AP_SELECT_SPDIF 0x40 #define REG_MUX_VP_VIP_OUT REG(0x00, 0x27) /* read/write */ #define REG_MAT_CONTRL REG(0x00, 0x80) /* write */ # define MAT_CONTRL_MAT_SC(x) (((x) & 3) << 0) @@ -197,10 +206,11 @@ struct tda998x_priv { #define REG_I2S_FORMAT REG(0x00, 0xfc) /* read/write */ # define I2S_FORMAT(x) (((x) & 3) << 0) #define REG_AIP_CLKSEL REG(0x00, 0xfd) /* write */ -# define AIP_CLKSEL_FS(x) (((x) & 3) << 0) -# define AIP_CLKSEL_CLK_POL(x) (((x) & 1) << 2) -# define AIP_CLKSEL_AIP(x) (((x) & 7) << 3) - +# define AIP_CLKSEL_AIP_SPDIF (0 << 3) +# define AIP_CLKSEL_AIP_I2S (1 << 3) +# define AIP_CLKSEL_FS_ACLK (0 << 0) +# define AIP_CLKSEL_FS_MCLK (1 << 0) +# define AIP_CLKSEL_FS_FS64SPDIF (2 << 0) /* Page 02h: PLL settings */ #define REG_PLL_SERIAL_1 REG(0x02, 0x00) /* read/write */ @@ -208,7 +218,7 @@ struct tda998x_priv { # define PLL_SERIAL_1_SRL_IZ(x) (((x) & 3) << 1) # define PLL_SERIAL_1_SRL_MAN_IZ (1 << 6) #define REG_PLL_SERIAL_2 REG(0x02, 0x01) /* read/write */ -# define PLL_SERIAL_2_SRL_NOSC(x) (((x) & 3) << 0) +# define PLL_SERIAL_2_SRL_NOSC(x) ((x) << 0) # define PLL_SERIAL_2_SRL_PR(x) (((x) & 0xf) << 4) #define REG_PLL_SERIAL_3 REG(0x02, 0x02) /* read/write */ # define PLL_SERIAL_3_SRL_CCIR (1 << 0) @@ -304,11 +314,16 @@ struct tda998x_priv { /* CEC registers: (not paged) */ +#define REG_CEC_INTSTATUS 0xee /* read */ +# define CEC_INTSTATUS_CEC (1 << 0) +# define CEC_INTSTATUS_HDMI (1 << 1) #define REG_CEC_FRO_IM_CLK_CTRL 0xfb /* read/write */ # define CEC_FRO_IM_CLK_CTRL_GHOST_DIS (1 << 7) # define CEC_FRO_IM_CLK_CTRL_ENA_OTP (1 << 6) # define CEC_FRO_IM_CLK_CTRL_IMCLK_SEL (1 << 1) # define CEC_FRO_IM_CLK_CTRL_FRO_DIV (1 << 0) +#define REG_CEC_RXSHPDINTENA 0xfc /* read/write */ +#define REG_CEC_RXSHPDINT 0xfd /* read */ #define REG_CEC_RXSHPDLEV 0xfe /* read */ # define CEC_RXSHPDLEV_RXSENS (1 << 0) # define CEC_RXSHPDLEV_HPD (1 << 1) @@ -328,21 +343,21 @@ struct tda998x_priv { #define TDA19988 0x0301 static void -cec_write(struct drm_encoder *encoder, uint16_t addr, uint8_t val) +cec_write(struct tda998x_priv *priv, uint16_t addr, uint8_t val) { - struct i2c_client *client = to_tda998x_priv(encoder)->cec; + struct i2c_client *client = priv->cec; uint8_t buf[] = {addr, val}; int ret; - ret = i2c_master_send(client, buf, ARRAY_SIZE(buf)); + ret = i2c_master_send(client, buf, sizeof(buf)); if (ret < 0) dev_err(&client->dev, "Error %d writing to cec:0x%x\n", ret, addr); } static uint8_t -cec_read(struct drm_encoder *encoder, uint8_t addr) +cec_read(struct tda998x_priv *priv, uint8_t addr) { - struct i2c_client *client = to_tda998x_priv(encoder)->cec; + struct i2c_client *client = priv->cec; uint8_t val; int ret; @@ -361,32 +376,36 @@ fail: return 0; } -static void -set_page(struct drm_encoder *encoder, uint16_t reg) +static int +set_page(struct tda998x_priv *priv, uint16_t reg) { - struct tda998x_priv *priv = to_tda998x_priv(encoder); - if (REG2PAGE(reg) != priv->current_page) { - struct i2c_client *client = drm_i2c_encoder_get_client(encoder); + struct i2c_client *client = priv->hdmi; uint8_t buf[] = { REG_CURPAGE, REG2PAGE(reg) }; int ret = i2c_master_send(client, buf, sizeof(buf)); - if (ret < 0) - dev_err(&client->dev, "Error %d writing to REG_CURPAGE\n", ret); + if (ret < 0) { + dev_err(&client->dev, "setpage %04x err %d\n", + reg, ret); + return ret; + } priv->current_page = REG2PAGE(reg); } + return 0; } static int -reg_read_range(struct drm_encoder *encoder, uint16_t reg, char *buf, int cnt) +reg_read_range(struct tda998x_priv *priv, uint16_t reg, char *buf, int cnt) { - struct i2c_client *client = drm_i2c_encoder_get_client(encoder); + struct i2c_client *client = priv->hdmi; uint8_t addr = REG2ADDR(reg); int ret; - set_page(encoder, reg); + ret = set_page(priv, reg); + if (ret < 0) + return ret; ret = i2c_master_send(client, &addr, sizeof(addr)); if (ret < 0) @@ -404,100 +423,147 @@ fail: } static void -reg_write_range(struct drm_encoder *encoder, uint16_t reg, uint8_t *p, int cnt) +reg_write_range(struct tda998x_priv *priv, uint16_t reg, uint8_t *p, int cnt) { - struct i2c_client *client = drm_i2c_encoder_get_client(encoder); + struct i2c_client *client = priv->hdmi; uint8_t buf[cnt+1]; int ret; buf[0] = REG2ADDR(reg); memcpy(&buf[1], p, cnt); - set_page(encoder, reg); + ret = set_page(priv, reg); + if (ret < 0) + return; ret = i2c_master_send(client, buf, cnt + 1); if (ret < 0) dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg); } -static uint8_t -reg_read(struct drm_encoder *encoder, uint16_t reg) +static int +reg_read(struct tda998x_priv *priv, uint16_t reg) { uint8_t val = 0; - reg_read_range(encoder, reg, &val, sizeof(val)); + int ret; + + ret = reg_read_range(priv, reg, &val, sizeof(val)); + if (ret < 0) + return ret; return val; } static void -reg_write(struct drm_encoder *encoder, uint16_t reg, uint8_t val) +reg_write(struct tda998x_priv *priv, uint16_t reg, uint8_t val) { - struct i2c_client *client = drm_i2c_encoder_get_client(encoder); + struct i2c_client *client = priv->hdmi; uint8_t buf[] = {REG2ADDR(reg), val}; int ret; - set_page(encoder, reg); + ret = set_page(priv, reg); + if (ret < 0) + return; - ret = i2c_master_send(client, buf, ARRAY_SIZE(buf)); + ret = i2c_master_send(client, buf, sizeof(buf)); if (ret < 0) dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg); } static void -reg_write16(struct drm_encoder *encoder, uint16_t reg, uint16_t val) +reg_write16(struct tda998x_priv *priv, uint16_t reg, uint16_t val) { - struct i2c_client *client = drm_i2c_encoder_get_client(encoder); + struct i2c_client *client = priv->hdmi; uint8_t buf[] = {REG2ADDR(reg), val >> 8, val}; int ret; - set_page(encoder, reg); + ret = set_page(priv, reg); + if (ret < 0) + return; - ret = i2c_master_send(client, buf, ARRAY_SIZE(buf)); + ret = i2c_master_send(client, buf, sizeof(buf)); if (ret < 0) dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg); } static void -reg_set(struct drm_encoder *encoder, uint16_t reg, uint8_t val) +reg_set(struct tda998x_priv *priv, uint16_t reg, uint8_t val) { - reg_write(encoder, reg, reg_read(encoder, reg) | val); + int old_val; + + old_val = reg_read(priv, reg); + if (old_val >= 0) + reg_write(priv, reg, old_val | val); } static void -reg_clear(struct drm_encoder *encoder, uint16_t reg, uint8_t val) +reg_clear(struct tda998x_priv *priv, uint16_t reg, uint8_t val) { - reg_write(encoder, reg, reg_read(encoder, reg) & ~val); + int old_val; + + old_val = reg_read(priv, reg); + if (old_val >= 0) + reg_write(priv, reg, old_val & ~val); } static void -tda998x_reset(struct drm_encoder *encoder) +tda998x_reset(struct tda998x_priv *priv) { /* reset audio and i2c master: */ - reg_set(encoder, REG_SOFTRESET, SOFTRESET_AUDIO | SOFTRESET_I2C_MASTER); + reg_write(priv, REG_SOFTRESET, SOFTRESET_AUDIO | SOFTRESET_I2C_MASTER); msleep(50); - reg_clear(encoder, REG_SOFTRESET, SOFTRESET_AUDIO | SOFTRESET_I2C_MASTER); + reg_write(priv, REG_SOFTRESET, 0); msleep(50); /* reset transmitter: */ - reg_set(encoder, REG_MAIN_CNTRL0, MAIN_CNTRL0_SR); - reg_clear(encoder, REG_MAIN_CNTRL0, MAIN_CNTRL0_SR); + reg_set(priv, REG_MAIN_CNTRL0, MAIN_CNTRL0_SR); + reg_clear(priv, REG_MAIN_CNTRL0, MAIN_CNTRL0_SR); /* PLL registers common configuration */ - reg_write(encoder, REG_PLL_SERIAL_1, 0x00); - reg_write(encoder, REG_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(1)); - reg_write(encoder, REG_PLL_SERIAL_3, 0x00); - reg_write(encoder, REG_SERIALIZER, 0x00); - reg_write(encoder, REG_BUFFER_OUT, 0x00); - reg_write(encoder, REG_PLL_SCG1, 0x00); - reg_write(encoder, REG_AUDIO_DIV, AUDIO_DIV_SERCLK_8); - reg_write(encoder, REG_SEL_CLK, SEL_CLK_SEL_CLK1 | SEL_CLK_ENA_SC_CLK); - reg_write(encoder, REG_PLL_SCGN1, 0xfa); - reg_write(encoder, REG_PLL_SCGN2, 0x00); - reg_write(encoder, REG_PLL_SCGR1, 0x5b); - reg_write(encoder, REG_PLL_SCGR2, 0x00); - reg_write(encoder, REG_PLL_SCG2, 0x10); + reg_write(priv, REG_PLL_SERIAL_1, 0x00); + reg_write(priv, REG_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(1)); + reg_write(priv, REG_PLL_SERIAL_3, 0x00); + reg_write(priv, REG_SERIALIZER, 0x00); + reg_write(priv, REG_BUFFER_OUT, 0x00); + reg_write(priv, REG_PLL_SCG1, 0x00); + reg_write(priv, REG_AUDIO_DIV, AUDIO_DIV_SERCLK_8); + reg_write(priv, REG_SEL_CLK, SEL_CLK_SEL_CLK1 | SEL_CLK_ENA_SC_CLK); + reg_write(priv, REG_PLL_SCGN1, 0xfa); + reg_write(priv, REG_PLL_SCGN2, 0x00); + reg_write(priv, REG_PLL_SCGR1, 0x5b); + reg_write(priv, REG_PLL_SCGR2, 0x00); + reg_write(priv, REG_PLL_SCG2, 0x10); /* Write the default value MUX register */ - reg_write(encoder, REG_MUX_VP_VIP_OUT, 0x24); + reg_write(priv, REG_MUX_VP_VIP_OUT, 0x24); +} + +/* + * only 2 interrupts may occur: screen plug/unplug and EDID read + */ +static irqreturn_t tda998x_irq_thread(int irq, void *data) +{ + struct tda998x_priv *priv = data; + u8 sta, cec, lvl, flag0, flag1, flag2; + + if (!priv) + return IRQ_HANDLED; + sta = cec_read(priv, REG_CEC_INTSTATUS); + cec = cec_read(priv, REG_CEC_RXSHPDINT); + lvl = cec_read(priv, REG_CEC_RXSHPDLEV); + flag0 = reg_read(priv, REG_INT_FLAGS_0); + flag1 = reg_read(priv, REG_INT_FLAGS_1); + flag2 = reg_read(priv, REG_INT_FLAGS_2); + DRM_DEBUG_DRIVER( + "tda irq sta %02x cec %02x lvl %02x f0 %02x f1 %02x f2 %02x\n", + sta, cec, lvl, flag0, flag1, flag2); + if ((flag2 & INT_FLAGS_2_EDID_BLK_RD) && priv->wq_edid_wait) { + priv->wq_edid_wait = 0; + wake_up(&priv->wq_edid); + } else if (cec != 0) { /* HPD change */ + if (priv->encoder && priv->encoder->dev) + drm_helper_hpd_irq_event(priv->encoder->dev); + } + return IRQ_HANDLED; } static uint8_t tda998x_cksum(uint8_t *buf, size_t bytes) @@ -513,91 +579,88 @@ static uint8_t tda998x_cksum(uint8_t *buf, size_t bytes) #define PB(x) (HB(2) + 1 + (x)) static void -tda998x_write_if(struct drm_encoder *encoder, uint8_t bit, uint16_t addr, +tda998x_write_if(struct tda998x_priv *priv, uint8_t bit, uint16_t addr, uint8_t *buf, size_t size) { buf[PB(0)] = tda998x_cksum(buf, size); - reg_clear(encoder, REG_DIP_IF_FLAGS, bit); - reg_write_range(encoder, addr, buf, size); - reg_set(encoder, REG_DIP_IF_FLAGS, bit); + reg_clear(priv, REG_DIP_IF_FLAGS, bit); + reg_write_range(priv, addr, buf, size); + reg_set(priv, REG_DIP_IF_FLAGS, bit); } static void -tda998x_write_aif(struct drm_encoder *encoder, struct tda998x_encoder_params *p) +tda998x_write_aif(struct tda998x_priv *priv, struct tda998x_encoder_params *p) { - uint8_t buf[PB(5) + 1]; + u8 buf[PB(HDMI_AUDIO_INFOFRAME_SIZE) + 1]; - buf[HB(0)] = 0x84; + memset(buf, 0, sizeof(buf)); + buf[HB(0)] = HDMI_INFOFRAME_TYPE_AUDIO; buf[HB(1)] = 0x01; - buf[HB(2)] = 10; - buf[PB(0)] = 0; + buf[HB(2)] = HDMI_AUDIO_INFOFRAME_SIZE; buf[PB(1)] = p->audio_frame[1] & 0x07; /* CC */ buf[PB(2)] = p->audio_frame[2] & 0x1c; /* SF */ buf[PB(4)] = p->audio_frame[4]; buf[PB(5)] = p->audio_frame[5] & 0xf8; /* DM_INH + LSV */ - tda998x_write_if(encoder, DIP_IF_FLAGS_IF4, REG_IF4_HB0, buf, + tda998x_write_if(priv, DIP_IF_FLAGS_IF4, REG_IF4_HB0, buf, sizeof(buf)); } static void -tda998x_write_avi(struct drm_encoder *encoder, struct drm_display_mode *mode) +tda998x_write_avi(struct tda998x_priv *priv, struct drm_display_mode *mode) { - uint8_t buf[PB(13) + 1]; + u8 buf[PB(HDMI_AVI_INFOFRAME_SIZE) + 1]; memset(buf, 0, sizeof(buf)); - buf[HB(0)] = 0x82; + buf[HB(0)] = HDMI_INFOFRAME_TYPE_AVI; buf[HB(1)] = 0x02; - buf[HB(2)] = 13; + buf[HB(2)] = HDMI_AVI_INFOFRAME_SIZE; buf[PB(1)] = HDMI_SCAN_MODE_UNDERSCAN; + buf[PB(2)] = HDMI_ACTIVE_ASPECT_PICTURE; buf[PB(3)] = HDMI_QUANTIZATION_RANGE_FULL << 2; buf[PB(4)] = drm_match_cea_mode(mode); - tda998x_write_if(encoder, DIP_IF_FLAGS_IF2, REG_IF2_HB0, buf, + tda998x_write_if(priv, DIP_IF_FLAGS_IF2, REG_IF2_HB0, buf, sizeof(buf)); } -static void tda998x_audio_mute(struct drm_encoder *encoder, bool on) +static void tda998x_audio_mute(struct tda998x_priv *priv, bool on) { if (on) { - reg_set(encoder, REG_SOFTRESET, SOFTRESET_AUDIO); - reg_clear(encoder, REG_SOFTRESET, SOFTRESET_AUDIO); - reg_set(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO); + reg_set(priv, REG_SOFTRESET, SOFTRESET_AUDIO); + reg_clear(priv, REG_SOFTRESET, SOFTRESET_AUDIO); + reg_set(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO); } else { - reg_clear(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO); + reg_clear(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO); } } static void -tda998x_configure_audio(struct drm_encoder *encoder, +tda998x_configure_audio(struct tda998x_priv *priv, struct drm_display_mode *mode, struct tda998x_encoder_params *p) { - uint8_t buf[6], clksel_aip, clksel_fs, ca_i2s, cts_n, adiv; + uint8_t buf[6], clksel_aip, clksel_fs, cts_n, adiv; uint32_t n; /* Enable audio ports */ - reg_write(encoder, REG_ENA_AP, p->audio_cfg); - reg_write(encoder, REG_ENA_ACLK, p->audio_clk_cfg); + reg_write(priv, REG_ENA_AP, p->audio_cfg); + reg_write(priv, REG_ENA_ACLK, p->audio_clk_cfg); /* Set audio input source */ switch (p->audio_format) { case AFMT_SPDIF: - reg_write(encoder, REG_MUX_AP, 0x40); - clksel_aip = AIP_CLKSEL_AIP(0); - /* FS64SPDIF */ - clksel_fs = AIP_CLKSEL_FS(2); + reg_write(priv, REG_MUX_AP, MUX_AP_SELECT_SPDIF); + clksel_aip = AIP_CLKSEL_AIP_SPDIF; + clksel_fs = AIP_CLKSEL_FS_FS64SPDIF; cts_n = CTS_N_M(3) | CTS_N_K(3); - ca_i2s = 0; break; case AFMT_I2S: - reg_write(encoder, REG_MUX_AP, 0x64); - clksel_aip = AIP_CLKSEL_AIP(1); - /* ACLK */ - clksel_fs = AIP_CLKSEL_FS(0); + reg_write(priv, REG_MUX_AP, MUX_AP_SELECT_I2S); + clksel_aip = AIP_CLKSEL_AIP_I2S; + clksel_fs = AIP_CLKSEL_FS_ACLK; cts_n = CTS_N_M(3) | CTS_N_K(3); - ca_i2s = CA_I2S_CA_I2S(0); break; default: @@ -605,12 +668,10 @@ tda998x_configure_audio(struct drm_encoder *encoder, return; } - reg_write(encoder, REG_AIP_CLKSEL, clksel_aip); - reg_clear(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_LAYOUT); - - /* Enable automatic CTS generation */ - reg_clear(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_ACR_MAN); - reg_write(encoder, REG_CTS_N, cts_n); + reg_write(priv, REG_AIP_CLKSEL, clksel_aip); + reg_clear(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_LAYOUT | + AIP_CNTRL_0_ACR_MAN); /* auto CTS */ + reg_write(priv, REG_CTS_N, cts_n); /* * Audio input somehow depends on HDMI line rate which is @@ -619,11 +680,15 @@ tda998x_configure_audio(struct drm_encoder *encoder, * There is no detailed info in the datasheet, so we just * assume 100MHz requires larger divider. */ + adiv = AUDIO_DIV_SERCLK_8; if (mode->clock > 100000) - adiv = AUDIO_DIV_SERCLK_16; - else - adiv = AUDIO_DIV_SERCLK_8; - reg_write(encoder, REG_AUDIO_DIV, adiv); + adiv++; /* AUDIO_DIV_SERCLK_16 */ + + /* S/PDIF asks for a larger divider */ + if (p->audio_format == AFMT_SPDIF) + adiv++; /* AUDIO_DIV_SERCLK_16 or _32 */ + + reg_write(priv, REG_AUDIO_DIV, adiv); /* * This is the approximate value of N, which happens to be @@ -638,28 +703,29 @@ tda998x_configure_audio(struct drm_encoder *encoder, buf[3] = n; buf[4] = n >> 8; buf[5] = n >> 16; - reg_write_range(encoder, REG_ACR_CTS_0, buf, 6); + reg_write_range(priv, REG_ACR_CTS_0, buf, 6); /* Set CTS clock reference */ - reg_write(encoder, REG_AIP_CLKSEL, clksel_aip | clksel_fs); + reg_write(priv, REG_AIP_CLKSEL, clksel_aip | clksel_fs); /* Reset CTS generator */ - reg_set(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_CTS); - reg_clear(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_CTS); + reg_set(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_CTS); + reg_clear(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_CTS); /* Write the channel status */ - buf[0] = 0x04; + buf[0] = IEC958_AES0_CON_NOT_COPYRIGHT; buf[1] = 0x00; - buf[2] = 0x00; - buf[3] = 0xf1; - reg_write_range(encoder, REG_CH_STAT_B(0), buf, 4); + buf[2] = IEC958_AES3_CON_FS_NOTID; + buf[3] = IEC958_AES4_CON_ORIGFS_NOTID | + IEC958_AES4_CON_MAX_WORDLEN_24; + reg_write_range(priv, REG_CH_STAT_B(0), buf, 4); - tda998x_audio_mute(encoder, true); - mdelay(20); - tda998x_audio_mute(encoder, false); + tda998x_audio_mute(priv, true); + msleep(20); + tda998x_audio_mute(priv, false); /* Write the audio information packet */ - tda998x_write_aif(encoder, p); + tda998x_write_aif(priv, p); } /* DRM encoder functions */ @@ -701,19 +767,19 @@ tda998x_encoder_dpms(struct drm_encoder *encoder, int mode) switch (mode) { case DRM_MODE_DPMS_ON: /* enable video ports, audio will be enabled later */ - reg_write(encoder, REG_ENA_VP_0, 0xff); - reg_write(encoder, REG_ENA_VP_1, 0xff); - reg_write(encoder, REG_ENA_VP_2, 0xff); + reg_write(priv, REG_ENA_VP_0, 0xff); + reg_write(priv, REG_ENA_VP_1, 0xff); + reg_write(priv, REG_ENA_VP_2, 0xff); /* set muxing after enabling ports: */ - reg_write(encoder, REG_VIP_CNTRL_0, priv->vip_cntrl_0); - reg_write(encoder, REG_VIP_CNTRL_1, priv->vip_cntrl_1); - reg_write(encoder, REG_VIP_CNTRL_2, priv->vip_cntrl_2); + reg_write(priv, REG_VIP_CNTRL_0, priv->vip_cntrl_0); + reg_write(priv, REG_VIP_CNTRL_1, priv->vip_cntrl_1); + reg_write(priv, REG_VIP_CNTRL_2, priv->vip_cntrl_2); break; case DRM_MODE_DPMS_OFF: /* disable video ports */ - reg_write(encoder, REG_ENA_VP_0, 0x00); - reg_write(encoder, REG_ENA_VP_1, 0x00); - reg_write(encoder, REG_ENA_VP_2, 0x00); + reg_write(priv, REG_ENA_VP_0, 0x00); + reg_write(priv, REG_ENA_VP_1, 0x00); + reg_write(priv, REG_ENA_VP_2, 0x00); break; } @@ -824,112 +890,117 @@ tda998x_encoder_mode_set(struct drm_encoder *encoder, } div = 148500 / mode->clock; + if (div != 0) { + div--; + if (div > 3) + div = 3; + } /* mute the audio FIFO: */ - reg_set(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO); + reg_set(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO); /* set HDMI HDCP mode off: */ - reg_set(encoder, REG_TBG_CNTRL_1, TBG_CNTRL_1_DWIN_DIS); - reg_clear(encoder, REG_TX33, TX33_HDMI); + reg_write(priv, REG_TBG_CNTRL_1, TBG_CNTRL_1_DWIN_DIS); + reg_clear(priv, REG_TX33, TX33_HDMI); + reg_write(priv, REG_ENC_CNTRL, ENC_CNTRL_CTL_CODE(0)); - reg_write(encoder, REG_ENC_CNTRL, ENC_CNTRL_CTL_CODE(0)); /* no pre-filter or interpolator: */ - reg_write(encoder, REG_HVF_CNTRL_0, HVF_CNTRL_0_PREFIL(0) | + reg_write(priv, REG_HVF_CNTRL_0, HVF_CNTRL_0_PREFIL(0) | HVF_CNTRL_0_INTPOL(0)); - reg_write(encoder, REG_VIP_CNTRL_5, VIP_CNTRL_5_SP_CNT(0)); - reg_write(encoder, REG_VIP_CNTRL_4, VIP_CNTRL_4_BLANKIT(0) | + reg_write(priv, REG_VIP_CNTRL_5, VIP_CNTRL_5_SP_CNT(0)); + reg_write(priv, REG_VIP_CNTRL_4, VIP_CNTRL_4_BLANKIT(0) | VIP_CNTRL_4_BLC(0)); - reg_clear(encoder, REG_PLL_SERIAL_3, PLL_SERIAL_3_SRL_CCIR); - reg_clear(encoder, REG_PLL_SERIAL_1, PLL_SERIAL_1_SRL_MAN_IZ); - reg_clear(encoder, REG_PLL_SERIAL_3, PLL_SERIAL_3_SRL_DE); - reg_write(encoder, REG_SERIALIZER, 0); - reg_write(encoder, REG_HVF_CNTRL_1, HVF_CNTRL_1_VQR(0)); + reg_clear(priv, REG_PLL_SERIAL_1, PLL_SERIAL_1_SRL_MAN_IZ); + reg_clear(priv, REG_PLL_SERIAL_3, PLL_SERIAL_3_SRL_CCIR | + PLL_SERIAL_3_SRL_DE); + reg_write(priv, REG_SERIALIZER, 0); + reg_write(priv, REG_HVF_CNTRL_1, HVF_CNTRL_1_VQR(0)); /* TODO enable pixel repeat for pixel rates less than 25Msamp/s */ rep = 0; - reg_write(encoder, REG_RPT_CNTRL, 0); - reg_write(encoder, REG_SEL_CLK, SEL_CLK_SEL_VRF_CLK(0) | + reg_write(priv, REG_RPT_CNTRL, 0); + reg_write(priv, REG_SEL_CLK, SEL_CLK_SEL_VRF_CLK(0) | SEL_CLK_SEL_CLK1 | SEL_CLK_ENA_SC_CLK); - reg_write(encoder, REG_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(div) | + reg_write(priv, REG_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(div) | PLL_SERIAL_2_SRL_PR(rep)); /* set color matrix bypass flag: */ - reg_set(encoder, REG_MAT_CONTRL, MAT_CONTRL_MAT_BP); + reg_write(priv, REG_MAT_CONTRL, MAT_CONTRL_MAT_BP | + MAT_CONTRL_MAT_SC(1)); /* set BIAS tmds value: */ - reg_write(encoder, REG_ANA_GENERAL, 0x09); - - reg_clear(encoder, REG_TBG_CNTRL_0, TBG_CNTRL_0_SYNC_MTHD); + reg_write(priv, REG_ANA_GENERAL, 0x09); /* * Sync on rising HSYNC/VSYNC */ - reg_write(encoder, REG_VIP_CNTRL_3, 0); - reg_set(encoder, REG_VIP_CNTRL_3, VIP_CNTRL_3_SYNC_HS); + reg = VIP_CNTRL_3_SYNC_HS; /* * TDA19988 requires high-active sync at input stage, * so invert low-active sync provided by master encoder here */ if (mode->flags & DRM_MODE_FLAG_NHSYNC) - reg_set(encoder, REG_VIP_CNTRL_3, VIP_CNTRL_3_H_TGL); + reg |= VIP_CNTRL_3_H_TGL; if (mode->flags & DRM_MODE_FLAG_NVSYNC) - reg_set(encoder, REG_VIP_CNTRL_3, VIP_CNTRL_3_V_TGL); + reg |= VIP_CNTRL_3_V_TGL; + reg_write(priv, REG_VIP_CNTRL_3, reg); + + reg_write(priv, REG_VIDFORMAT, 0x00); + reg_write16(priv, REG_REFPIX_MSB, ref_pix); + reg_write16(priv, REG_REFLINE_MSB, ref_line); + reg_write16(priv, REG_NPIX_MSB, n_pix); + reg_write16(priv, REG_NLINE_MSB, n_line); + reg_write16(priv, REG_VS_LINE_STRT_1_MSB, vs1_line_s); + reg_write16(priv, REG_VS_PIX_STRT_1_MSB, vs1_pix_s); + reg_write16(priv, REG_VS_LINE_END_1_MSB, vs1_line_e); + reg_write16(priv, REG_VS_PIX_END_1_MSB, vs1_pix_e); + reg_write16(priv, REG_VS_LINE_STRT_2_MSB, vs2_line_s); + reg_write16(priv, REG_VS_PIX_STRT_2_MSB, vs2_pix_s); + reg_write16(priv, REG_VS_LINE_END_2_MSB, vs2_line_e); + reg_write16(priv, REG_VS_PIX_END_2_MSB, vs2_pix_e); + reg_write16(priv, REG_HS_PIX_START_MSB, hs_pix_s); + reg_write16(priv, REG_HS_PIX_STOP_MSB, hs_pix_e); + reg_write16(priv, REG_VWIN_START_1_MSB, vwin1_line_s); + reg_write16(priv, REG_VWIN_END_1_MSB, vwin1_line_e); + reg_write16(priv, REG_VWIN_START_2_MSB, vwin2_line_s); + reg_write16(priv, REG_VWIN_END_2_MSB, vwin2_line_e); + reg_write16(priv, REG_DE_START_MSB, de_pix_s); + reg_write16(priv, REG_DE_STOP_MSB, de_pix_e); + + if (priv->rev == TDA19988) { + /* let incoming pixels fill the active space (if any) */ + reg_write(priv, REG_ENABLE_SPACE, 0x00); + } /* * Always generate sync polarity relative to input sync and * revert input stage toggled sync at output stage */ - reg = TBG_CNTRL_1_TGL_EN; + reg = TBG_CNTRL_1_DWIN_DIS | TBG_CNTRL_1_TGL_EN; if (mode->flags & DRM_MODE_FLAG_NHSYNC) reg |= TBG_CNTRL_1_H_TGL; if (mode->flags & DRM_MODE_FLAG_NVSYNC) reg |= TBG_CNTRL_1_V_TGL; - reg_write(encoder, REG_TBG_CNTRL_1, reg); - - reg_write(encoder, REG_VIDFORMAT, 0x00); - reg_write16(encoder, REG_REFPIX_MSB, ref_pix); - reg_write16(encoder, REG_REFLINE_MSB, ref_line); - reg_write16(encoder, REG_NPIX_MSB, n_pix); - reg_write16(encoder, REG_NLINE_MSB, n_line); - reg_write16(encoder, REG_VS_LINE_STRT_1_MSB, vs1_line_s); - reg_write16(encoder, REG_VS_PIX_STRT_1_MSB, vs1_pix_s); - reg_write16(encoder, REG_VS_LINE_END_1_MSB, vs1_line_e); - reg_write16(encoder, REG_VS_PIX_END_1_MSB, vs1_pix_e); - reg_write16(encoder, REG_VS_LINE_STRT_2_MSB, vs2_line_s); - reg_write16(encoder, REG_VS_PIX_STRT_2_MSB, vs2_pix_s); - reg_write16(encoder, REG_VS_LINE_END_2_MSB, vs2_line_e); - reg_write16(encoder, REG_VS_PIX_END_2_MSB, vs2_pix_e); - reg_write16(encoder, REG_HS_PIX_START_MSB, hs_pix_s); - reg_write16(encoder, REG_HS_PIX_STOP_MSB, hs_pix_e); - reg_write16(encoder, REG_VWIN_START_1_MSB, vwin1_line_s); - reg_write16(encoder, REG_VWIN_END_1_MSB, vwin1_line_e); - reg_write16(encoder, REG_VWIN_START_2_MSB, vwin2_line_s); - reg_write16(encoder, REG_VWIN_END_2_MSB, vwin2_line_e); - reg_write16(encoder, REG_DE_START_MSB, de_pix_s); - reg_write16(encoder, REG_DE_STOP_MSB, de_pix_e); - - if (priv->rev == TDA19988) { - /* let incoming pixels fill the active space (if any) */ - reg_write(encoder, REG_ENABLE_SPACE, 0x01); - } + reg_write(priv, REG_TBG_CNTRL_1, reg); /* must be last register set: */ - reg_clear(encoder, REG_TBG_CNTRL_0, TBG_CNTRL_0_SYNC_ONCE); + reg_write(priv, REG_TBG_CNTRL_0, 0); /* Only setup the info frames if the sink is HDMI */ if (priv->is_hdmi_sink) { /* We need to turn HDMI HDCP stuff on to get audio through */ - reg_clear(encoder, REG_TBG_CNTRL_1, TBG_CNTRL_1_DWIN_DIS); - reg_write(encoder, REG_ENC_CNTRL, ENC_CNTRL_CTL_CODE(1)); - reg_set(encoder, REG_TX33, TX33_HDMI); + reg &= ~TBG_CNTRL_1_DWIN_DIS; + reg_write(priv, REG_TBG_CNTRL_1, reg); + reg_write(priv, REG_ENC_CNTRL, ENC_CNTRL_CTL_CODE(1)); + reg_set(priv, REG_TX33, TX33_HDMI); - tda998x_write_avi(encoder, adjusted_mode); + tda998x_write_avi(priv, adjusted_mode); if (priv->params.audio_cfg) - tda998x_configure_audio(encoder, adjusted_mode, + tda998x_configure_audio(priv, adjusted_mode, &priv->params); } } @@ -938,7 +1009,9 @@ static enum drm_connector_status tda998x_encoder_detect(struct drm_encoder *encoder, struct drm_connector *connector) { - uint8_t val = cec_read(encoder, REG_CEC_RXSHPDLEV); + struct tda998x_priv *priv = to_tda998x_priv(encoder); + uint8_t val = cec_read(priv, REG_CEC_RXSHPDLEV); + return (val & CEC_RXSHPDLEV_HPD) ? connector_status_connected : connector_status_disconnected; } @@ -946,46 +1019,57 @@ tda998x_encoder_detect(struct drm_encoder *encoder, static int read_edid_block(struct drm_encoder *encoder, uint8_t *buf, int blk) { + struct tda998x_priv *priv = to_tda998x_priv(encoder); uint8_t offset, segptr; int ret, i; - /* enable EDID read irq: */ - reg_set(encoder, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD); - offset = (blk & 1) ? 128 : 0; segptr = blk / 2; - reg_write(encoder, REG_DDC_ADDR, 0xa0); - reg_write(encoder, REG_DDC_OFFS, offset); - reg_write(encoder, REG_DDC_SEGM_ADDR, 0x60); - reg_write(encoder, REG_DDC_SEGM, segptr); + reg_write(priv, REG_DDC_ADDR, 0xa0); + reg_write(priv, REG_DDC_OFFS, offset); + reg_write(priv, REG_DDC_SEGM_ADDR, 0x60); + reg_write(priv, REG_DDC_SEGM, segptr); /* enable reading EDID: */ - reg_write(encoder, REG_EDID_CTRL, 0x1); + priv->wq_edid_wait = 1; + reg_write(priv, REG_EDID_CTRL, 0x1); /* flag must be cleared by sw: */ - reg_write(encoder, REG_EDID_CTRL, 0x0); + reg_write(priv, REG_EDID_CTRL, 0x0); /* wait for block read to complete: */ - for (i = 100; i > 0; i--) { - uint8_t val = reg_read(encoder, REG_INT_FLAGS_2); - if (val & INT_FLAGS_2_EDID_BLK_RD) - break; - msleep(1); + if (priv->hdmi->irq) { + i = wait_event_timeout(priv->wq_edid, + !priv->wq_edid_wait, + msecs_to_jiffies(100)); + if (i < 0) { + dev_err(&priv->hdmi->dev, "read edid wait err %d\n", i); + return i; + } + } else { + for (i = 10; i > 0; i--) { + msleep(10); + ret = reg_read(priv, REG_INT_FLAGS_2); + if (ret < 0) + return ret; + if (ret & INT_FLAGS_2_EDID_BLK_RD) + break; + } } - if (i == 0) + if (i == 0) { + dev_err(&priv->hdmi->dev, "read edid timeout\n"); return -ETIMEDOUT; + } - ret = reg_read_range(encoder, REG_EDID_DATA_0, buf, EDID_LENGTH); + ret = reg_read_range(priv, REG_EDID_DATA_0, buf, EDID_LENGTH); if (ret != EDID_LENGTH) { - dev_err(encoder->dev->dev, "failed to read edid block %d: %d", - blk, ret); + dev_err(&priv->hdmi->dev, "failed to read edid block %d: %d\n", + blk, ret); return ret; } - reg_clear(encoder, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD); - return 0; } @@ -993,7 +1077,7 @@ static uint8_t * do_get_edid(struct drm_encoder *encoder) { struct tda998x_priv *priv = to_tda998x_priv(encoder); - int j = 0, valid_extensions = 0; + int j, valid_extensions = 0; uint8_t *block, *new; bool print_bad_edid = drm_debug & DRM_UT_KMS; @@ -1001,7 +1085,7 @@ do_get_edid(struct drm_encoder *encoder) return NULL; if (priv->rev == TDA19988) - reg_clear(encoder, REG_TX4, TX4_PD_RAM); + reg_clear(priv, REG_TX4, TX4_PD_RAM); /* base block fetch */ if (read_edid_block(encoder, block, 0)) @@ -1041,14 +1125,14 @@ do_get_edid(struct drm_encoder *encoder) done: if (priv->rev == TDA19988) - reg_set(encoder, REG_TX4, TX4_PD_RAM); + reg_set(priv, REG_TX4, TX4_PD_RAM); return block; fail: if (priv->rev == TDA19988) - reg_set(encoder, REG_TX4, TX4_PD_RAM); - dev_warn(encoder->dev->dev, "failed to read EDID\n"); + reg_set(priv, REG_TX4, TX4_PD_RAM); + dev_warn(&priv->hdmi->dev, "failed to read EDID\n"); kfree(block); return NULL; } @@ -1075,7 +1159,13 @@ static int tda998x_encoder_create_resources(struct drm_encoder *encoder, struct drm_connector *connector) { - DBG(""); + struct tda998x_priv *priv = to_tda998x_priv(encoder); + + if (priv->hdmi->irq) + connector->polled = DRM_CONNECTOR_POLL_HPD; + else + connector->polled = DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT; return 0; } @@ -1094,6 +1184,15 @@ tda998x_encoder_destroy(struct drm_encoder *encoder) { struct tda998x_priv *priv = to_tda998x_priv(encoder); drm_i2c_encoder_destroy(encoder); + + /* disable all IRQs and free the IRQ handler */ + cec_write(priv, REG_CEC_RXSHPDINTENA, 0); + reg_clear(priv, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD); + if (priv->hdmi->irq) + free_irq(priv->hdmi->irq, priv); + + if (priv->cec) + i2c_unregister_device(priv->cec); kfree(priv); } @@ -1131,8 +1230,10 @@ tda998x_encoder_init(struct i2c_client *client, struct drm_device *dev, struct drm_encoder_slave *encoder_slave) { - struct drm_encoder *encoder = &encoder_slave->base; struct tda998x_priv *priv; + struct device_node *np = client->dev.of_node; + u32 video; + int rev_lo, rev_hi, ret; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) @@ -1142,49 +1243,114 @@ tda998x_encoder_init(struct i2c_client *client, priv->vip_cntrl_1 = VIP_CNTRL_1_SWAP_C(0) | VIP_CNTRL_1_SWAP_D(1); priv->vip_cntrl_2 = VIP_CNTRL_2_SWAP_E(4) | VIP_CNTRL_2_SWAP_F(5); - priv->current_page = 0; + priv->current_page = 0xff; + priv->hdmi = client; priv->cec = i2c_new_dummy(client->adapter, 0x34); + if (!priv->cec) { + kfree(priv); + return -ENODEV; + } + + priv->encoder = &encoder_slave->base; priv->dpms = DRM_MODE_DPMS_OFF; encoder_slave->slave_priv = priv; encoder_slave->slave_funcs = &tda998x_encoder_funcs; /* wake up the device: */ - cec_write(encoder, REG_CEC_ENAMODS, + cec_write(priv, REG_CEC_ENAMODS, CEC_ENAMODS_EN_RXSENS | CEC_ENAMODS_EN_HDMI); - tda998x_reset(encoder); + tda998x_reset(priv); /* read version: */ - priv->rev = reg_read(encoder, REG_VERSION_LSB) | - reg_read(encoder, REG_VERSION_MSB) << 8; + rev_lo = reg_read(priv, REG_VERSION_LSB); + rev_hi = reg_read(priv, REG_VERSION_MSB); + if (rev_lo < 0 || rev_hi < 0) { + ret = rev_lo < 0 ? rev_lo : rev_hi; + goto fail; + } + + priv->rev = rev_lo | rev_hi << 8; /* mask off feature bits: */ priv->rev &= ~0x30; /* not-hdcp and not-scalar bit */ switch (priv->rev) { - case TDA9989N2: dev_info(dev->dev, "found TDA9989 n2"); break; - case TDA19989: dev_info(dev->dev, "found TDA19989"); break; - case TDA19989N2: dev_info(dev->dev, "found TDA19989 n2"); break; - case TDA19988: dev_info(dev->dev, "found TDA19988"); break; + case TDA9989N2: + dev_info(&client->dev, "found TDA9989 n2"); + break; + case TDA19989: + dev_info(&client->dev, "found TDA19989"); + break; + case TDA19989N2: + dev_info(&client->dev, "found TDA19989 n2"); + break; + case TDA19988: + dev_info(&client->dev, "found TDA19988"); + break; default: - DBG("found unsupported device: %04x", priv->rev); + dev_err(&client->dev, "found unsupported device: %04x\n", + priv->rev); goto fail; } /* after reset, enable DDC: */ - reg_write(encoder, REG_DDC_DISABLE, 0x00); + reg_write(priv, REG_DDC_DISABLE, 0x00); /* set clock on DDC channel: */ - reg_write(encoder, REG_TX3, 39); + reg_write(priv, REG_TX3, 39); /* if necessary, disable multi-master: */ if (priv->rev == TDA19989) - reg_set(encoder, REG_I2C_MASTER, I2C_MASTER_DIS_MM); + reg_set(priv, REG_I2C_MASTER, I2C_MASTER_DIS_MM); - cec_write(encoder, REG_CEC_FRO_IM_CLK_CTRL, + cec_write(priv, REG_CEC_FRO_IM_CLK_CTRL, CEC_FRO_IM_CLK_CTRL_GHOST_DIS | CEC_FRO_IM_CLK_CTRL_IMCLK_SEL); + /* initialize the optional IRQ */ + if (client->irq) { + int irqf_trigger; + + /* init read EDID waitqueue */ + init_waitqueue_head(&priv->wq_edid); + + /* clear pending interrupts */ + reg_read(priv, REG_INT_FLAGS_0); + reg_read(priv, REG_INT_FLAGS_1); + reg_read(priv, REG_INT_FLAGS_2); + + irqf_trigger = + irqd_get_trigger_type(irq_get_irq_data(client->irq)); + ret = request_threaded_irq(client->irq, NULL, + tda998x_irq_thread, + irqf_trigger | IRQF_ONESHOT, + "tda998x", priv); + if (ret) { + dev_err(&client->dev, + "failed to request IRQ#%u: %d\n", + client->irq, ret); + goto fail; + } + + /* enable HPD irq */ + cec_write(priv, REG_CEC_RXSHPDINTENA, CEC_RXSHPDLEV_HPD); + } + + /* enable EDID read irq: */ + reg_set(priv, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD); + + if (!np) + return 0; /* non-DT */ + + /* get the optional video properties */ + ret = of_property_read_u32(np, "video-ports", &video); + if (ret == 0) { + priv->vip_cntrl_0 = video >> 16; + priv->vip_cntrl_1 = video >> 8; + priv->vip_cntrl_2 = video; + } + return 0; fail: @@ -1199,6 +1365,14 @@ fail: return -ENXIO; } +#ifdef CONFIG_OF +static const struct of_device_id tda998x_dt_ids[] = { + { .compatible = "nxp,tda998x", }, + { } +}; +MODULE_DEVICE_TABLE(of, tda998x_dt_ids); +#endif + static struct i2c_device_id tda998x_ids[] = { { "tda998x", 0 }, { } @@ -1211,6 +1385,7 @@ static struct drm_i2c_encoder_driver tda998x_driver = { .remove = tda998x_remove, .driver = { .name = "tda998x", + .of_match_table = of_match_ptr(tda998x_dt_ids), }, .id_table = tda998x_ids, }, diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 077e81b1065..b1445b73465 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -36,11 +36,10 @@ i915-y += i915_cmd_parser.o \ i915-y += intel_bios.o \ intel_display.o \ intel_modes.o \ - intel_opregion.o \ intel_overlay.o \ intel_sideband.o \ intel_sprite.o -i915-$(CONFIG_ACPI) += intel_acpi.o +i915-$(CONFIG_ACPI) += intel_acpi.o intel_opregion.o i915-$(CONFIG_DRM_I915_FBDEV) += intel_fbdev.o # modesetting output/encoder code diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7b6dfdfb2d8..2a319ba91a7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2013,6 +2013,14 @@ struct drm_i915_cmd_table { /* Early gen2 have a totally busted CS tlb and require pinned batches. */ #define HAS_BROKEN_CS_TLB(dev) (IS_I830(dev) || IS_845G(dev)) +/* + * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts + * even when in MSI mode. This results in spurious interrupt warnings if the + * legacy irq no. is shared with another device. The kernel then disables that + * interrupt source and so prevents the other device from working properly. + */ +#define HAS_AUX_IRQ(dev) (INTEL_INFO(dev)->gen >= 5) +#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->gen >= 5) /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte * rows, which changed the alignment requirements and fence programming. @@ -2605,8 +2613,8 @@ extern void intel_i2c_reset(struct drm_device *dev); /* intel_opregion.c */ struct intel_encoder; -extern int intel_opregion_setup(struct drm_device *dev); #ifdef CONFIG_ACPI +extern int intel_opregion_setup(struct drm_device *dev); extern void intel_opregion_init(struct drm_device *dev); extern void intel_opregion_fini(struct drm_device *dev); extern void intel_opregion_asle_intr(struct drm_device *dev); @@ -2615,6 +2623,7 @@ extern int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, extern int intel_opregion_notify_adapter(struct drm_device *dev, pci_power_t state); #else +static inline int intel_opregion_setup(struct drm_device *dev) { return 0; } static inline void intel_opregion_init(struct drm_device *dev) { return; } static inline void intel_opregion_fini(struct drm_device *dev) { return; } static inline void intel_opregion_asle_intr(struct drm_device *dev) { return; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3d8bd62a926..63a6dc7a6bb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1817,14 +1817,14 @@ static int ggtt_probe_common(struct drm_device *dev, size_t gtt_size) { struct drm_i915_private *dev_priv = dev->dev_private; - phys_addr_t gtt_bus_addr; + phys_addr_t gtt_phys_addr; int ret; /* For Modern GENs the PTEs and register space are split in the BAR */ - gtt_bus_addr = pci_resource_start(dev->pdev, 0) + + gtt_phys_addr = pci_resource_start(dev->pdev, 0) + (pci_resource_len(dev->pdev, 0) / 2); - dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size); + dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size); if (!dev_priv->gtt.gsm) { DRM_ERROR("Failed to map the gtt page table\n"); return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index ce2dd608968..144a5e2bc7e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -146,7 +146,10 @@ static void i915_error_vprintf(struct drm_i915_error_state_buf *e, va_list tmp; va_copy(tmp, args); - if (!__i915_error_seek(e, vsnprintf(NULL, 0, f, tmp))) + len = vsnprintf(NULL, 0, f, tmp); + va_end(tmp); + + if (!__i915_error_seek(e, len)) return; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 55a0a1de3a1..be2713f12e0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -667,8 +667,7 @@ static u32 i915_get_vblank_counter(struct drm_device *dev, int pipe) vbl_start = mode->crtc_vblank_start * mode->crtc_htotal; } else { - enum transcoder cpu_transcoder = - intel_pipe_to_cpu_transcoder(dev_priv, pipe); + enum transcoder cpu_transcoder = (enum transcoder) pipe; u32 htotal; htotal = ((I915_READ(HTOTAL(cpu_transcoder)) >> 16) & 0x1fff) + 1; diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c index dfff0907f70..d96eee1ae9c 100644 --- a/drivers/gpu/drm/i915/intel_acpi.c +++ b/drivers/gpu/drm/i915/intel_acpi.c @@ -6,14 +6,10 @@ #include <linux/pci.h> #include <linux/acpi.h> #include <linux/vga_switcheroo.h> -#include <acpi/acpi_drivers.h> - #include <drm/drmP.h> #include "i915_drv.h" #define INTEL_DSM_REVISION_ID 1 /* For Calpella anyway... */ - -#define INTEL_DSM_FN_SUPPORTED_FUNCTIONS 0 /* No args */ #define INTEL_DSM_FN_PLATFORM_MUX_INFO 1 /* No args */ static struct intel_dsm_priv { @@ -28,61 +24,6 @@ static const u8 intel_dsm_guid[] = { 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c }; -static int intel_dsm(acpi_handle handle, int func) -{ - struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; - struct acpi_object_list input; - union acpi_object params[4]; - union acpi_object *obj; - u32 result; - int ret = 0; - - input.count = 4; - input.pointer = params; - params[0].type = ACPI_TYPE_BUFFER; - params[0].buffer.length = sizeof(intel_dsm_guid); - params[0].buffer.pointer = (char *)intel_dsm_guid; - params[1].type = ACPI_TYPE_INTEGER; - params[1].integer.value = INTEL_DSM_REVISION_ID; - params[2].type = ACPI_TYPE_INTEGER; - params[2].integer.value = func; - params[3].type = ACPI_TYPE_PACKAGE; - params[3].package.count = 0; - params[3].package.elements = NULL; - - ret = acpi_evaluate_object(handle, "_DSM", &input, &output); - if (ret) { - DRM_DEBUG_DRIVER("failed to evaluate _DSM: %d\n", ret); - return ret; - } - - obj = (union acpi_object *)output.pointer; - - result = 0; - switch (obj->type) { - case ACPI_TYPE_INTEGER: - result = obj->integer.value; - break; - - case ACPI_TYPE_BUFFER: - if (obj->buffer.length == 4) { - result = (obj->buffer.pointer[0] | - (obj->buffer.pointer[1] << 8) | - (obj->buffer.pointer[2] << 16) | - (obj->buffer.pointer[3] << 24)); - break; - } - default: - ret = -EINVAL; - break; - } - if (result == 0x80000002) - ret = -ENODEV; - - kfree(output.pointer); - return ret; -} - static char *intel_dsm_port_name(u8 id) { switch (id) { @@ -137,83 +78,56 @@ static char *intel_dsm_mux_type(u8 type) static void intel_dsm_platform_mux_info(void) { - struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; - struct acpi_object_list input; - union acpi_object params[4]; - union acpi_object *pkg; - int i, ret; - - input.count = 4; - input.pointer = params; - params[0].type = ACPI_TYPE_BUFFER; - params[0].buffer.length = sizeof(intel_dsm_guid); - params[0].buffer.pointer = (char *)intel_dsm_guid; - params[1].type = ACPI_TYPE_INTEGER; - params[1].integer.value = INTEL_DSM_REVISION_ID; - params[2].type = ACPI_TYPE_INTEGER; - params[2].integer.value = INTEL_DSM_FN_PLATFORM_MUX_INFO; - params[3].type = ACPI_TYPE_PACKAGE; - params[3].package.count = 0; - params[3].package.elements = NULL; - - ret = acpi_evaluate_object(intel_dsm_priv.dhandle, "_DSM", &input, - &output); - if (ret) { - DRM_DEBUG_DRIVER("failed to evaluate _DSM: %d\n", ret); - goto out; + int i; + union acpi_object *pkg, *connector_count; + + pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, intel_dsm_guid, + INTEL_DSM_REVISION_ID, INTEL_DSM_FN_PLATFORM_MUX_INFO, + NULL, ACPI_TYPE_PACKAGE); + if (!pkg) { + DRM_DEBUG_DRIVER("failed to evaluate _DSM\n"); + return; } - pkg = (union acpi_object *)output.pointer; - - if (pkg->type == ACPI_TYPE_PACKAGE) { - union acpi_object *connector_count = &pkg->package.elements[0]; - DRM_DEBUG_DRIVER("MUX info connectors: %lld\n", - (unsigned long long)connector_count->integer.value); - for (i = 1; i < pkg->package.count; i++) { - union acpi_object *obj = &pkg->package.elements[i]; - union acpi_object *connector_id = - &obj->package.elements[0]; - union acpi_object *info = &obj->package.elements[1]; - DRM_DEBUG_DRIVER("Connector id: 0x%016llx\n", - (unsigned long long)connector_id->integer.value); - DRM_DEBUG_DRIVER(" port id: %s\n", - intel_dsm_port_name(info->buffer.pointer[0])); - DRM_DEBUG_DRIVER(" display mux info: %s\n", - intel_dsm_mux_type(info->buffer.pointer[1])); - DRM_DEBUG_DRIVER(" aux/dc mux info: %s\n", - intel_dsm_mux_type(info->buffer.pointer[2])); - DRM_DEBUG_DRIVER(" hpd mux info: %s\n", - intel_dsm_mux_type(info->buffer.pointer[3])); - } + connector_count = &pkg->package.elements[0]; + DRM_DEBUG_DRIVER("MUX info connectors: %lld\n", + (unsigned long long)connector_count->integer.value); + for (i = 1; i < pkg->package.count; i++) { + union acpi_object *obj = &pkg->package.elements[i]; + union acpi_object *connector_id = &obj->package.elements[0]; + union acpi_object *info = &obj->package.elements[1]; + DRM_DEBUG_DRIVER("Connector id: 0x%016llx\n", + (unsigned long long)connector_id->integer.value); + DRM_DEBUG_DRIVER(" port id: %s\n", + intel_dsm_port_name(info->buffer.pointer[0])); + DRM_DEBUG_DRIVER(" display mux info: %s\n", + intel_dsm_mux_type(info->buffer.pointer[1])); + DRM_DEBUG_DRIVER(" aux/dc mux info: %s\n", + intel_dsm_mux_type(info->buffer.pointer[2])); + DRM_DEBUG_DRIVER(" hpd mux info: %s\n", + intel_dsm_mux_type(info->buffer.pointer[3])); } -out: - kfree(output.pointer); + ACPI_FREE(pkg); } static bool intel_dsm_pci_probe(struct pci_dev *pdev) { acpi_handle dhandle; - int ret; dhandle = ACPI_HANDLE(&pdev->dev); if (!dhandle) return false; - if (!acpi_has_method(dhandle, "_DSM")) { + if (!acpi_check_dsm(dhandle, intel_dsm_guid, INTEL_DSM_REVISION_ID, + 1 << INTEL_DSM_FN_PLATFORM_MUX_INFO)) { DRM_DEBUG_KMS("no _DSM method for intel device\n"); return false; } - ret = intel_dsm(dhandle, INTEL_DSM_FN_SUPPORTED_FUNCTIONS); - if (ret < 0) { - DRM_DEBUG_KMS("failed to get supported _DSM functions\n"); - return false; - } - intel_dsm_priv.dhandle = dhandle; - intel_dsm_platform_mux_info(); + return true; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 56edc840985..11746e28243 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8627,6 +8627,20 @@ static int intel_gen7_queue_flip(struct drm_device *dev, if (ring->id == RCS) len += 6; + /* + * BSpec MI_DISPLAY_FLIP for IVB: + * "The full packet must be contained within the same cache line." + * + * Currently the LRI+SRM+MI_DISPLAY_FLIP all fit within the same + * cacheline, if we ever start emitting more commands before + * the MI_DISPLAY_FLIP we may need to first emit everything else, + * then do the cacheline alignment, and finally emit the + * MI_DISPLAY_FLIP. + */ + ret = intel_ring_cacheline_align(ring); + if (ret) + goto err_unpin; + ret = intel_ring_begin(ring, len); if (ret) goto err_unpin; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 7584348b7e8..49d12d341ab 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -459,7 +459,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, int i, ret, recv_bytes; uint32_t status; int try, clock = 0; - bool has_aux_irq = true; + bool has_aux_irq = HAS_AUX_IRQ(dev); /* dp aux is extremely sensitive to irq latency, hence request the * lowest possible wakeup latency and so prevent the cpu from going into @@ -577,6 +577,7 @@ intel_dp_aux_native_write(struct intel_dp *intel_dp, uint8_t msg[20]; int msg_bytes; uint8_t ack; + int retry; if (WARN_ON(send_bytes > 16)) return -E2BIG; @@ -588,19 +589,21 @@ intel_dp_aux_native_write(struct intel_dp *intel_dp, msg[3] = send_bytes - 1; memcpy(&msg[4], send, send_bytes); msg_bytes = send_bytes + 4; - for (;;) { + for (retry = 0; retry < 7; retry++) { ret = intel_dp_aux_ch(intel_dp, msg, msg_bytes, &ack, 1); if (ret < 0) return ret; ack >>= 4; if ((ack & DP_AUX_NATIVE_REPLY_MASK) == DP_AUX_NATIVE_REPLY_ACK) - break; + return send_bytes; else if ((ack & DP_AUX_NATIVE_REPLY_MASK) == DP_AUX_NATIVE_REPLY_DEFER) - udelay(100); + usleep_range(400, 500); else return -EIO; } - return send_bytes; + + DRM_ERROR("too many retries, giving up\n"); + return -EIO; } /* Write a single byte to the aux channel in native mode */ @@ -622,6 +625,7 @@ intel_dp_aux_native_read(struct intel_dp *intel_dp, int reply_bytes; uint8_t ack; int ret; + int retry; if (WARN_ON(recv_bytes > 19)) return -E2BIG; @@ -635,7 +639,7 @@ intel_dp_aux_native_read(struct intel_dp *intel_dp, msg_bytes = 4; reply_bytes = recv_bytes + 1; - for (;;) { + for (retry = 0; retry < 7; retry++) { ret = intel_dp_aux_ch(intel_dp, msg, msg_bytes, reply, reply_bytes); if (ret == 0) @@ -648,10 +652,13 @@ intel_dp_aux_native_read(struct intel_dp *intel_dp, return ret - 1; } else if ((ack & DP_AUX_NATIVE_REPLY_MASK) == DP_AUX_NATIVE_REPLY_DEFER) - udelay(100); + usleep_range(400, 500); else return -EIO; } + + DRM_ERROR("too many retries, giving up\n"); + return -EIO; } static int @@ -1963,10 +1970,12 @@ static void vlv_pre_enable_dp(struct intel_encoder *encoder) mutex_unlock(&dev_priv->dpio_lock); - /* init power sequencer on this pipe and port */ - intel_dp_init_panel_power_sequencer(dev, intel_dp, &power_seq); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, - &power_seq); + if (is_edp(intel_dp)) { + /* init power sequencer on this pipe and port */ + intel_dp_init_panel_power_sequencer(dev, intel_dp, &power_seq); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, + &power_seq); + } intel_enable_dp(encoder); diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index b1dc33f4789..d33b61d0dd3 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -258,13 +258,6 @@ intel_gpio_setup(struct intel_gmbus *bus, u32 pin) algo->data = bus; } -/* - * gmbus on gen4 seems to be able to generate legacy interrupts even when in MSI - * mode. This results in spurious interrupt warnings if the legacy irq no. is - * shared with another device. The kernel then disables that interrupt source - * and so prevents the other device from working properly. - */ -#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->gen >= 5) static int gmbus_wait_hw_status(struct drm_i915_private *dev_priv, u32 gmbus2_status, diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 37e9a96777e..acde2945eb8 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -28,7 +28,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/acpi.h> -#include <linux/acpi_io.h> #include <acpi/video.h> #include <drm/drmP.h> @@ -227,6 +226,8 @@ struct opregion_asle { #define ACPI_DIGITAL_OUTPUT (3<<8) #define ACPI_LVDS_OUTPUT (4<<8) +#define MAX_DSLP 1500 + #ifdef CONFIG_ACPI static int swsci(struct drm_device *dev, u32 function, u32 parm, u32 *parm_out) { @@ -261,10 +262,11 @@ static int swsci(struct drm_device *dev, u32 function, u32 parm, u32 *parm_out) /* The spec says 2ms should be the default, but it's too small * for some machines. */ dslp = 50; - } else if (dslp > 500) { + } else if (dslp > MAX_DSLP) { /* Hey bios, trust must be earned. */ - WARN_ONCE(1, "excessive driver sleep timeout (DSPL) %u\n", dslp); - dslp = 500; + DRM_INFO_ONCE("ACPI BIOS requests an excessive sleep of %u ms, " + "using %u ms instead\n", dslp, MAX_DSLP); + dslp = MAX_DSLP; } /* The spec tells us to do this, but we are the only user... */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5629cc7a773..859092130c1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1644,6 +1644,27 @@ int intel_ring_begin(struct intel_ring_buffer *ring, return 0; } +/* Align the ring tail to a cacheline boundary */ +int intel_ring_cacheline_align(struct intel_ring_buffer *ring) +{ + int num_dwords = (64 - (ring->tail & 63)) / sizeof(uint32_t); + int ret; + + if (num_dwords == 0) + return 0; + + ret = intel_ring_begin(ring, num_dwords); + if (ret) + return ret; + + while (num_dwords--) + intel_ring_emit(ring, MI_NOOP); + + intel_ring_advance(ring); + + return 0; +} + void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno) { struct drm_i915_private *dev_priv = ring->dev->dev_private; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9d4c3b1182e..09af92099c1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -267,6 +267,7 @@ intel_write_status_page(struct intel_ring_buffer *ring, void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring); int __must_check intel_ring_begin(struct intel_ring_buffer *ring, int n); +int __must_check intel_ring_cacheline_align(struct intel_ring_buffer *ring); static inline void intel_ring_emit(struct intel_ring_buffer *ring, u32 data) { diff --git a/drivers/gpu/drm/mgag200/mgag200_fb.c b/drivers/gpu/drm/mgag200/mgag200_fb.c index f9adc27ef32..13b7dd83faa 100644 --- a/drivers/gpu/drm/mgag200/mgag200_fb.c +++ b/drivers/gpu/drm/mgag200/mgag200_fb.c @@ -41,7 +41,7 @@ static void mga_dirty_update(struct mga_fbdev *mfbdev, * then the BO is being moved and we should * store up the damage until later. */ - if (!drm_can_sleep()) + if (drm_can_sleep()) ret = mgag200_bo_reserve(bo, true); if (ret) { if (ret != -EBUSY) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index b8583f275e8..968374776db 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1519,11 +1519,11 @@ static int mga_vga_mode_valid(struct drm_connector *connector, (mga_vga_calculate_mode_bandwidth(mode, bpp) > (32700 * 1024))) { return MODE_BANDWIDTH; - } else if (mode->type == G200_EH && + } else if (mdev->type == G200_EH && (mga_vga_calculate_mode_bandwidth(mode, bpp) > (37500 * 1024))) { return MODE_BANDWIDTH; - } else if (mode->type == G200_ER && + } else if (mdev->type == G200_ER && (mga_vga_calculate_mode_bandwidth(mode, bpp) > (55000 * 1024))) { return MODE_BANDWIDTH; diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index bb103fb4519..c69d1e07a3a 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -2,6 +2,7 @@ config DRM_MSM tristate "MSM DRM" depends on DRM + depends on MSM_IOMMU depends on (ARCH_MSM && ARCH_MSM8960) || (ARM && COMPILE_TEST) select DRM_KMS_HELPER select SHMEM diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index 1964f4f0d45..84c5b13b33c 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -39,6 +39,7 @@ struct mdp4_crtc { spinlock_t lock; bool stale; uint32_t width, height; + uint32_t x, y; /* next cursor to scan-out: */ uint32_t next_iova; @@ -57,9 +58,16 @@ struct mdp4_crtc { #define PENDING_FLIP 0x2 atomic_t pending; - /* the fb that we currently hold a scanout ref to: */ + /* the fb that we logically (from PoV of KMS API) hold a ref + * to. Which we may not yet be scanning out (we may still + * be scanning out previous in case of page_flip while waiting + * for gpu rendering to complete: + */ struct drm_framebuffer *fb; + /* the fb that we currently hold a scanout ref to: */ + struct drm_framebuffer *scanout_fb; + /* for unref'ing framebuffers after scanout completes: */ struct drm_flip_work unref_fb_work; @@ -77,24 +85,73 @@ static struct mdp4_kms *get_kms(struct drm_crtc *crtc) return to_mdp4_kms(to_mdp_kms(priv->kms)); } -static void update_fb(struct drm_crtc *crtc, bool async, - struct drm_framebuffer *new_fb) +static void request_pending(struct drm_crtc *crtc, uint32_t pending) { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - struct drm_framebuffer *old_fb = mdp4_crtc->fb; - if (old_fb) - drm_flip_work_queue(&mdp4_crtc->unref_fb_work, old_fb); + atomic_or(pending, &mdp4_crtc->pending); + mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank); +} + +static void crtc_flush(struct drm_crtc *crtc) +{ + struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); + struct mdp4_kms *mdp4_kms = get_kms(crtc); + uint32_t i, flush = 0; + + for (i = 0; i < ARRAY_SIZE(mdp4_crtc->planes); i++) { + struct drm_plane *plane = mdp4_crtc->planes[i]; + if (plane) { + enum mdp4_pipe pipe_id = mdp4_plane_pipe(plane); + flush |= pipe2flush(pipe_id); + } + } + flush |= ovlp2flush(mdp4_crtc->ovlp); + + DBG("%s: flush=%08x", mdp4_crtc->name, flush); + + mdp4_write(mdp4_kms, REG_MDP4_OVERLAY_FLUSH, flush); +} + +static void update_fb(struct drm_crtc *crtc, struct drm_framebuffer *new_fb) +{ + struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); + struct drm_framebuffer *old_fb = mdp4_crtc->fb; /* grab reference to incoming scanout fb: */ drm_framebuffer_reference(new_fb); mdp4_crtc->base.fb = new_fb; mdp4_crtc->fb = new_fb; - if (!async) { - /* enable vblank to pick up the old_fb */ - mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank); - } + if (old_fb) + drm_flip_work_queue(&mdp4_crtc->unref_fb_work, old_fb); +} + +/* unlike update_fb(), take a ref to the new scanout fb *before* updating + * plane, then call this. Needed to ensure we don't unref the buffer that + * is actually still being scanned out. + * + * Note that this whole thing goes away with atomic.. since we can defer + * calling into driver until rendering is done. + */ +static void update_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb) +{ + struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); + + /* flush updates, to make sure hw is updated to new scanout fb, + * so that we can safely queue unref to current fb (ie. next + * vblank we know hw is done w/ previous scanout_fb). + */ + crtc_flush(crtc); + + if (mdp4_crtc->scanout_fb) + drm_flip_work_queue(&mdp4_crtc->unref_fb_work, + mdp4_crtc->scanout_fb); + + mdp4_crtc->scanout_fb = fb; + + /* enable vblank to complete flip: */ + request_pending(crtc, PENDING_FLIP); } /* if file!=NULL, this is preclose potential cancel-flip path */ @@ -120,34 +177,6 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file) spin_unlock_irqrestore(&dev->event_lock, flags); } -static void crtc_flush(struct drm_crtc *crtc) -{ - struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - struct mdp4_kms *mdp4_kms = get_kms(crtc); - uint32_t i, flush = 0; - - for (i = 0; i < ARRAY_SIZE(mdp4_crtc->planes); i++) { - struct drm_plane *plane = mdp4_crtc->planes[i]; - if (plane) { - enum mdp4_pipe pipe_id = mdp4_plane_pipe(plane); - flush |= pipe2flush(pipe_id); - } - } - flush |= ovlp2flush(mdp4_crtc->ovlp); - - DBG("%s: flush=%08x", mdp4_crtc->name, flush); - - mdp4_write(mdp4_kms, REG_MDP4_OVERLAY_FLUSH, flush); -} - -static void request_pending(struct drm_crtc *crtc, uint32_t pending) -{ - struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - - atomic_or(pending, &mdp4_crtc->pending); - mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank); -} - static void pageflip_cb(struct msm_fence_cb *cb) { struct mdp4_crtc *mdp4_crtc = @@ -158,11 +187,9 @@ static void pageflip_cb(struct msm_fence_cb *cb) if (!fb) return; + drm_framebuffer_reference(fb); mdp4_plane_set_scanout(mdp4_crtc->plane, fb); - crtc_flush(crtc); - - /* enable vblank to complete flip: */ - request_pending(crtc, PENDING_FLIP); + update_scanout(crtc, fb); } static void unref_fb_worker(struct drm_flip_work *work, void *val) @@ -320,6 +347,20 @@ static int mdp4_crtc_mode_set(struct drm_crtc *crtc, mode->vsync_end, mode->vtotal, mode->type, mode->flags); + /* grab extra ref for update_scanout() */ + drm_framebuffer_reference(crtc->fb); + + ret = mdp4_plane_mode_set(mdp4_crtc->plane, crtc, crtc->fb, + 0, 0, mode->hdisplay, mode->vdisplay, + x << 16, y << 16, + mode->hdisplay << 16, mode->vdisplay << 16); + if (ret) { + drm_framebuffer_unreference(crtc->fb); + dev_err(crtc->dev->dev, "%s: failed to set mode on plane: %d\n", + mdp4_crtc->name, ret); + return ret; + } + mdp4_write(mdp4_kms, REG_MDP4_DMA_SRC_SIZE(dma), MDP4_DMA_SRC_SIZE_WIDTH(mode->hdisplay) | MDP4_DMA_SRC_SIZE_HEIGHT(mode->vdisplay)); @@ -341,24 +382,15 @@ static int mdp4_crtc_mode_set(struct drm_crtc *crtc, mdp4_write(mdp4_kms, REG_MDP4_OVLP_CFG(ovlp), 1); - update_fb(crtc, false, crtc->fb); - - ret = mdp4_plane_mode_set(mdp4_crtc->plane, crtc, crtc->fb, - 0, 0, mode->hdisplay, mode->vdisplay, - x << 16, y << 16, - mode->hdisplay << 16, mode->vdisplay << 16); - if (ret) { - dev_err(crtc->dev->dev, "%s: failed to set mode on plane: %d\n", - mdp4_crtc->name, ret); - return ret; - } - if (dma == DMA_E) { mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(0), 0x00ff0000); mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(1), 0x00ff0000); mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(2), 0x00ff0000); } + update_fb(crtc, crtc->fb); + update_scanout(crtc, crtc->fb); + return 0; } @@ -385,13 +417,24 @@ static int mdp4_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); struct drm_plane *plane = mdp4_crtc->plane; struct drm_display_mode *mode = &crtc->mode; + int ret; - update_fb(crtc, false, crtc->fb); + /* grab extra ref for update_scanout() */ + drm_framebuffer_reference(crtc->fb); - return mdp4_plane_mode_set(plane, crtc, crtc->fb, + ret = mdp4_plane_mode_set(plane, crtc, crtc->fb, 0, 0, mode->hdisplay, mode->vdisplay, x << 16, y << 16, mode->hdisplay << 16, mode->vdisplay << 16); + if (ret) { + drm_framebuffer_unreference(crtc->fb); + return ret; + } + + update_fb(crtc, crtc->fb); + update_scanout(crtc, crtc->fb); + + return 0; } static void mdp4_crtc_load_lut(struct drm_crtc *crtc) @@ -419,7 +462,7 @@ static int mdp4_crtc_page_flip(struct drm_crtc *crtc, mdp4_crtc->event = event; spin_unlock_irqrestore(&dev->event_lock, flags); - update_fb(crtc, true, new_fb); + update_fb(crtc, new_fb); return msm_gem_queue_inactive_cb(obj, &mdp4_crtc->pageflip_cb); } @@ -442,12 +485,12 @@ static int mdp4_crtc_set_property(struct drm_crtc *crtc, static void update_cursor(struct drm_crtc *crtc) { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); + struct mdp4_kms *mdp4_kms = get_kms(crtc); enum mdp4_dma dma = mdp4_crtc->dma; unsigned long flags; spin_lock_irqsave(&mdp4_crtc->cursor.lock, flags); if (mdp4_crtc->cursor.stale) { - struct mdp4_kms *mdp4_kms = get_kms(crtc); struct drm_gem_object *next_bo = mdp4_crtc->cursor.next_bo; struct drm_gem_object *prev_bo = mdp4_crtc->cursor.scanout_bo; uint32_t iova = mdp4_crtc->cursor.next_iova; @@ -479,6 +522,11 @@ static void update_cursor(struct drm_crtc *crtc) mdp4_crtc->cursor.scanout_bo = next_bo; mdp4_crtc->cursor.stale = false; } + + mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_POS(dma), + MDP4_DMA_CURSOR_POS_X(mdp4_crtc->cursor.x) | + MDP4_DMA_CURSOR_POS_Y(mdp4_crtc->cursor.y)); + spin_unlock_irqrestore(&mdp4_crtc->cursor.lock, flags); } @@ -530,6 +578,7 @@ static int mdp4_crtc_cursor_set(struct drm_crtc *crtc, drm_gem_object_unreference_unlocked(old_bo); } + crtc_flush(crtc); request_pending(crtc, PENDING_CURSOR); return 0; @@ -542,12 +591,15 @@ fail: static int mdp4_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - struct mdp4_kms *mdp4_kms = get_kms(crtc); - enum mdp4_dma dma = mdp4_crtc->dma; + unsigned long flags; - mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_POS(dma), - MDP4_DMA_CURSOR_POS_X(x) | - MDP4_DMA_CURSOR_POS_Y(y)); + spin_lock_irqsave(&mdp4_crtc->cursor.lock, flags); + mdp4_crtc->cursor.x = x; + mdp4_crtc->cursor.y = y; + spin_unlock_irqrestore(&mdp4_crtc->cursor.lock, flags); + + crtc_flush(crtc); + request_pending(crtc, PENDING_CURSOR); return 0; } @@ -713,6 +765,7 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, crtc = &mdp4_crtc->base; mdp4_crtc->plane = plane; + mdp4_crtc->id = id; mdp4_crtc->ovlp = ovlp_id; mdp4_crtc->dma = dma_id; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c index 2406027200e..1e893dd1385 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c @@ -170,8 +170,8 @@ int mdp4_plane_mode_set(struct drm_plane *plane, MDP4_PIPE_DST_SIZE_HEIGHT(crtc_h)); mdp4_write(mdp4_kms, REG_MDP4_PIPE_DST_XY(pipe), - MDP4_PIPE_SRC_XY_X(crtc_x) | - MDP4_PIPE_SRC_XY_Y(crtc_y)); + MDP4_PIPE_DST_XY_X(crtc_x) | + MDP4_PIPE_DST_XY_Y(crtc_y)); mdp4_plane_set_scanout(plane, fb); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 71a3b2345eb..f2794021f08 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -296,6 +296,7 @@ static int mdp5_crtc_mode_set(struct drm_crtc *crtc, x << 16, y << 16, mode->hdisplay << 16, mode->vdisplay << 16); if (ret) { + drm_framebuffer_unreference(crtc->fb); dev_err(crtc->dev->dev, "%s: failed to set mode on plane: %d\n", mdp5_crtc->name, ret); return ret; @@ -343,11 +344,15 @@ static int mdp5_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, 0, 0, mode->hdisplay, mode->vdisplay, x << 16, y << 16, mode->hdisplay << 16, mode->vdisplay << 16); + if (ret) { + drm_framebuffer_unreference(crtc->fb); + return ret; + } update_fb(crtc, crtc->fb); update_scanout(crtc, crtc->fb); - return ret; + return 0; } static void mdp5_crtc_load_lut(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index d8d60c969ac..3da8264d303 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -644,7 +644,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, fail: if (obj) - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_unreference(obj); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 5281d4bc37f..5423e914e49 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -163,7 +163,7 @@ retry: /* if locking succeeded, pin bo: */ - ret = msm_gem_get_iova(&msm_obj->base, + ret = msm_gem_get_iova_locked(&msm_obj->base, submit->gpu->id, &iova); /* this would break the logic in the fail path.. there is no @@ -247,7 +247,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob /* For now, just map the entire thing. Eventually we probably * to do it page-by-page, w/ kmap() if not vmap()d.. */ - ptr = msm_gem_vaddr(&obj->base); + ptr = msm_gem_vaddr_locked(&obj->base); if (IS_ERR(ptr)) { ret = PTR_ERR(ptr); @@ -307,14 +307,12 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool fail) { unsigned i; - mutex_lock(&submit->dev->struct_mutex); for (i = 0; i < submit->nr_bos; i++) { struct msm_gem_object *msm_obj = submit->bos[i].obj; submit_unlock_unpin_bo(submit, i); list_del_init(&msm_obj->submit_entry); drm_gem_object_unreference(&msm_obj->base); } - mutex_unlock(&submit->dev->struct_mutex); ww_acquire_fini(&submit->ticket); kfree(submit); @@ -342,6 +340,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (args->nr_cmds > MAX_CMDS) return -EINVAL; + mutex_lock(&dev->struct_mutex); + submit = submit_create(dev, gpu, args->nr_bos); if (!submit) { ret = -ENOMEM; @@ -410,5 +410,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, out: if (submit) submit_cleanup(submit, !!ret); + mutex_unlock(&dev->struct_mutex); return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 4ebce8be489..0cfe3f426ee 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -298,8 +298,6 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_drm_private *priv = dev->dev_private; int i, ret; - mutex_lock(&dev->struct_mutex); - submit->fence = ++priv->next_fence; gpu->submitted_fence = submit->fence; @@ -331,7 +329,6 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence); } hangcheck_timer_reset(gpu); - mutex_unlock(&dev->struct_mutex); return ret; } diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile index e88145ba1bf..d310c195bdf 100644 --- a/drivers/gpu/drm/nouveau/Makefile +++ b/drivers/gpu/drm/nouveau/Makefile @@ -141,6 +141,7 @@ nouveau-y += core/subdev/mc/base.o nouveau-y += core/subdev/mc/nv04.o nouveau-y += core/subdev/mc/nv40.o nouveau-y += core/subdev/mc/nv44.o +nouveau-y += core/subdev/mc/nv4c.o nouveau-y += core/subdev/mc/nv50.o nouveau-y += core/subdev/mc/nv94.o nouveau-y += core/subdev/mc/nv98.o diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nv40.c b/drivers/gpu/drm/nouveau/core/engine/device/nv40.c index 1b653dd74a7..08b88591ed6 100644 --- a/drivers/gpu/drm/nouveau/core/engine/device/nv40.c +++ b/drivers/gpu/drm/nouveau/core/engine/device/nv40.c @@ -311,7 +311,7 @@ nv40_identify(struct nouveau_device *device) device->oclass[NVDEV_SUBDEV_CLOCK ] = &nv40_clock_oclass; device->oclass[NVDEV_SUBDEV_THERM ] = &nv40_therm_oclass; device->oclass[NVDEV_SUBDEV_DEVINIT] = nv1a_devinit_oclass; - device->oclass[NVDEV_SUBDEV_MC ] = nv44_mc_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = nv4c_mc_oclass; device->oclass[NVDEV_SUBDEV_BUS ] = nv31_bus_oclass; device->oclass[NVDEV_SUBDEV_TIMER ] = &nv04_timer_oclass; device->oclass[NVDEV_SUBDEV_FB ] = nv46_fb_oclass; @@ -334,7 +334,7 @@ nv40_identify(struct nouveau_device *device) device->oclass[NVDEV_SUBDEV_CLOCK ] = &nv40_clock_oclass; device->oclass[NVDEV_SUBDEV_THERM ] = &nv40_therm_oclass; device->oclass[NVDEV_SUBDEV_DEVINIT] = nv1a_devinit_oclass; - device->oclass[NVDEV_SUBDEV_MC ] = nv44_mc_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = nv4c_mc_oclass; device->oclass[NVDEV_SUBDEV_BUS ] = nv31_bus_oclass; device->oclass[NVDEV_SUBDEV_TIMER ] = &nv04_timer_oclass; device->oclass[NVDEV_SUBDEV_FB ] = nv4e_fb_oclass; @@ -357,7 +357,7 @@ nv40_identify(struct nouveau_device *device) device->oclass[NVDEV_SUBDEV_CLOCK ] = &nv40_clock_oclass; device->oclass[NVDEV_SUBDEV_THERM ] = &nv40_therm_oclass; device->oclass[NVDEV_SUBDEV_DEVINIT] = nv1a_devinit_oclass; - device->oclass[NVDEV_SUBDEV_MC ] = nv44_mc_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = nv4c_mc_oclass; device->oclass[NVDEV_SUBDEV_BUS ] = nv31_bus_oclass; device->oclass[NVDEV_SUBDEV_TIMER ] = &nv04_timer_oclass; device->oclass[NVDEV_SUBDEV_FB ] = nv46_fb_oclass; @@ -380,7 +380,7 @@ nv40_identify(struct nouveau_device *device) device->oclass[NVDEV_SUBDEV_CLOCK ] = &nv40_clock_oclass; device->oclass[NVDEV_SUBDEV_THERM ] = &nv40_therm_oclass; device->oclass[NVDEV_SUBDEV_DEVINIT] = nv1a_devinit_oclass; - device->oclass[NVDEV_SUBDEV_MC ] = nv44_mc_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = nv4c_mc_oclass; device->oclass[NVDEV_SUBDEV_BUS ] = nv31_bus_oclass; device->oclass[NVDEV_SUBDEV_TIMER ] = &nv04_timer_oclass; device->oclass[NVDEV_SUBDEV_FB ] = nv46_fb_oclass; @@ -403,7 +403,7 @@ nv40_identify(struct nouveau_device *device) device->oclass[NVDEV_SUBDEV_CLOCK ] = &nv40_clock_oclass; device->oclass[NVDEV_SUBDEV_THERM ] = &nv40_therm_oclass; device->oclass[NVDEV_SUBDEV_DEVINIT] = nv1a_devinit_oclass; - device->oclass[NVDEV_SUBDEV_MC ] = nv44_mc_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = nv4c_mc_oclass; device->oclass[NVDEV_SUBDEV_BUS ] = nv31_bus_oclass; device->oclass[NVDEV_SUBDEV_TIMER ] = &nv04_timer_oclass; device->oclass[NVDEV_SUBDEV_FB ] = nv46_fb_oclass; diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c index 940eaa5d8b9..9ad722e4e08 100644 --- a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c @@ -1142,7 +1142,7 @@ nv50_disp_intr_unk20_2(struct nv50_disp_priv *priv, int head) if (conf != ~0) { if (outp.location == 0 && outp.type == DCB_OUTPUT_DP) { u32 soff = (ffs(outp.or) - 1) * 0x08; - u32 ctrl = nv_rd32(priv, 0x610798 + soff); + u32 ctrl = nv_rd32(priv, 0x610794 + soff); u32 datarate; switch ((ctrl & 0x000f0000) >> 16) { diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c index 9a850fe1951..54c1b5b471c 100644 --- a/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c @@ -112,7 +112,7 @@ nve0_fifo_runlist_update(struct nve0_fifo_priv *priv, u32 engine) nv_wr32(priv, 0x002270, cur->addr >> 12); nv_wr32(priv, 0x002274, (engine << 20) | (p >> 3)); - if (!nv_wait(priv, 0x002284 + (engine * 4), 0x00100000, 0x00000000)) + if (!nv_wait(priv, 0x002284 + (engine * 8), 0x00100000, 0x00000000)) nv_error(priv, "runlist %d update timeout\n", engine); mutex_unlock(&nv_subdev(priv)->mutex); } diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c b/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c index 30ed19c52e0..7a367c40297 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c @@ -539,7 +539,7 @@ nv50_priv_tp_trap(struct nv50_graph_priv *priv, int type, u32 ustatus_old, ustatus &= ~0x04030000; } if (ustatus && display) { - nv_error("%s - TP%d:", name, i); + nv_error(priv, "%s - TP%d:", name, i); nouveau_bitfield_print(nv50_mpc_traps, ustatus); pr_cont("\n"); ustatus = 0; diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h index adc88b73d91..3c6738edd12 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h @@ -47,6 +47,7 @@ struct nouveau_mc_oclass { extern struct nouveau_oclass *nv04_mc_oclass; extern struct nouveau_oclass *nv40_mc_oclass; extern struct nouveau_oclass *nv44_mc_oclass; +extern struct nouveau_oclass *nv4c_mc_oclass; extern struct nouveau_oclass *nv50_mc_oclass; extern struct nouveau_oclass *nv94_mc_oclass; extern struct nouveau_oclass *nv98_mc_oclass; diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c index aa0fbbec7f0..ef0c9c4a8cc 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c @@ -130,6 +130,10 @@ nouveau_bios_shadow_prom(struct nouveau_bios *bios) u16 pcir; int i; + /* there is no prom on nv4x IGP's */ + if (device->card_type == NV_40 && device->chipset >= 0x4c) + return; + /* enable access to rom */ if (device->card_type >= NV_50) pcireg = 0x088050; diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv1a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv1a.c index 9159a5ccee9..265d1253624 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv1a.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv1a.c @@ -36,7 +36,7 @@ nv1a_fb_oclass = &(struct nv04_fb_impl) { .fini = _nouveau_fb_fini, }, .base.memtype = nv04_fb_memtype_valid, - .base.ram = &nv10_ram_oclass, + .base.ram = &nv1a_ram_oclass, .tile.regions = 8, .tile.init = nv10_fb_tile_init, .tile.fini = nv10_fb_tile_fini, diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.h b/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.h index b0d5c31606c..81a408e7d03 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.h +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.h @@ -14,6 +14,7 @@ int nv04_mc_ctor(struct nouveau_object *, struct nouveau_object *, extern const struct nouveau_mc_intr nv04_mc_intr[]; int nv04_mc_init(struct nouveau_object *); void nv40_mc_msi_rearm(struct nouveau_mc *); +int nv44_mc_init(struct nouveau_object *object); int nv50_mc_init(struct nouveau_object *); extern const struct nouveau_mc_intr nv50_mc_intr[]; extern const struct nouveau_mc_intr nvc0_mc_intr[]; diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c index 3bfee5c6c4f..cc4d0d2d886 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c @@ -24,7 +24,7 @@ #include "nv04.h" -static int +int nv44_mc_init(struct nouveau_object *object) { struct nv04_mc_priv *priv = (void *)object; diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c new file mode 100644 index 00000000000..a75c35ccf25 --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c @@ -0,0 +1,45 @@ +/* + * Copyright 2014 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ilia Mirkin + */ + +#include "nv04.h" + +static void +nv4c_mc_msi_rearm(struct nouveau_mc *pmc) +{ + struct nv04_mc_priv *priv = (void *)pmc; + nv_wr08(priv, 0x088050, 0xff); +} + +struct nouveau_oclass * +nv4c_mc_oclass = &(struct nouveau_mc_oclass) { + .base.handle = NV_SUBDEV(MC, 0x4c), + .base.ofuncs = &(struct nouveau_ofuncs) { + .ctor = nv04_mc_ctor, + .dtor = _nouveau_mc_dtor, + .init = nv44_mc_init, + .fini = _nouveau_mc_fini, + }, + .intr = nv04_mc_intr, + .msi_rearm = nv4c_mc_msi_rearm, +}.base; diff --git a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c index 129120473f6..13c5af88a60 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c @@ -87,55 +87,39 @@ mxm_shadow_dsm(struct nouveau_mxm *mxm, u8 version) 0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65 }; u32 mxms_args[] = { 0x00000000 }; - union acpi_object args[4] = { - /* _DSM MUID */ - { .buffer.type = 3, - .buffer.length = sizeof(muid), - .buffer.pointer = muid, - }, - /* spec says this can be zero to mean "highest revision", but - * of course there's at least one bios out there which fails - * unless you pass in exactly the version it supports.. - */ - { .integer.type = ACPI_TYPE_INTEGER, - .integer.value = (version & 0xf0) << 4 | (version & 0x0f), - }, - /* MXMS function */ - { .integer.type = ACPI_TYPE_INTEGER, - .integer.value = 0x00000010, - }, - /* Pointer to MXMS arguments */ - { .buffer.type = ACPI_TYPE_BUFFER, - .buffer.length = sizeof(mxms_args), - .buffer.pointer = (char *)mxms_args, - }, + union acpi_object argv4 = { + .buffer.type = ACPI_TYPE_BUFFER, + .buffer.length = sizeof(mxms_args), + .buffer.pointer = (char *)mxms_args, }; - struct acpi_object_list list = { ARRAY_SIZE(args), args }; - struct acpi_buffer retn = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; acpi_handle handle; - int ret; + int rev; handle = ACPI_HANDLE(&device->pdev->dev); if (!handle) return false; - ret = acpi_evaluate_object(handle, "_DSM", &list, &retn); - if (ret) { - nv_debug(mxm, "DSM MXMS failed: %d\n", ret); + /* + * spec says this can be zero to mean "highest revision", but + * of course there's at least one bios out there which fails + * unless you pass in exactly the version it supports.. + */ + rev = (version & 0xf0) << 4 | (version & 0x0f); + obj = acpi_evaluate_dsm(handle, muid, rev, 0x00000010, &argv4); + if (!obj) { + nv_debug(mxm, "DSM MXMS failed\n"); return false; } - obj = retn.pointer; if (obj->type == ACPI_TYPE_BUFFER) { mxm->mxms = kmemdup(obj->buffer.pointer, obj->buffer.length, GFP_KERNEL); - } else - if (obj->type == ACPI_TYPE_INTEGER) { + } else if (obj->type == ACPI_TYPE_INTEGER) { nv_debug(mxm, "DSM MXMS returned 0x%llx\n", obj->integer.value); } - kfree(obj); + ACPI_FREE(obj); return mxm->mxms != NULL; } #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index d9f6120e972..83face3f608 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -1,15 +1,10 @@ #include <linux/pci.h> #include <linux/acpi.h> #include <linux/slab.h> -#include <acpi/acpi_drivers.h> -#include <acpi/acpi_bus.h> -#include <acpi/video.h> -#include <acpi/acpi.h> #include <linux/mxm-wmi.h> - #include <linux/vga_switcheroo.h> - #include <drm/drm_edid.h> +#include <acpi/video.h> #include "nouveau_drm.h" #include "nouveau_acpi.h" @@ -79,124 +74,89 @@ static const char nouveau_op_dsm_muid[] = { static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *result) { - struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; - struct acpi_object_list input; - union acpi_object params[4]; + int i; union acpi_object *obj; - int i, err; char args_buff[4]; + union acpi_object argv4 = { + .buffer.type = ACPI_TYPE_BUFFER, + .buffer.length = 4, + .buffer.pointer = args_buff + }; - input.count = 4; - input.pointer = params; - params[0].type = ACPI_TYPE_BUFFER; - params[0].buffer.length = sizeof(nouveau_op_dsm_muid); - params[0].buffer.pointer = (char *)nouveau_op_dsm_muid; - params[1].type = ACPI_TYPE_INTEGER; - params[1].integer.value = 0x00000100; - params[2].type = ACPI_TYPE_INTEGER; - params[2].integer.value = func; - params[3].type = ACPI_TYPE_BUFFER; - params[3].buffer.length = 4; /* ACPI is little endian, AABBCCDD becomes {DD,CC,BB,AA} */ for (i = 0; i < 4; i++) args_buff[i] = (arg >> i * 8) & 0xFF; - params[3].buffer.pointer = args_buff; - err = acpi_evaluate_object(handle, "_DSM", &input, &output); - if (err) { - printk(KERN_INFO "failed to evaluate _DSM: %d\n", err); - return err; - } - - obj = (union acpi_object *)output.pointer; - - if (obj->type == ACPI_TYPE_INTEGER) - if (obj->integer.value == 0x80000002) { - return -ENODEV; - } - - if (obj->type == ACPI_TYPE_BUFFER) { - if (obj->buffer.length == 4 && result) { - *result = 0; + *result = 0; + obj = acpi_evaluate_dsm_typed(handle, nouveau_op_dsm_muid, 0x00000100, + func, &argv4, ACPI_TYPE_BUFFER); + if (!obj) { + acpi_handle_info(handle, "failed to evaluate _DSM\n"); + return AE_ERROR; + } else { + if (obj->buffer.length == 4) { *result |= obj->buffer.pointer[0]; *result |= (obj->buffer.pointer[1] << 8); *result |= (obj->buffer.pointer[2] << 16); *result |= (obj->buffer.pointer[3] << 24); } + ACPI_FREE(obj); } - kfree(output.pointer); return 0; } -static int nouveau_dsm(acpi_handle handle, int func, int arg, uint32_t *result) +/* + * On some platforms, _DSM(nouveau_op_dsm_muid, func0) has special + * requirements on the fourth parameter, so a private implementation + * instead of using acpi_check_dsm(). + */ +static int nouveau_check_optimus_dsm(acpi_handle handle) { - struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; - struct acpi_object_list input; - union acpi_object params[4]; - union acpi_object *obj; - int err; - - input.count = 4; - input.pointer = params; - params[0].type = ACPI_TYPE_BUFFER; - params[0].buffer.length = sizeof(nouveau_dsm_muid); - params[0].buffer.pointer = (char *)nouveau_dsm_muid; - params[1].type = ACPI_TYPE_INTEGER; - params[1].integer.value = 0x00000102; - params[2].type = ACPI_TYPE_INTEGER; - params[2].integer.value = func; - params[3].type = ACPI_TYPE_INTEGER; - params[3].integer.value = arg; - - err = acpi_evaluate_object(handle, "_DSM", &input, &output); - if (err) { - printk(KERN_INFO "failed to evaluate _DSM: %d\n", err); - return err; - } - - obj = (union acpi_object *)output.pointer; + int result; - if (obj->type == ACPI_TYPE_INTEGER) - if (obj->integer.value == 0x80000002) - return -ENODEV; - - if (obj->type == ACPI_TYPE_BUFFER) { - if (obj->buffer.length == 4 && result) { - *result = 0; - *result |= obj->buffer.pointer[0]; - *result |= (obj->buffer.pointer[1] << 8); - *result |= (obj->buffer.pointer[2] << 16); - *result |= (obj->buffer.pointer[3] << 24); - } - } + /* + * Function 0 returns a Buffer containing available functions. + * The args parameter is ignored for function 0, so just put 0 in it + */ + if (nouveau_optimus_dsm(handle, 0, 0, &result)) + return 0; - kfree(output.pointer); - return 0; + /* + * ACPI Spec v4 9.14.1: if bit 0 is zero, no function is supported. + * If the n-th bit is enabled, function n is supported + */ + return result & 1 && result & (1 << NOUVEAU_DSM_OPTIMUS_CAPS); } -/* Returns 1 if a DSM function is usable and 0 otherwise */ -static int nouveau_test_dsm(acpi_handle test_handle, - int (*dsm_func)(acpi_handle, int, int, uint32_t *), - int sfnc) +static int nouveau_dsm(acpi_handle handle, int func, int arg) { - u32 result = 0; - - /* Function 0 returns a Buffer containing available functions. The args - * parameter is ignored for function 0, so just put 0 in it */ - if (dsm_func(test_handle, 0, 0, &result)) - return 0; + int ret = 0; + union acpi_object *obj; + union acpi_object argv4 = { + .integer.type = ACPI_TYPE_INTEGER, + .integer.value = arg, + }; + + obj = acpi_evaluate_dsm_typed(handle, nouveau_dsm_muid, 0x00000102, + func, &argv4, ACPI_TYPE_INTEGER); + if (!obj) { + acpi_handle_info(handle, "failed to evaluate _DSM\n"); + return AE_ERROR; + } else { + if (obj->integer.value == 0x80000002) + ret = -ENODEV; + ACPI_FREE(obj); + } - /* ACPI Spec v4 9.14.1: if bit 0 is zero, no function is supported. If - * the n-th bit is enabled, function n is supported */ - return result & 1 && result & (1 << sfnc); + return ret; } static int nouveau_dsm_switch_mux(acpi_handle handle, int mux_id) { mxm_wmi_call_mxmx(mux_id == NOUVEAU_DSM_LED_STAMINA ? MXM_MXDS_ADAPTER_IGD : MXM_MXDS_ADAPTER_0); mxm_wmi_call_mxds(mux_id == NOUVEAU_DSM_LED_STAMINA ? MXM_MXDS_ADAPTER_IGD : MXM_MXDS_ADAPTER_0); - return nouveau_dsm(handle, NOUVEAU_DSM_LED, mux_id, NULL); + return nouveau_dsm(handle, NOUVEAU_DSM_LED, mux_id); } static int nouveau_dsm_set_discrete_state(acpi_handle handle, enum vga_switcheroo_state state) @@ -206,7 +166,7 @@ static int nouveau_dsm_set_discrete_state(acpi_handle handle, enum vga_switchero arg = NOUVEAU_DSM_POWER_SPEED; else arg = NOUVEAU_DSM_POWER_STAMINA; - nouveau_dsm(handle, NOUVEAU_DSM_POWER, arg, NULL); + nouveau_dsm(handle, NOUVEAU_DSM_POWER, arg); return 0; } @@ -266,11 +226,11 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev) nouveau_dsm_priv.other_handle = dhandle; return false; } - if (nouveau_test_dsm(dhandle, nouveau_dsm, NOUVEAU_DSM_POWER)) + if (acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102, + 1 << NOUVEAU_DSM_POWER)) retval |= NOUVEAU_DSM_HAS_MUX; - if (nouveau_test_dsm(dhandle, nouveau_optimus_dsm, - NOUVEAU_DSM_OPTIMUS_CAPS)) + if (nouveau_check_optimus_dsm(dhandle)) retval |= NOUVEAU_DSM_HAS_OPT; if (retval & NOUVEAU_DSM_HAS_OPT) { diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 488686d490c..4aed1714b9a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1249,7 +1249,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) mem->bus.is_iomem = !dev->agp->cant_use_aperture; } #endif - if (!node->memtype) + if (nv_device(drm->device)->card_type < NV_50 || !node->memtype) /* untiled */ break; /* fallthrough, tiled memory */ diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 78c8e7146d5..89c484d8ac2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -376,6 +376,8 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) if (ret) goto fail_device; + dev->irq_enabled = true; + /* workaround an odd issue on nvc1 by disabling the device's * nosnoop capability. hopefully won't cause issues until a * better fix is found - assuming there is one... @@ -475,6 +477,7 @@ nouveau_drm_remove(struct pci_dev *pdev) struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_object *device; + dev->irq_enabled = false; device = drm->client.base.device; drm_put_dev(dev); diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c index 81638d7f2ef..471347edc27 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vga.c +++ b/drivers/gpu/drm/nouveau/nouveau_vga.c @@ -14,7 +14,9 @@ nouveau_vga_set_decode(void *priv, bool state) { struct nouveau_device *device = nouveau_dev(priv); - if (device->chipset >= 0x40) + if (device->card_type == NV_40 && device->chipset >= 0x4c) + nv_wr32(device, 0x088060, state); + else if (device->chipset >= 0x40) nv_wr32(device, 0x088054, state); else nv_wr32(device, 0x001854, state); diff --git a/drivers/gpu/drm/qxl/Kconfig b/drivers/gpu/drm/qxl/Kconfig index 66ac0ff95f5..38c2bb72e45 100644 --- a/drivers/gpu/drm/qxl/Kconfig +++ b/drivers/gpu/drm/qxl/Kconfig @@ -5,9 +5,11 @@ config DRM_QXL select FB_SYS_COPYAREA select FB_SYS_IMAGEBLIT select FB_DEFERRED_IO - select DRM_KMS_HELPER + select DRM_KMS_HELPER select DRM_KMS_FB_HELPER - select DRM_TTM + select DRM_TTM select CRC32 help - QXL virtual GPU for Spice virtualization desktop integration. Do not enable this driver unless your distro ships a corresponding X.org QXL driver that can handle kernel modesetting. + QXL virtual GPU for Spice virtualization desktop integration. + Do not enable this driver unless your distro ships a corresponding + X.org QXL driver that can handle kernel modesetting. diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 306364a1ecd..09433534dc4 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ - ci_dpm.o dce6_afmt.o + ci_dpm.o dce6_afmt.o radeon_vm.o # add async DMA block radeon-y += \ @@ -99,6 +99,12 @@ radeon-y += \ uvd_v3_1.o \ uvd_v4_2.o +# add VCE block +radeon-y += \ + radeon_vce.o \ + vce_v1_0.o \ + vce_v2_0.o \ + radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o radeon-$(CONFIG_ACPI) += radeon_acpi.o diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index a9338c85630..0d19f4f94d5 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -559,7 +559,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, u32 adjusted_clock = mode->clock; int encoder_mode = atombios_get_encoder_mode(encoder); u32 dp_clock = mode->clock; - int bpc = radeon_get_monitor_bpc(connector); + int bpc = radeon_crtc->bpc; bool is_duallink = radeon_dig_monitor_is_duallink(encoder, mode->clock); /* reset the pll flags */ @@ -1176,7 +1176,7 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, evergreen_tiling_fields(tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); /* Set NUM_BANKS. */ - if (rdev->family >= CHIP_BONAIRE) { + if (rdev->family >= CHIP_TAHITI) { unsigned tileb, index, num_banks, tile_split_bytes; /* Calculate the macrotile mode index. */ @@ -1194,13 +1194,14 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, return -EINVAL; } - num_banks = (rdev->config.cik.macrotile_mode_array[index] >> 6) & 0x3; + if (rdev->family >= CHIP_BONAIRE) + num_banks = (rdev->config.cik.macrotile_mode_array[index] >> 6) & 0x3; + else + num_banks = (rdev->config.si.tile_mode_array[index] >> 20) & 0x3; fb_format |= EVERGREEN_GRPH_NUM_BANKS(num_banks); } else { - /* SI and older. */ - if (rdev->family >= CHIP_TAHITI) - tmp = rdev->config.si.tile_config; - else if (rdev->family >= CHIP_CAYMAN) + /* NI and older. */ + if (rdev->family >= CHIP_CAYMAN) tmp = rdev->config.cayman.tile_config; else tmp = rdev->config.evergreen.tile_config; diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index a42d61571f4..2cec2ab02f8 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -464,11 +464,12 @@ atombios_tv_setup(struct drm_encoder *encoder, int action) static u8 radeon_atom_get_bpc(struct drm_encoder *encoder) { - struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); int bpc = 8; - if (connector) - bpc = radeon_get_monitor_bpc(connector); + if (encoder->crtc) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc); + bpc = radeon_crtc->bpc; + } switch (bpc) { case 0: diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c index 0fbd36f3d4e..f81d7ca134d 100644 --- a/drivers/gpu/drm/radeon/btc_dpm.c +++ b/drivers/gpu/drm/radeon/btc_dpm.c @@ -29,6 +29,7 @@ #include "cypress_dpm.h" #include "btc_dpm.h" #include "atom.h" +#include <linux/seq_file.h> #define MC_CG_ARB_FREQ_F0 0x0a #define MC_CG_ARB_FREQ_F1 0x0b @@ -2600,6 +2601,10 @@ int btc_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv7xx_parse_power_table(rdev); if (ret) return ret; @@ -2756,6 +2761,37 @@ void btc_dpm_fini(struct radeon_device *rdev) r600_free_extended_power_table(rdev); } +void btc_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, + struct seq_file *m) +{ + struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev); + struct radeon_ps *rps = &eg_pi->current_rps; + struct rv7xx_ps *ps = rv770_get_ps(rps); + struct rv7xx_pl *pl; + u32 current_index = + (RREG32(TARGET_AND_CURRENT_PROFILE_INDEX) & CURRENT_PROFILE_INDEX_MASK) >> + CURRENT_PROFILE_INDEX_SHIFT; + + if (current_index > 2) { + seq_printf(m, "invalid dpm profile %d\n", current_index); + } else { + if (current_index == 0) + pl = &ps->low; + else if (current_index == 1) + pl = &ps->medium; + else /* current_index == 2 */ + pl = &ps->high; + seq_printf(m, "uvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); + if (rdev->family >= CHIP_CEDAR) { + seq_printf(m, "power level %d sclk: %u mclk: %u vddc: %u vddci: %u\n", + current_index, pl->sclk, pl->mclk, pl->vddc, pl->vddci); + } else { + seq_printf(m, "power level %d sclk: %u mclk: %u vddc: %u\n", + current_index, pl->sclk, pl->mclk, pl->vddc); + } + } +} + u32 btc_dpm_get_sclk(struct radeon_device *rdev, bool low) { struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev); diff --git a/drivers/gpu/drm/radeon/btcd.h b/drivers/gpu/drm/radeon/btcd.h index 29e32de7e02..9c65be2d55a 100644 --- a/drivers/gpu/drm/radeon/btcd.h +++ b/drivers/gpu/drm/radeon/btcd.h @@ -44,6 +44,10 @@ # define DYN_SPREAD_SPECTRUM_EN (1 << 23) # define AC_DC_SW (1 << 24) +#define TARGET_AND_CURRENT_PROFILE_INDEX 0x66c +# define CURRENT_PROFILE_INDEX_MASK (0xf << 4) +# define CURRENT_PROFILE_INDEX_SHIFT 4 + #define CG_BIF_REQ_AND_RSP 0x7f4 #define CG_CLIENT_REQ(x) ((x) << 0) #define CG_CLIENT_REQ_MASK (0xff << 0) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 8d49104ca6c..cad89a97752 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -172,6 +172,8 @@ extern void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev, extern void cik_enter_rlc_safe_mode(struct radeon_device *rdev); extern void cik_exit_rlc_safe_mode(struct radeon_device *rdev); extern int ci_mc_load_microcode(struct radeon_device *rdev); +extern void cik_update_cg(struct radeon_device *rdev, + u32 block, bool enable); static int ci_get_std_voltage_value_sidd(struct radeon_device *rdev, struct atom_voltage_table_entry *voltage_table, @@ -746,6 +748,14 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev, u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; int i; + if (rps->vce_active) { + rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; + rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; + } else { + rps->evclk = 0; + rps->ecclk = 0; + } + if ((rdev->pm.dpm.new_active_crtc_count > 1) || ci_dpm_vblank_too_short(rdev)) disable_mclk_switching = true; @@ -804,6 +814,13 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev, sclk = ps->performance_levels[0].sclk; } + if (rps->vce_active) { + if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) + sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; + if (mclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk) + mclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk; + } + ps->performance_levels[0].sclk = sclk; ps->performance_levels[0].mclk = mclk; @@ -3468,7 +3485,6 @@ static int ci_enable_uvd_dpm(struct radeon_device *rdev, bool enable) 0 : -EINVAL; } -#if 0 static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable) { struct ci_power_info *pi = ci_get_pi(rdev); @@ -3501,6 +3517,7 @@ static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable) 0 : -EINVAL; } +#if 0 static int ci_enable_samu_dpm(struct radeon_device *rdev, bool enable) { struct ci_power_info *pi = ci_get_pi(rdev); @@ -3587,7 +3604,6 @@ static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate) return ci_enable_uvd_dpm(rdev, !gate); } -#if 0 static u8 ci_get_vce_boot_level(struct radeon_device *rdev) { u8 i; @@ -3608,15 +3624,15 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, struct radeon_ps *radeon_current_state) { struct ci_power_info *pi = ci_get_pi(rdev); - bool new_vce_clock_non_zero = (radeon_new_state->evclk != 0); - bool old_vce_clock_non_zero = (radeon_current_state->evclk != 0); int ret = 0; u32 tmp; - if (new_vce_clock_non_zero != old_vce_clock_non_zero) { - if (new_vce_clock_non_zero) { - pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); + if (radeon_current_state->evclk != radeon_new_state->evclk) { + if (radeon_new_state->evclk) { + /* turn the clocks on when encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false); + pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); tmp = RREG32_SMC(DPM_TABLE_475); tmp &= ~VceBootLevel_MASK; tmp |= VceBootLevel(pi->smc_state_table.VceBootLevel); @@ -3624,12 +3640,16 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, ret = ci_enable_vce_dpm(rdev, true); } else { + /* turn the clocks off when not encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true); + ret = ci_enable_vce_dpm(rdev, false); } } return ret; } +#if 0 static int ci_update_samu_dpm(struct radeon_device *rdev, bool gate) { return ci_enable_samu_dpm(rdev, gate); @@ -4752,13 +4772,13 @@ int ci_dpm_set_power_state(struct radeon_device *rdev) DRM_ERROR("ci_generate_dpm_level_enable_mask failed\n"); return ret; } -#if 0 + ret = ci_update_vce_dpm(rdev, new_ps, old_ps); if (ret) { DRM_ERROR("ci_update_vce_dpm failed\n"); return ret; } -#endif + ret = ci_update_sclk_t(rdev); if (ret) { DRM_ERROR("ci_update_sclk_t failed\n"); @@ -4959,9 +4979,6 @@ static int ci_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -4998,6 +5015,21 @@ static int ci_parse_power_table(struct radeon_device *rdev) power_state_offset += 2 + power_state->v2.ucNumDPMLevels; } rdev->pm.dpm.num_ps = state_array->ucNumEntries; + + /* fill in the vce power states */ + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { + u32 sclk, mclk; + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + sclk = le16_to_cpu(clock_info->ci.usEngineClockLow); + sclk |= clock_info->ci.ucEngineClockHigh << 16; + mclk = le16_to_cpu(clock_info->ci.usMemoryClockLow); + mclk |= clock_info->ci.ucMemoryClockHigh << 16; + rdev->pm.dpm.vce_states[i].sclk = sclk; + rdev->pm.dpm.vce_states[i].mclk = mclk; + } + return 0; } @@ -5077,17 +5109,25 @@ int ci_dpm_init(struct radeon_device *rdev) ci_dpm_fini(rdev); return ret; } - ret = ci_parse_power_table(rdev); + + ret = r600_get_platform_caps(rdev); if (ret) { ci_dpm_fini(rdev); return ret; } + ret = r600_parse_extended_power_table(rdev); if (ret) { ci_dpm_fini(rdev); return ret; } + ret = ci_parse_power_table(rdev); + if (ret) { + ci_dpm_fini(rdev); + return ret; + } + pi->dll_default_on = false; pi->sram_end = SMC_RAM_END; @@ -5120,6 +5160,7 @@ int ci_dpm_init(struct radeon_device *rdev) pi->caps_sclk_throttle_low_notification = false; pi->caps_uvd_dpm = true; + pi->caps_vce_dpm = true; ci_get_leakage_voltages(rdev); ci_patch_dependency_tables_with_leakage(rdev); diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e6419ca7cd3..92e38b54efb 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -75,6 +75,7 @@ extern void si_init_uvd_internal_cg(struct radeon_device *rdev); extern int cik_sdma_resume(struct radeon_device *rdev); extern void cik_sdma_enable(struct radeon_device *rdev, bool enable); extern void cik_sdma_fini(struct radeon_device *rdev); +extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable); static void cik_rlc_stop(struct radeon_device *rdev); static void cik_pcie_gen3_enable(struct radeon_device *rdev); static void cik_program_aspm(struct radeon_device *rdev); @@ -4030,8 +4031,6 @@ static int cik_cp_gfx_resume(struct radeon_device *rdev) WREG32(CP_RB0_BASE, rb_addr); WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr)); - ring->rptr = RREG32(CP_RB0_RPTR); - /* start the ring */ cik_cp_gfx_start(rdev); rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; @@ -4586,8 +4585,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) rdev->ring[idx].wptr = 0; mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr; WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); - rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR); - mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr; + mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR); /* set the vmid for the queue */ mqd->queue_state.cp_hqd_vmid = 0; @@ -5117,11 +5115,9 @@ bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) if (!(reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } @@ -6141,6 +6137,10 @@ void cik_update_cg(struct radeon_device *rdev, cik_enable_hdp_mgcg(rdev, enable); cik_enable_hdp_ls(rdev, enable); } + + if (block & RADEON_CG_BLOCK_VCE) { + vce_v2_0_enable_mgcg(rdev, enable); + } } static void cik_init_cg(struct radeon_device *rdev) @@ -7490,6 +7490,20 @@ restart_ih: /* reset addr and status */ WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); break; + case 167: /* VCE */ + DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data); + switch (src_data) { + case 0: + radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX); + break; + case 1: + radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); + break; + } + break; case 176: /* GFX RB CP_INT */ case 177: /* GFX IB CP_INT */ radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); @@ -7789,6 +7803,22 @@ static int cik_startup(struct radeon_device *rdev) if (r) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + r = radeon_vce_resume(rdev); + if (!r) { + r = vce_v2_0_resume(rdev); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE1_INDEX); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE2_INDEX); + } + if (r) { + dev_err(rdev->dev, "VCE init error (%d).\n", r); + rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0; + rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0; + } + /* Enable IRQ */ if (!rdev->irq.installed) { r = radeon_irq_kms_init(rdev); @@ -7864,6 +7894,23 @@ static int cik_startup(struct radeon_device *rdev) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } + r = -ENOENT; + + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + VCE_CMD_NO_OP); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + VCE_CMD_NO_OP); + + if (!r) + r = vce_v1_0_init(rdev); + else if (r != -ENOENT) + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -7934,6 +7981,7 @@ int cik_suspend(struct radeon_device *rdev) cik_sdma_enable(rdev, false); uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); + radeon_vce_suspend(rdev); cik_fini_pg(rdev); cik_fini_cg(rdev); cik_irq_suspend(rdev); @@ -8066,6 +8114,17 @@ int cik_init(struct radeon_device *rdev) r600_ring_init(rdev, ring, 4096); } + r = radeon_vce_init(rdev); + if (!r) { + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -8127,6 +8186,7 @@ void cik_fini(struct radeon_device *rdev) radeon_irq_kms_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); + radeon_vce_fini(rdev); cik_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); @@ -8865,6 +8925,41 @@ int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) return r; } +int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk) +{ + int r, i; + struct atom_clock_dividers dividers; + u32 tmp; + + r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + ecclk, false, ÷rs); + if (r) + return r; + + for (i = 0; i < 100; i++) { + if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + tmp = RREG32_SMC(CG_ECLK_CNTL); + tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK); + tmp |= dividers.post_divider; + WREG32_SMC(CG_ECLK_CNTL, tmp); + + for (i = 0; i < 100; i++) { + if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + return 0; +} + static void cik_pcie_gen3_enable(struct radeon_device *rdev) { struct pci_dev *root = rdev->pdev->bus->self; diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 1ecb3f1070e..00150ac49cd 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -362,8 +362,6 @@ static int cik_sdma_gfx_resume(struct radeon_device *rdev) ring->wptr = 0; WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2); - ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2; - /* enable DMA RB */ WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE); @@ -713,11 +711,9 @@ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) mask = RADEON_RESET_DMA1; if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 98bae9d7b74..213873270d5 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -203,6 +203,12 @@ #define CTF_TEMP_MASK 0x0003fe00 #define CTF_TEMP_SHIFT 9 +#define CG_ECLK_CNTL 0xC05000AC +# define ECLK_DIVIDER_MASK 0x7f +# define ECLK_DIR_CNTL_EN (1 << 8) +#define CG_ECLK_STATUS 0xC05000B0 +# define ECLK_STATUS (1 << 0) + #define CG_SPLL_FUNC_CNTL 0xC0500140 #define SPLL_RESET (1 << 0) #define SPLL_PWRON (1 << 1) @@ -2010,4 +2016,47 @@ /* UVD CTX indirect */ #define UVD_CGC_MEM_CTRL 0xC0 +/* VCE */ + +#define VCE_VCPU_CACHE_OFFSET0 0x20024 +#define VCE_VCPU_CACHE_SIZE0 0x20028 +#define VCE_VCPU_CACHE_OFFSET1 0x2002c +#define VCE_VCPU_CACHE_SIZE1 0x20030 +#define VCE_VCPU_CACHE_OFFSET2 0x20034 +#define VCE_VCPU_CACHE_SIZE2 0x20038 +#define VCE_RB_RPTR2 0x20178 +#define VCE_RB_WPTR2 0x2017c +#define VCE_RB_RPTR 0x2018c +#define VCE_RB_WPTR 0x20190 +#define VCE_CLOCK_GATING_A 0x202f8 +# define CGC_CLK_GATE_DLY_TIMER_MASK (0xf << 0) +# define CGC_CLK_GATE_DLY_TIMER(x) ((x) << 0) +# define CGC_CLK_GATER_OFF_DLY_TIMER_MASK (0xff << 4) +# define CGC_CLK_GATER_OFF_DLY_TIMER(x) ((x) << 4) +# define CGC_UENC_WAIT_AWAKE (1 << 18) +#define VCE_CLOCK_GATING_B 0x202fc +#define VCE_CGTT_CLK_OVERRIDE 0x207a0 +#define VCE_UENC_CLOCK_GATING 0x207bc +# define CLOCK_ON_DELAY_MASK (0xf << 0) +# define CLOCK_ON_DELAY(x) ((x) << 0) +# define CLOCK_OFF_DELAY_MASK (0xff << 4) +# define CLOCK_OFF_DELAY(x) ((x) << 4) +#define VCE_UENC_REG_CLOCK_GATING 0x207c0 +#define VCE_SYS_INT_EN 0x21300 +# define VCE_SYS_INT_TRAP_INTERRUPT_EN (1 << 3) +#define VCE_LMI_CTRL2 0x21474 +#define VCE_LMI_CTRL 0x21498 +#define VCE_LMI_VM_CTRL 0x214a0 +#define VCE_LMI_SWAP_CNTL 0x214b4 +#define VCE_LMI_SWAP_CNTL1 0x214b8 +#define VCE_LMI_CACHE_CTRL 0x214f4 + +#define VCE_CMD_NO_OP 0x00000000 +#define VCE_CMD_END 0x00000001 +#define VCE_CMD_IB 0x00000002 +#define VCE_CMD_FENCE 0x00000003 +#define VCE_CMD_TRAP 0x00000004 +#define VCE_CMD_IB_AUTO 0x00000005 +#define VCE_CMD_SEMAPHORE 0x00000006 + #endif diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c index cf783fc0ef2..5a9a5f4d788 100644 --- a/drivers/gpu/drm/radeon/cypress_dpm.c +++ b/drivers/gpu/drm/radeon/cypress_dpm.c @@ -2036,6 +2036,10 @@ int cypress_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv7xx_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index f2b9e21ce4d..b1f1253e2ce 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1680,7 +1680,7 @@ bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) case RADEON_HPD_6: if (RREG32(DC_HPD6_INT_STATUS) & DC_HPDx_SENSE) connected = true; - break; + break; default: break; } @@ -2990,8 +2990,6 @@ static int evergreen_cp_resume(struct radeon_device *rdev) WREG32(CP_RB_BASE, ring->gpu_addr >> 8); WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); - ring->rptr = RREG32(CP_RB_RPTR); - evergreen_cp_start(rdev); ring->ready = true; r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring); @@ -3952,11 +3950,9 @@ bool evergreen_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin if (!(reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index c7cac07f139..5c8b358f9fb 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -1165,7 +1165,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case DB_DEPTH_CONTROL: track->db_depth_control = radeon_get_ib_value(p, idx); @@ -1196,12 +1196,12 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } ib[idx] &= ~Z_ARRAY_MODE(0xf); track->db_z_info &= ~Z_ARRAY_MODE(0xf); - ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); @@ -1237,7 +1237,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->db_z_read_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_z_read_bo = reloc->robj; track->db_dirty = true; break; @@ -1249,7 +1249,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->db_z_write_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_z_write_bo = reloc->robj; track->db_dirty = true; break; @@ -1261,7 +1261,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->db_s_read_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_s_read_bo = reloc->robj; track->db_dirty = true; break; @@ -1273,7 +1273,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->db_s_write_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_s_write_bo = reloc->robj; track->db_dirty = true; break; @@ -1297,7 +1297,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->vgt_strmout_bo[tmp] = reloc->robj; track->streamout_dirty = true; break; @@ -1317,7 +1317,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); case CB_TARGET_MASK: track->cb_target_mask = radeon_get_ib_value(p, idx); track->cb_dirty = true; @@ -1381,8 +1381,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); + ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); } track->cb_dirty = true; break; @@ -1399,8 +1399,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); + ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); } track->cb_dirty = true; break; @@ -1461,10 +1461,10 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); @@ -1489,10 +1489,10 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); @@ -1520,7 +1520,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_fmask_bo[tmp] = reloc->robj; break; case CB_COLOR0_CMASK: @@ -1537,7 +1537,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_cmask_bo[tmp] = reloc->robj; break; case CB_COLOR0_FMASK_SLICE: @@ -1578,7 +1578,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = (reg - CB_COLOR0_BASE) / 0x3c; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_bo[tmp] = reloc->robj; track->cb_dirty = true; break; @@ -1594,7 +1594,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_bo[tmp] = reloc->robj; track->cb_dirty = true; break; @@ -1606,7 +1606,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->htile_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->htile_bo = reloc->robj; track->db_dirty = true; break; @@ -1723,7 +1723,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MEMORY_EXPORT_BASE: if (p->rdev->family >= CHIP_CAYMAN) { @@ -1737,7 +1737,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case CAYMAN_SX_SCATTER_EXPORT_BASE: if (p->rdev->family < CHIP_CAYMAN) { @@ -1751,7 +1751,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MISC: track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; @@ -1836,7 +1836,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (idx_value & 0xfffffff0) + ((u64)(tmp & 0xff) << 32); @@ -1882,7 +1882,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + idx_value + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); @@ -1909,7 +1909,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + idx_value + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); @@ -1937,7 +1937,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + radeon_get_ib_value(p, idx+1) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -2027,7 +2027,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad DISPATCH_INDIRECT\n"); return -EINVAL; } - ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff); r = evergreen_cs_track_check(p); if (r) { dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); @@ -2049,7 +2049,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -2106,7 +2106,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, tmp = radeon_get_ib_value(p, idx) + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); - offset = reloc->lobj.gpu_offset + tmp; + offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", @@ -2144,7 +2144,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, tmp = radeon_get_ib_value(p, idx+2) + ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); - offset = reloc->lobj.gpu_offset + tmp; + offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", @@ -2174,7 +2174,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad SURFACE_SYNC\n"); return -EINVAL; } - ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); } break; case PACKET3_EVENT_WRITE: @@ -2190,7 +2190,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad EVENT_WRITE\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -2212,7 +2212,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -2234,7 +2234,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -2302,11 +2302,11 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { ib[idx+1+(i*8)+1] |= - TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split); @@ -2318,7 +2318,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } } texture = reloc->robj; - toffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); /* tex mip base */ tex_dim = ib[idx+1+(i*8)+0] & 0x7; @@ -2337,7 +2337,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad SET_RESOURCE (tex)\n"); return -EINVAL; } - moffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); mipmap = reloc->robj; } @@ -2364,7 +2364,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset; } - offset64 = reloc->lobj.gpu_offset + offset; + offset64 = reloc->gpu_offset + offset; ib[idx+1+(i*8)+0] = offset64; ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | (upper_32_bits(offset64) & 0xff); @@ -2445,7 +2445,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+1] = offset; ib[idx+2] = upper_32_bits(offset) & 0xff; } @@ -2464,7 +2464,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+3] = offset; ib[idx+4] = upper_32_bits(offset) & 0xff; } @@ -2493,7 +2493,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+0] = offset; ib[idx+1] = upper_32_bits(offset) & 0xff; break; @@ -2518,7 +2518,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+1] = offset; ib[idx+2] = upper_32_bits(offset) & 0xff; } else { @@ -2542,7 +2542,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+3] = offset; ib[idx+4] = upper_32_bits(offset) & 0xff; } else { @@ -2717,7 +2717,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); p->idx += count + 7; break; /* linear */ @@ -2725,8 +2725,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; p->idx += count + 3; break; default: @@ -2768,10 +2768,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; - ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; + ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 5; break; /* Copy L2T/T2L */ @@ -2781,22 +2781,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* tiled src, linear dst */ src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); dst_offset = radeon_get_ib_value(p, idx + 7); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; } else { /* linear src, tiled dst */ src_offset = radeon_get_ib_value(p, idx+7); src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n", @@ -2827,10 +2827,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset + count, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; - ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff); + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff); + ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; + ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 5; break; /* Copy L2L, partial */ @@ -2840,10 +2840,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("L2L Partial is cayman only !\n"); return -EINVAL; } - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; - ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff); + ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff; + ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff); + ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; p->idx += 9; break; @@ -2876,12 +2876,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; - ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff; - ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; + ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff; + ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 7; break; /* Copy L2T Frame to Field */ @@ -2916,10 +2916,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); - ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 10; break; /* Copy L2T/T2L, partial */ @@ -2932,16 +2932,16 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* detile bit */ if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { /* tiled src, linear dst */ - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; } else { /* linear src, tiled dst */ - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } p->idx += 12; break; @@ -2978,10 +2978,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); - ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 10; break; /* Copy L2T/T2L (tile units) */ @@ -2992,22 +2992,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* tiled src, linear dst */ src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); dst_offset = radeon_get_ib_value(p, idx+7); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; } else { /* linear src, tiled dst */ src_offset = radeon_get_ib_value(p, idx+7); src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n", @@ -3028,8 +3028,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("L2T, T2L Partial is cayman only !\n"); return -EINVAL; } - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); - ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); + ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8); p->idx += 13; break; /* Copy L2T broadcast (tile units) */ @@ -3065,10 +3065,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); - ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 10; break; default: @@ -3089,8 +3089,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000; p->idx += 4; break; case DMA_PACKET_NOP: diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index a37b5443638..287fe966d7d 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -174,11 +174,9 @@ bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin u32 reset_mask = evergreen_gpu_check_soft_reset(rdev); if (!(reset_mask & RADEON_RESET_DMA)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index b6e01d5d2cc..16ec9d56a23 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -1223,7 +1223,7 @@ int kv_dpm_enable(struct radeon_device *rdev) int kv_dpm_late_enable(struct radeon_device *rdev) { - int ret; + int ret = 0; if (rdev->irq.installed && r600_is_internal_thermal_sensor(rdev->pm.int_thermal_type)) { @@ -1338,13 +1338,11 @@ static int kv_enable_uvd_dpm(struct radeon_device *rdev, bool enable) PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable); } -#if 0 static int kv_enable_vce_dpm(struct radeon_device *rdev, bool enable) { return kv_notify_message_to_smu(rdev, enable ? PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable); } -#endif static int kv_enable_samu_dpm(struct radeon_device *rdev, bool enable) { @@ -1389,7 +1387,6 @@ static int kv_update_uvd_dpm(struct radeon_device *rdev, bool gate) return kv_enable_uvd_dpm(rdev, !gate); } -#if 0 static u8 kv_get_vce_boot_level(struct radeon_device *rdev) { u8 i; @@ -1414,6 +1411,9 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, int ret; if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { + kv_dpm_powergate_vce(rdev, false); + /* turn the clocks on when encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false); if (pi->caps_stable_p_state) pi->vce_boot_level = table->count - 1; else @@ -1436,11 +1436,13 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, kv_enable_vce_dpm(rdev, true); } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { kv_enable_vce_dpm(rdev, false); + /* turn the clocks off when not encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true); + kv_dpm_powergate_vce(rdev, true); } return 0; } -#endif static int kv_update_samu_dpm(struct radeon_device *rdev, bool gate) { @@ -1575,11 +1577,16 @@ static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate) pi->vce_power_gated = gate; if (gate) { - if (pi->caps_vce_pg) + if (pi->caps_vce_pg) { + /* XXX do we need a vce_v1_0_stop() ? */ kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerOFF); + } } else { - if (pi->caps_vce_pg) + if (pi->caps_vce_pg) { kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerON); + vce_v2_0_resume(rdev); + vce_v1_0_start(rdev); + } } } @@ -1768,7 +1775,7 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) { struct kv_power_info *pi = kv_get_pi(rdev); struct radeon_ps *new_ps = &pi->requested_rps; - /*struct radeon_ps *old_ps = &pi->current_rps;*/ + struct radeon_ps *old_ps = &pi->current_rps; int ret; if (pi->bapm_enable) { @@ -1798,13 +1805,12 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) kv_set_enabled_levels(rdev); kv_force_lowest_valid(rdev); kv_unforce_levels(rdev); -#if 0 + ret = kv_update_vce_dpm(rdev, new_ps, old_ps); if (ret) { DRM_ERROR("kv_update_vce_dpm failed\n"); return ret; } -#endif kv_update_sclk_t(rdev); } } else { @@ -1823,13 +1829,11 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) kv_program_nbps_index_settings(rdev, new_ps); kv_freeze_sclk_dpm(rdev, false); kv_set_enabled_levels(rdev); -#if 0 ret = kv_update_vce_dpm(rdev, new_ps, old_ps); if (ret) { DRM_ERROR("kv_update_vce_dpm failed\n"); return ret; } -#endif kv_update_acp_boot_level(rdev); kv_update_sclk_t(rdev); kv_enable_nb_dpm(rdev); @@ -2037,6 +2041,14 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_clock_and_voltage_limits *max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + if (new_rps->vce_active) { + new_rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; + new_rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; + } else { + new_rps->evclk = 0; + new_rps->ecclk = 0; + } + mclk = max_limits->mclk; sclk = min_sclk; @@ -2056,6 +2068,11 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, sclk = stable_p_state_sclk; } + if (new_rps->vce_active) { + if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) + sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; + } + ps->need_dfs_bypass = true; for (i = 0; i < ps->num_levels; i++) { @@ -2092,7 +2109,8 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, } } - pi->video_start = new_rps->dclk || new_rps->vclk; + pi->video_start = new_rps->dclk || new_rps->vclk || + new_rps->evclk || new_rps->ecclk; if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) @@ -2538,9 +2556,6 @@ static int kv_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -2577,6 +2592,19 @@ static int kv_parse_power_table(struct radeon_device *rdev) power_state_offset += 2 + power_state->v2.ucNumDPMLevels; } rdev->pm.dpm.num_ps = state_array->ucNumEntries; + + /* fill in the vce power states */ + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { + u32 sclk; + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow); + sclk |= clock_info->sumo.ucEngineClockHigh << 16; + rdev->pm.dpm.vce_states[i].sclk = sclk; + rdev->pm.dpm.vce_states[i].mclk = 0; + } + return 0; } @@ -2590,6 +2618,10 @@ int kv_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = r600_parse_extended_power_table(rdev); if (ret) return ret; @@ -2623,7 +2655,7 @@ int kv_dpm_init(struct radeon_device *rdev) pi->caps_fps = false; /* true? */ pi->caps_uvd_pg = true; pi->caps_uvd_dpm = true; - pi->caps_vce_pg = false; + pi->caps_vce_pg = false; /* XXX true */ pi->caps_samu_pg = false; pi->caps_acp_pg = false; pi->caps_stable_p_state = false; diff --git a/drivers/gpu/drm/radeon/mkregtable.c b/drivers/gpu/drm/radeon/mkregtable.c index af85299f212..4a85bb644e2 100644 --- a/drivers/gpu/drm/radeon/mkregtable.c +++ b/drivers/gpu/drm/radeon/mkregtable.c @@ -655,7 +655,7 @@ static int parser_auth(struct table *t, const char *filename) /* first line will contain the last register * and gpu name */ - sscanf(buf, "%s %s", gpu_name, last_reg_s); + sscanf(buf, "%9s %9s", gpu_name, last_reg_s); t->gpu_prefix = gpu_name; last_reg = strtol(last_reg_s, NULL, 16); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index ea932ac66fc..85168ecd216 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1642,8 +1642,8 @@ static int cayman_cp_resume(struct radeon_device *rdev) ring = &rdev->ring[ridx[i]]; WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA); - ring->rptr = ring->wptr = 0; - WREG32(cp_rb_rptr[i], ring->rptr); + ring->wptr = 0; + WREG32(cp_rb_rptr[i], 0); WREG32(cp_rb_wptr[i], ring->wptr); mdelay(1); @@ -1917,11 +1917,9 @@ bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) if (!(reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 7cf96b15377..6378e027669 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -248,8 +248,6 @@ int cayman_dma_resume(struct radeon_device *rdev) ring->wptr = 0; WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); - ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2; - WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); ring->ready = true; @@ -302,11 +300,9 @@ bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) mask = RADEON_RESET_DMA1; if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index c351226ecb3..004c931606c 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -2588,7 +2588,7 @@ static int ni_populate_sq_ramping_values(struct radeon_device *rdev, if (NISLANDS_DPM2_SQ_RAMP_STI_SIZE > (STI_SIZE_MASK >> STI_SIZE_SHIFT)) enable_sq_ramping = false; - if (NISLANDS_DPM2_SQ_RAMP_LTI_RATIO <= (LTI_RATIO_MASK >> LTI_RATIO_SHIFT)) + if (NISLANDS_DPM2_SQ_RAMP_LTI_RATIO > (LTI_RATIO_MASK >> LTI_RATIO_SHIFT)) enable_sq_ramping = false; for (i = 0; i < state->performance_level_count; i++) { @@ -3945,7 +3945,6 @@ static void ni_parse_pplib_clock_info(struct radeon_device *rdev, struct rv7xx_power_info *pi = rv770_get_pi(rdev); struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev); struct ni_ps *ps = ni_get_ps(rps); - u16 vddc; struct rv7xx_pl *pl = &ps->performance_levels[index]; ps->performance_level_count = index + 1; @@ -3961,8 +3960,8 @@ static void ni_parse_pplib_clock_info(struct radeon_device *rdev, /* patch up vddc if necessary */ if (pl->vddc == 0xff01) { - if (radeon_atom_get_max_vddc(rdev, 0, 0, &vddc) == 0) - pl->vddc = vddc; + if (pi->max_vddc) + pl->vddc = pi->max_vddc; } if (rps->class & ATOM_PPLIB_CLASSIFICATION_ACPI) { @@ -4026,9 +4025,6 @@ static int ni_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -4090,6 +4086,10 @@ int ni_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = ni_parse_power_table(rdev); if (ret) return ret; @@ -4322,7 +4322,8 @@ void ni_dpm_print_power_state(struct radeon_device *rdev, void ni_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, struct seq_file *m) { - struct radeon_ps *rps = rdev->pm.dpm.current_ps; + struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev); + struct radeon_ps *rps = &eg_pi->current_rps; struct ni_ps *ps = ni_get_ps(rps); struct rv7xx_pl *pl; u32 current_index = diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index ef024ce3f7c..0a894aee740 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1193,7 +1193,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) WREG32(RADEON_CP_RB_CNTL, tmp); udelay(10); - ring->rptr = RREG32(RADEON_CP_RB_RPTR); /* Set cp mode to bus mastering & enable cp*/ WREG32(RADEON_CP_CSQ_MODE, REG_SET(RADEON_INDIRECT2_START, indirect2_start) | @@ -1275,12 +1274,12 @@ int r100_reloc_pitch_offset(struct radeon_cs_parser *p, value = radeon_get_ib_value(p, idx); tmp = value & 0x003fffff; - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); + tmp += (((u32)reloc->gpu_offset) >> 10); if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= RADEON_DST_TILE_MACRO; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reloc->tiling_flags & RADEON_TILING_MICRO) { if (reg == RADEON_SRC_PITCH_OFFSET) { DRM_ERROR("Cannot src blit from microtiled surface\n"); radeon_cs_dump_packet(p, pkt); @@ -1326,7 +1325,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, return r; } idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset); track->arrays[i + 0].esize = idx_value >> 8; track->arrays[i + 0].robj = reloc->robj; @@ -1338,7 +1337,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); + ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset); track->arrays[i + 1].robj = reloc->robj; track->arrays[i + 1].esize = idx_value >> 24; track->arrays[i + 1].esize &= 0x7F; @@ -1352,7 +1351,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, return r; } idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset); track->arrays[i + 0].robj = reloc->robj; track->arrays[i + 0].esize = idx_value >> 8; track->arrays[i + 0].esize &= 0x7F; @@ -1595,7 +1594,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->zb.robj = reloc->robj; track->zb.offset = idx_value; track->zb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case RADEON_RB3D_COLOROFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); @@ -1608,7 +1607,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->cb[0].robj = reloc->robj; track->cb[0].offset = idx_value; track->cb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case RADEON_PP_TXOFFSET_0: case RADEON_PP_TXOFFSET_1: @@ -1622,16 +1621,16 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= RADEON_TXO_MACRO_TILE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= RADEON_TXO_MICRO_TILE_X2; tmp = idx_value & ~(0x7 << 2); tmp |= tile_flags; - ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset); + ib[idx] = tmp + ((u32)reloc->gpu_offset); } else - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[i].robj = reloc->robj; track->tex_dirty = true; break; @@ -1649,7 +1648,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } track->textures[0].cube_info[i].offset = idx_value; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[0].cube_info[i].robj = reloc->robj; track->tex_dirty = true; break; @@ -1667,7 +1666,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } track->textures[1].cube_info[i].offset = idx_value; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[1].cube_info[i].robj = reloc->robj; track->tex_dirty = true; break; @@ -1685,7 +1684,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } track->textures[2].cube_info[i].offset = idx_value; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[2].cube_info[i].robj = reloc->robj; track->tex_dirty = true; break; @@ -1703,9 +1702,9 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= RADEON_COLOR_TILE_ENABLE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; tmp = idx_value & ~(0x7 << 16); @@ -1773,7 +1772,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case RADEON_PP_CNTL: { @@ -1933,7 +1932,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset); + ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset); r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); if (r) { return r; @@ -1947,7 +1946,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset); + ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset); track->num_arrays = 1; track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2)); @@ -2523,11 +2522,9 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) rbbm_status = RREG32(R_000E40_RBBM_STATUS); if (!G_000E40_GUI_ACTIVE(rbbm_status)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index b3807edb193..58f0473aa73 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -185,7 +185,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->zb.robj = reloc->robj; track->zb.offset = idx_value; track->zb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case RADEON_RB3D_COLOROFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); @@ -198,7 +198,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->cb[0].robj = reloc->robj; track->cb[0].offset = idx_value; track->cb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case R200_PP_TXOFFSET_0: case R200_PP_TXOFFSET_1: @@ -215,16 +215,16 @@ int r200_packet0_check(struct radeon_cs_parser *p, return r; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= R200_TXO_MACRO_TILE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= R200_TXO_MICRO_TILE; tmp = idx_value & ~(0x7 << 2); tmp |= tile_flags; - ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset); + ib[idx] = tmp + ((u32)reloc->gpu_offset); } else - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[i].robj = reloc->robj; track->tex_dirty = true; break; @@ -268,7 +268,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, return r; } track->textures[i].cube_info[face - 1].offset = idx_value; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[i].cube_info[face - 1].robj = reloc->robj; track->tex_dirty = true; break; @@ -287,9 +287,9 @@ int r200_packet0_check(struct radeon_cs_parser *p, } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= RADEON_COLOR_TILE_ENABLE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; tmp = idx_value & ~(0x7 << 16); @@ -362,7 +362,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case RADEON_PP_CNTL: { diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 7c63ef840e8..41cdf236ee9 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -640,7 +640,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->cb[i].robj = reloc->robj; track->cb[i].offset = idx_value; track->cb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case R300_ZB_DEPTHOFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); @@ -653,7 +653,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->zb.robj = reloc->robj; track->zb.offset = idx_value; track->zb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case R300_TX_OFFSET_0: case R300_TX_OFFSET_0+4: @@ -682,16 +682,16 @@ static int r300_packet0_check(struct radeon_cs_parser *p, if (p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) { ib[idx] = (idx_value & 31) | /* keep the 1st 5 bits */ - ((idx_value & ~31) + (u32)reloc->lobj.gpu_offset); + ((idx_value & ~31) + (u32)reloc->gpu_offset); } else { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= R300_TXO_MACRO_TILE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= R300_TXO_MICRO_TILE; - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) + else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE) tile_flags |= R300_TXO_MICRO_TILE_SQUARE; - tmp = idx_value + ((u32)reloc->lobj.gpu_offset); + tmp = idx_value + ((u32)reloc->gpu_offset); tmp |= tile_flags; ib[idx] = tmp; } @@ -753,11 +753,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p, return r; } - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= R300_COLOR_TILE_ENABLE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= R300_COLOR_MICROTILE_ENABLE; - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) + else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE) tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE; tmp = idx_value & ~(0x7 << 16); @@ -838,11 +838,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p, return r; } - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= R300_DEPTHMACROTILE_ENABLE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= R300_DEPTHMICROTILE_TILED; - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) + else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE) tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE; tmp = idx_value & ~(0x7 << 16); @@ -1052,7 +1052,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case 0x4e0c: /* RB3D_COLOR_CHANNEL_MASK */ @@ -1097,7 +1097,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->aa.robj = reloc->robj; track->aa.offset = idx_value; track->aa_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case R300_RB3D_AARESOLVE_PITCH: track->aa.pitch = idx_value & 0x3FFE; @@ -1162,7 +1162,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset); r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); if (r) { return r; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 56140b4e5bb..0f4ab928a15 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1748,11 +1748,9 @@ bool r600_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) if (!(reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } @@ -2604,8 +2602,6 @@ int r600_cp_resume(struct radeon_device *rdev) WREG32(CP_RB_BASE, ring->gpu_addr >> 8); WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); - ring->rptr = RREG32(CP_RB_RPTR); - r600_cp_start(rdev); ring->ready = true; r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring); @@ -3991,6 +3987,10 @@ restart_ih: break; } break; + case 124: /* UVD */ + DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); + radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); + break; case 176: /* CP_INT in ring buffer */ case 177: /* CP_INT in IB1 */ case 178: /* CP_INT in IB2 */ diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 7b399dc5fd5..12511bb5fd6 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -1007,8 +1007,22 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case R_008C64_SQ_VSTMP_RING_SIZE: case R_0288C8_SQ_GS_VERT_ITEMSIZE: /* get value to populate the IB don't remove */ - tmp =radeon_get_ib_value(p, idx); - ib[idx] = 0; + /*tmp =radeon_get_ib_value(p, idx); + ib[idx] = 0;*/ + break; + case SQ_ESGS_RING_BASE: + case SQ_GSVS_RING_BASE: + case SQ_ESTMP_RING_BASE: + case SQ_GSTMP_RING_BASE: + case SQ_PSTMP_RING_BASE: + case SQ_VSTMP_RING_BASE: + r = radeon_cs_packet_next_reloc(p, &reloc, 0); + if (r) { + dev_warn(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SQ_CONFIG: track->sq_config = radeon_get_ib_value(p, idx); @@ -1029,7 +1043,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->db_depth_info = radeon_get_ib_value(p, idx); ib[idx] &= C_028010_ARRAY_MODE; track->db_depth_info &= C_028010_ARRAY_MODE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + if (reloc->tiling_flags & RADEON_TILING_MACRO) { ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1); track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1); } else { @@ -1070,9 +1084,9 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->vgt_strmout_bo[tmp] = reloc->robj; - track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset; + track->vgt_strmout_bo_mc[tmp] = reloc->gpu_offset; track->streamout_dirty = true; break; case VGT_STRMOUT_BUFFER_SIZE_0: @@ -1091,7 +1105,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case R_028238_CB_TARGET_MASK: track->cb_target_mask = radeon_get_ib_value(p, idx); @@ -1128,10 +1142,10 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4; track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + if (reloc->tiling_flags & RADEON_TILING_MACRO) { ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1); track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1); - } else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + } else if (reloc->tiling_flags & RADEON_TILING_MICRO) { ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1); track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1); } @@ -1200,7 +1214,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } track->cb_color_frag_bo[tmp] = reloc->robj; track->cb_color_frag_offset[tmp] = (u64)ib[idx] << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); } if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { track->cb_dirty = true; @@ -1231,7 +1245,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } track->cb_color_tile_bo[tmp] = reloc->robj; track->cb_color_tile_offset[tmp] = (u64)ib[idx] << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); } if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { track->cb_dirty = true; @@ -1267,10 +1281,10 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = (reg - CB_COLOR0_BASE) / 4; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_base_last[tmp] = ib[idx]; track->cb_color_bo[tmp] = reloc->robj; - track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset; + track->cb_color_bo_mc[tmp] = reloc->gpu_offset; track->cb_dirty = true; break; case DB_DEPTH_BASE: @@ -1281,9 +1295,9 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->db_offset = radeon_get_ib_value(p, idx) << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_bo = reloc->robj; - track->db_bo_mc = reloc->lobj.gpu_offset; + track->db_bo_mc = reloc->gpu_offset; track->db_dirty = true; break; case DB_HTILE_DATA_BASE: @@ -1294,7 +1308,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->htile_offset = radeon_get_ib_value(p, idx) << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->htile_bo = reloc->robj; track->db_dirty = true; break; @@ -1363,7 +1377,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MEMORY_EXPORT_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); @@ -1372,7 +1386,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MISC: track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; @@ -1658,7 +1672,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (idx_value & 0xfffffff0) + ((u64)(tmp & 0xff) << 32); @@ -1699,7 +1713,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + idx_value + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); @@ -1751,7 +1765,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffff0) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -1791,7 +1805,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, tmp = radeon_get_ib_value(p, idx) + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); - offset = reloc->lobj.gpu_offset + tmp; + offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", @@ -1821,7 +1835,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, tmp = radeon_get_ib_value(p, idx+2) + ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); - offset = reloc->lobj.gpu_offset + tmp; + offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", @@ -1847,7 +1861,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad SURFACE_SYNC\n"); return -EINVAL; } - ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); } break; case PACKET3_EVENT_WRITE: @@ -1863,7 +1877,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad EVENT_WRITE\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -1885,7 +1899,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -1950,11 +1964,11 @@ static int r600_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad SET_RESOURCE\n"); return -EINVAL; } - base_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + base_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + else if (reloc->tiling_flags & RADEON_TILING_MICRO) ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); } texture = reloc->robj; @@ -1964,13 +1978,13 @@ static int r600_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad SET_RESOURCE\n"); return -EINVAL; } - mip_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + mip_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); mipmap = reloc->robj; r = r600_check_texture_resource(p, idx+(i*7)+1, texture, mipmap, base_offset + radeon_get_ib_value(p, idx+1+(i*7)+2), mip_offset + radeon_get_ib_value(p, idx+1+(i*7)+3), - reloc->lobj.tiling_flags); + reloc->tiling_flags); if (r) return r; ib[idx+1+(i*7)+2] += base_offset; @@ -1994,7 +2008,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj) - offset; } - offset64 = reloc->lobj.gpu_offset + offset; + offset64 = reloc->gpu_offset + offset; ib[idx+1+(i*8)+0] = offset64; ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | (upper_32_bits(offset64) & 0xff); @@ -2104,7 +2118,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx+1] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); } break; case PACKET3_SURFACE_BASE_UPDATE: @@ -2137,7 +2151,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+1] = offset; ib[idx+2] = upper_32_bits(offset) & 0xff; } @@ -2156,7 +2170,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+3] = offset; ib[idx+4] = upper_32_bits(offset) & 0xff; } @@ -2185,7 +2199,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+0] = offset; ib[idx+1] = upper_32_bits(offset) & 0xff; break; @@ -2210,7 +2224,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+1] = offset; ib[idx+2] = upper_32_bits(offset) & 0xff; } else { @@ -2234,7 +2248,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+3] = offset; ib[idx+4] = upper_32_bits(offset) & 0xff; } else { @@ -2491,14 +2505,14 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); p->idx += count + 5; } else { dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; p->idx += count + 3; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { @@ -2525,22 +2539,22 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) /* tiled src, linear dst */ src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); dst_offset = radeon_get_ib_value(p, idx+5); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; - ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+5] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+6] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; } else { /* linear src, tiled dst */ src_offset = radeon_get_ib_value(p, idx+5); src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; - ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+5] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff; dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } p->idx += 7; } else { @@ -2550,10 +2564,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; - ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; + ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 5; } else { src_offset = radeon_get_ib_value(p, idx+2); @@ -2561,10 +2575,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff0000)) << 16; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; - ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff) << 16; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += upper_32_bits(src_reloc->gpu_offset) & 0xff; + ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) & 0xff) << 16; p->idx += 4; } } @@ -2596,8 +2610,8 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000; p->idx += 4; break; case DMA_PACKET_NOP: diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index b2d4c91e627..53fcb28f557 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -176,8 +176,6 @@ int r600_dma_resume(struct radeon_device *rdev) ring->wptr = 0; WREG32(DMA_RB_WPTR, ring->wptr << 2); - ring->rptr = RREG32(DMA_RB_RPTR) >> 2; - WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); ring->ready = true; @@ -221,11 +219,9 @@ bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) u32 reset_mask = r600_gpu_check_soft_reset(rdev); if (!(reset_mask & RADEON_RESET_DMA)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index e4cc9b314ce..cbf7e3269f8 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -834,6 +834,26 @@ static int r600_parse_clk_voltage_dep_table(struct radeon_clock_voltage_dependen return 0; } +int r600_get_platform_caps(struct radeon_device *rdev) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + union power_info *power_info; + int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo); + u16 data_offset; + u8 frev, crev; + + if (!atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) + return -EINVAL; + power_info = (union power_info *)(mode_info->atom_context->bios + data_offset); + + rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); + rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); + rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + + return 0; +} + /* sizeof(ATOM_PPLIB_EXTENDEDHEADER) */ #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V2 12 #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3 14 @@ -1043,7 +1063,15 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) (mode_info->atom_context->bios + data_offset + le16_to_cpu(ext_hdr->usVCETableOffset) + 1 + 1 + array->ucNumEntries * sizeof(VCEClockInfo)); + ATOM_PPLIB_VCE_State_Table *states = + (ATOM_PPLIB_VCE_State_Table *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usVCETableOffset) + 1 + + 1 + (array->ucNumEntries * sizeof (VCEClockInfo)) + + 1 + (limits->numEntries * sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record))); ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *entry; + ATOM_PPLIB_VCE_State_Record *state_entry; + VCEClockInfo *vce_clk; u32 size = limits->numEntries * sizeof(struct radeon_vce_clock_voltage_dependency_entry); rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries = @@ -1055,8 +1083,9 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count = limits->numEntries; entry = &limits->entries[0]; + state_entry = &states->entries[0]; for (i = 0; i < limits->numEntries; i++) { - VCEClockInfo *vce_clk = (VCEClockInfo *) + vce_clk = (VCEClockInfo *) ((u8 *)&array->entries[0] + (entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo))); rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].evclk = @@ -1068,6 +1097,23 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) entry = (ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *) ((u8 *)entry + sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record)); } + for (i = 0; i < states->numEntries; i++) { + if (i >= RADEON_MAX_VCE_LEVELS) + break; + vce_clk = (VCEClockInfo *) + ((u8 *)&array->entries[0] + + (state_entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo))); + rdev->pm.dpm.vce_states[i].evclk = + le16_to_cpu(vce_clk->usEVClkLow) | (vce_clk->ucEVClkHigh << 16); + rdev->pm.dpm.vce_states[i].ecclk = + le16_to_cpu(vce_clk->usECClkLow) | (vce_clk->ucECClkHigh << 16); + rdev->pm.dpm.vce_states[i].clk_idx = + state_entry->ucClockInfoIndex & 0x3f; + rdev->pm.dpm.vce_states[i].pstate = + (state_entry->ucClockInfoIndex & 0xc0) >> 6; + state_entry = (ATOM_PPLIB_VCE_State_Record *) + ((u8 *)state_entry + sizeof(ATOM_PPLIB_VCE_State_Record)); + } } if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3) && ext_hdr->usUVDTableOffset) { diff --git a/drivers/gpu/drm/radeon/r600_dpm.h b/drivers/gpu/drm/radeon/r600_dpm.h index 07eab2b04e8..46b9d2a0301 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.h +++ b/drivers/gpu/drm/radeon/r600_dpm.h @@ -215,6 +215,8 @@ void r600_stop_dpm(struct radeon_device *rdev); bool r600_is_internal_thermal_sensor(enum radeon_int_thermal_type sensor); +int r600_get_platform_caps(struct radeon_device *rdev); + int r600_parse_extended_power_table(struct radeon_device *rdev); void r600_free_extended_power_table(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 4a8ac1cd6b4..111deab2492 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -113,19 +113,16 @@ extern int radeon_hard_reset; #define RADEONFB_CONN_LIMIT 4 #define RADEON_BIOS_NUM_SCRATCH 8 -/* max number of rings */ -#define RADEON_NUM_RINGS 6 - /* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL /* internal ring indices */ /* r1xx+ has gfx CP ring */ -#define RADEON_RING_TYPE_GFX_INDEX 0 +#define RADEON_RING_TYPE_GFX_INDEX 0 /* cayman has 2 compute CP rings */ -#define CAYMAN_RING_TYPE_CP1_INDEX 1 -#define CAYMAN_RING_TYPE_CP2_INDEX 2 +#define CAYMAN_RING_TYPE_CP1_INDEX 1 +#define CAYMAN_RING_TYPE_CP2_INDEX 2 /* R600+ has an async dma ring */ #define R600_RING_TYPE_DMA_INDEX 3 @@ -133,7 +130,20 @@ extern int radeon_hard_reset; #define CAYMAN_RING_TYPE_DMA1_INDEX 4 /* R600+ */ -#define R600_RING_TYPE_UVD_INDEX 5 +#define R600_RING_TYPE_UVD_INDEX 5 + +/* TN+ */ +#define TN_RING_TYPE_VCE1_INDEX 6 +#define TN_RING_TYPE_VCE2_INDEX 7 + +/* max number of rings */ +#define RADEON_NUM_RINGS 8 + +/* number of hw syncs before falling back on blocking */ +#define RADEON_NUM_SYNCS 4 + +/* number of hw syncs before falling back on blocking */ +#define RADEON_NUM_SYNCS 4 /* hardcode those limit for now */ #define RADEON_VA_IB_OFFSET (1 << 20) @@ -353,9 +363,8 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, i void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); -int radeon_fence_wait_locked(struct radeon_fence *fence); -int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); -int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); +int radeon_fence_wait_next(struct radeon_device *rdev, int ring); +int radeon_fence_wait_empty(struct radeon_device *rdev, int ring); int radeon_fence_wait_any(struct radeon_device *rdev, struct radeon_fence **fences, bool intr); @@ -447,6 +456,7 @@ struct radeon_bo { /* Protected by gem.mutex */ struct list_head list; /* Protected by tbo.reserved */ + u32 initial_domain; u32 placements[3]; struct ttm_placement placement; struct ttm_buffer_object tbo; @@ -469,16 +479,6 @@ struct radeon_bo { }; #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base) -struct radeon_bo_list { - struct ttm_validate_buffer tv; - struct radeon_bo *bo; - uint64_t gpu_offset; - bool written; - unsigned domain; - unsigned alt_domain; - u32 tiling_flags; -}; - int radeon_gem_debugfs_init(struct radeon_device *rdev); /* sub-allocation manager, it has to be protected by another lock. @@ -554,7 +554,6 @@ int radeon_mode_dumb_mmap(struct drm_file *filp, /* * Semaphores. */ -/* everything here is constant */ struct radeon_semaphore { struct radeon_sa_bo *sa_bo; signed waiters; @@ -787,7 +786,6 @@ struct radeon_ib { struct radeon_ring { struct radeon_bo *ring_obj; volatile uint32_t *ring; - unsigned rptr; unsigned rptr_offs; unsigned rptr_save_reg; u64 next_rptr_gpu_addr; @@ -797,8 +795,8 @@ struct radeon_ring { unsigned ring_size; unsigned ring_free_dw; int count_dw; - unsigned long last_activity; - unsigned last_rptr; + atomic_t last_rptr; + atomic64_t last_activity; uint64_t gpu_addr; uint32_t align_mask; uint32_t ptr_mask; @@ -850,17 +848,22 @@ struct radeon_mec { #define R600_PTE_READABLE (1 << 5) #define R600_PTE_WRITEABLE (1 << 6) +struct radeon_vm_pt { + struct radeon_bo *bo; + uint64_t addr; +}; + struct radeon_vm { - struct list_head list; struct list_head va; unsigned id; /* contains the page directory */ - struct radeon_sa_bo *page_directory; + struct radeon_bo *page_directory; uint64_t pd_gpu_addr; + unsigned max_pde_used; /* array of page tables, one for each page directory entry */ - struct radeon_sa_bo **page_tables; + struct radeon_vm_pt *page_tables; struct mutex mutex; /* last fence for cs using this vm */ @@ -872,10 +875,7 @@ struct radeon_vm { }; struct radeon_vm_manager { - struct mutex lock; - struct list_head lru_vm; struct radeon_fence *active[RADEON_NUM_VM]; - struct radeon_sa_manager sa_manager; uint32_t max_pfn; /* number of VMIDs */ unsigned nvm; @@ -951,8 +951,8 @@ void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *c void radeon_ring_undo(struct radeon_ring *ring); void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp); int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); -void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring); -void radeon_ring_lockup_update(struct radeon_ring *ring); +void radeon_ring_lockup_update(struct radeon_device *rdev, + struct radeon_ring *ring); bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring); unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring, uint32_t **data); @@ -978,9 +978,12 @@ void cayman_dma_fini(struct radeon_device *rdev); struct radeon_cs_reloc { struct drm_gem_object *gobj; struct radeon_bo *robj; - struct radeon_bo_list lobj; + struct ttm_validate_buffer tv; + uint64_t gpu_offset; + unsigned domain; + unsigned alt_domain; + uint32_t tiling_flags; uint32_t handle; - uint32_t flags; }; struct radeon_cs_chunk { @@ -1004,6 +1007,7 @@ struct radeon_cs_parser { unsigned nrelocs; struct radeon_cs_reloc *relocs; struct radeon_cs_reloc **relocs_ptr; + struct radeon_cs_reloc *vm_bos; struct list_head validated; unsigned dma_reloc_idx; /* indices of various chunks */ @@ -1253,6 +1257,17 @@ enum radeon_dpm_event_src { RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 }; +#define RADEON_MAX_VCE_LEVELS 6 + +enum radeon_vce_level { + RADEON_VCE_LEVEL_AC_ALL = 0, /* AC, All cases */ + RADEON_VCE_LEVEL_DC_EE = 1, /* DC, entropy encoding */ + RADEON_VCE_LEVEL_DC_LL_LOW = 2, /* DC, low latency queue, res <= 720 */ + RADEON_VCE_LEVEL_DC_LL_HIGH = 3, /* DC, low latency queue, 1080 >= res > 720 */ + RADEON_VCE_LEVEL_DC_GP_LOW = 4, /* DC, general purpose queue, res <= 720 */ + RADEON_VCE_LEVEL_DC_GP_HIGH = 5, /* DC, general purpose queue, 1080 >= res > 720 */ +}; + struct radeon_ps { u32 caps; /* vbios flags */ u32 class; /* vbios flags */ @@ -1263,6 +1278,8 @@ struct radeon_ps { /* VCE clocks */ u32 evclk; u32 ecclk; + bool vce_active; + enum radeon_vce_level vce_level; /* asic priv */ void *ps_priv; }; @@ -1437,6 +1454,17 @@ enum radeon_dpm_forced_level { RADEON_DPM_FORCED_LEVEL_HIGH = 2, }; +struct radeon_vce_state { + /* vce clocks */ + u32 evclk; + u32 ecclk; + /* gpu clocks */ + u32 sclk; + u32 mclk; + u8 clk_idx; + u8 pstate; +}; + struct radeon_dpm { struct radeon_ps *ps; /* number of valid power states */ @@ -1449,6 +1477,9 @@ struct radeon_dpm { struct radeon_ps *boot_ps; /* default uvd power state */ struct radeon_ps *uvd_ps; + /* vce requirements */ + struct radeon_vce_state vce_states[RADEON_MAX_VCE_LEVELS]; + enum radeon_vce_level vce_level; enum radeon_pm_state_type state; enum radeon_pm_state_type user_state; u32 platform_caps; @@ -1474,6 +1505,7 @@ struct radeon_dpm { /* special states active */ bool thermal_active; bool uvd_active; + bool vce_active; /* thermal handling */ struct radeon_dpm_thermal thermal; /* forced levels */ @@ -1484,6 +1516,7 @@ struct radeon_dpm { }; void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable); +void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable); struct radeon_pm { struct mutex mutex; @@ -1589,6 +1622,45 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, unsigned cg_upll_func_cntl); +/* + * VCE + */ +#define RADEON_MAX_VCE_HANDLES 16 +#define RADEON_VCE_STACK_SIZE (1024*1024) +#define RADEON_VCE_HEAP_SIZE (4*1024*1024) + +struct radeon_vce { + struct radeon_bo *vcpu_bo; + uint64_t gpu_addr; + unsigned fw_version; + unsigned fb_version; + atomic_t handles[RADEON_MAX_VCE_HANDLES]; + struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; + struct delayed_work idle_work; +}; + +int radeon_vce_init(struct radeon_device *rdev); +void radeon_vce_fini(struct radeon_device *rdev); +int radeon_vce_suspend(struct radeon_device *rdev); +int radeon_vce_resume(struct radeon_device *rdev); +int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence); +int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence); +void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp); +void radeon_vce_note_usage(struct radeon_device *rdev); +int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi); +int radeon_vce_cs_parse(struct radeon_cs_parser *p); +bool radeon_vce_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); +void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +void radeon_vce_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); +int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); + struct r600_audio_pin { int channels; int rate; @@ -1778,6 +1850,7 @@ struct radeon_asic { void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes); void (*set_clock_gating)(struct radeon_device *rdev, int enable); int (*set_uvd_clocks)(struct radeon_device *rdev, u32 vclk, u32 dclk); + int (*set_vce_clocks)(struct radeon_device *rdev, u32 evclk, u32 ecclk); int (*get_temperature)(struct radeon_device *rdev); } pm; /* dynamic power management */ @@ -2039,6 +2112,8 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int radeon_gem_op_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); @@ -2184,6 +2259,7 @@ struct radeon_device { struct radeon_gem gem; struct radeon_pm pm; struct radeon_uvd uvd; + struct radeon_vce vce; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; struct radeon_wb wb; struct radeon_dummy_page dummy_page; @@ -2203,6 +2279,7 @@ struct radeon_device { const struct firmware *sdma_fw; /* CIK SDMA firmware */ const struct firmware *smc_fw; /* SMC firmware */ const struct firmware *uvd_fw; /* UVD firmware */ + const struct firmware *vce_fw; /* VCE firmware */ struct r600_vram_scratch vram_scratch; int msi_enabled; /* msi enabled */ struct r600_ih ih; /* r6/700 interrupt ring */ @@ -2227,6 +2304,10 @@ struct radeon_device { /* virtual memory */ struct radeon_vm_manager vm_manager; struct mutex gpu_clock_mutex; + /* memory stats */ + atomic64_t vram_usage; + atomic64_t gtt_usage; + atomic64_t num_bytes_moved; /* ACPI interface */ struct radeon_atif atif; struct radeon_atcs atcs; @@ -2637,6 +2718,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->pm.set_pcie_lanes((rdev), (l)) #define radeon_set_clock_gating(rdev, e) (rdev)->asic->pm.set_clock_gating((rdev), (e)) #define radeon_set_uvd_clocks(rdev, v, d) (rdev)->asic->pm.set_uvd_clocks((rdev), (v), (d)) +#define radeon_set_vce_clocks(rdev, ev, ec) (rdev)->asic->pm.set_vce_clocks((rdev), (ev), (ec)) #define radeon_get_temperature(rdev) (rdev)->asic->pm.get_temperature((rdev)) #define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->surface.set_reg((rdev), (r), (f), (p), (o), (s))) #define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->surface.clear_reg((rdev), (r))) @@ -2713,16 +2795,22 @@ extern void radeon_program_register_sequence(struct radeon_device *rdev, */ int radeon_vm_manager_init(struct radeon_device *rdev); void radeon_vm_manager_fini(struct radeon_device *rdev); -void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); +int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); -int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); -void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm); +struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, + struct radeon_vm *vm, + struct list_head *head); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); +void radeon_vm_flush(struct radeon_device *rdev, + struct radeon_vm *vm, + int ring); void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence); uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr); +int radeon_vm_update_page_directory(struct radeon_device *rdev, + struct radeon_vm *vm); int radeon_vm_bo_update(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c index 98a9074b306..77e9d07c55b 100644 --- a/drivers/gpu/drm/radeon/radeon_acpi.c +++ b/drivers/gpu/drm/radeon/radeon_acpi.c @@ -25,18 +25,14 @@ #include <linux/acpi.h> #include <linux/slab.h> #include <linux/power_supply.h> -#include <acpi/acpi_drivers.h> -#include <acpi/acpi_bus.h> +#include <linux/vga_switcheroo.h> #include <acpi/video.h> - #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> #include "radeon.h" #include "radeon_acpi.h" #include "atom.h" -#include <linux/vga_switcheroo.h> - #define ACPI_AC_CLASS "ac_adapter" extern void radeon_pm_acpi_event_handler(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index f74db43346f..b8a24a75d4f 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1555,7 +1555,7 @@ static struct radeon_asic btc_asic = { .get_sclk = &btc_dpm_get_sclk, .get_mclk = &btc_dpm_get_mclk, .print_power_state = &rv770_dpm_print_power_state, - .debugfs_print_current_performance_level = &rv770_dpm_debugfs_print_current_performance_level, + .debugfs_print_current_performance_level = &btc_dpm_debugfs_print_current_performance_level, .force_performance_level = &rv770_dpm_force_performance_level, .vblank_too_short = &btc_dpm_vblank_too_short, }, @@ -1987,6 +1987,19 @@ static struct radeon_asic_ring ci_dma_ring = { .set_wptr = &cik_sdma_set_wptr, }; +static struct radeon_asic_ring ci_vce_ring = { + .ib_execute = &radeon_vce_ib_execute, + .emit_fence = &radeon_vce_fence_emit, + .emit_semaphore = &radeon_vce_semaphore_emit, + .cs_parse = &radeon_vce_cs_parse, + .ring_test = &radeon_vce_ring_test, + .ib_test = &radeon_vce_ib_test, + .is_lockup = &radeon_ring_test_lockup, + .get_rptr = &vce_v1_0_get_rptr, + .get_wptr = &vce_v1_0_get_wptr, + .set_wptr = &vce_v1_0_set_wptr, +}; + static struct radeon_asic ci_asic = { .init = &cik_init, .fini = &cik_fini, @@ -2015,6 +2028,8 @@ static struct radeon_asic ci_asic = { [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, + [TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring, + [TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, }, .irq = { .set = &cik_irq_set, @@ -2061,6 +2076,7 @@ static struct radeon_asic ci_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &cik_set_uvd_clocks, + .set_vce_clocks = &cik_set_vce_clocks, .get_temperature = &ci_get_temp, }, .dpm = { @@ -2117,6 +2133,8 @@ static struct radeon_asic kv_asic = { [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, + [TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring, + [TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, }, .irq = { .set = &cik_irq_set, @@ -2163,6 +2181,7 @@ static struct radeon_asic kv_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &cik_set_uvd_clocks, + .set_vce_clocks = &cik_set_vce_clocks, .get_temperature = &kv_get_temp, }, .dpm = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index b3bc433eed4..3d55a3a39e8 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -551,6 +551,8 @@ void btc_dpm_fini(struct radeon_device *rdev); u32 btc_dpm_get_sclk(struct radeon_device *rdev, bool low); u32 btc_dpm_get_mclk(struct radeon_device *rdev, bool low); bool btc_dpm_vblank_too_short(struct radeon_device *rdev); +void btc_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, + struct seq_file *m); int sumo_dpm_init(struct radeon_device *rdev); int sumo_dpm_enable(struct radeon_device *rdev); int sumo_dpm_late_enable(struct radeon_device *rdev); @@ -715,6 +717,7 @@ u32 cik_get_xclk(struct radeon_device *rdev); uint32_t cik_pciep_rreg(struct radeon_device *rdev, uint32_t reg); void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); +int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk); void cik_sdma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, @@ -861,4 +864,17 @@ bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, /* uvd v4.2 */ int uvd_v4_2_resume(struct radeon_device *rdev); +/* vce v1.0 */ +uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring); +uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); +void vce_v1_0_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); +int vce_v1_0_init(struct radeon_device *rdev); +int vce_v1_0_start(struct radeon_device *rdev); + +/* vce v2.0 */ +int vce_v2_0_resume(struct radeon_device *rdev); + #endif diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index dfb5a1db87d..2b6e0ebcc13 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -24,16 +24,59 @@ * Authors: * Jerome Glisse <glisse@freedesktop.org> */ +#include <linux/list_sort.h> #include <drm/drmP.h> #include <drm/radeon_drm.h> #include "radeon_reg.h" #include "radeon.h" #include "radeon_trace.h" +#define RADEON_CS_MAX_PRIORITY 32u +#define RADEON_CS_NUM_BUCKETS (RADEON_CS_MAX_PRIORITY + 1) + +/* This is based on the bucket sort with O(n) time complexity. + * An item with priority "i" is added to bucket[i]. The lists are then + * concatenated in descending order. + */ +struct radeon_cs_buckets { + struct list_head bucket[RADEON_CS_NUM_BUCKETS]; +}; + +static void radeon_cs_buckets_init(struct radeon_cs_buckets *b) +{ + unsigned i; + + for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) + INIT_LIST_HEAD(&b->bucket[i]); +} + +static void radeon_cs_buckets_add(struct radeon_cs_buckets *b, + struct list_head *item, unsigned priority) +{ + /* Since buffers which appear sooner in the relocation list are + * likely to be used more often than buffers which appear later + * in the list, the sort mustn't change the ordering of buffers + * with the same priority, i.e. it must be stable. + */ + list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]); +} + +static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b, + struct list_head *out_list) +{ + unsigned i; + + /* Connect the sorted buckets in the output list. */ + for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) { + list_splice(&b->bucket[i], out_list); + } +} + static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) { struct drm_device *ddev = p->rdev->ddev; struct radeon_cs_chunk *chunk; + struct radeon_cs_buckets buckets; unsigned i, j; bool duplicate; @@ -52,8 +95,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) if (p->relocs == NULL) { return -ENOMEM; } + + radeon_cs_buckets_init(&buckets); + for (i = 0; i < p->nrelocs; i++) { struct drm_radeon_cs_reloc *r; + unsigned priority; duplicate = false; r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4]; @@ -78,8 +125,14 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) } p->relocs_ptr[i] = &p->relocs[i]; p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); - p->relocs[i].lobj.bo = p->relocs[i].robj; - p->relocs[i].lobj.written = !!r->write_domain; + + /* The userspace buffer priorities are from 0 to 15. A higher + * number means the buffer is more important. + * Also, the buffers used for write have a higher priority than + * the buffers used for read only, which doubles the range + * to 0 to 31. 32 is reserved for the kernel driver. + */ + priority = (r->flags & 0xf) * 2 + !!r->write_domain; /* the first reloc of an UVD job is the msg and that must be in VRAM, also but everything into VRAM on AGP cards to avoid @@ -87,29 +140,38 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) if (p->ring == R600_RING_TYPE_UVD_INDEX && (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) { /* TODO: is this still needed for NI+ ? */ - p->relocs[i].lobj.domain = + p->relocs[i].domain = RADEON_GEM_DOMAIN_VRAM; - p->relocs[i].lobj.alt_domain = + p->relocs[i].alt_domain = RADEON_GEM_DOMAIN_VRAM; + /* prioritize this over any other relocation */ + priority = RADEON_CS_MAX_PRIORITY; } else { uint32_t domain = r->write_domain ? r->write_domain : r->read_domains; - p->relocs[i].lobj.domain = domain; + p->relocs[i].domain = domain; if (domain == RADEON_GEM_DOMAIN_VRAM) domain |= RADEON_GEM_DOMAIN_GTT; - p->relocs[i].lobj.alt_domain = domain; + p->relocs[i].alt_domain = domain; } - p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; + p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].handle = r->handle; - radeon_bo_list_add_object(&p->relocs[i].lobj, - &p->validated); + radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head, + priority); } - return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring); + + radeon_cs_buckets_get_list(&buckets, &p->validated); + + if (p->cs_flags & RADEON_CS_USE_VM) + p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm, + &p->validated); + + return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); } static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) @@ -147,6 +209,10 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority case RADEON_CS_RING_UVD: p->ring = R600_RING_TYPE_UVD_INDEX; break; + case RADEON_CS_RING_VCE: + /* TODO: only use the low priority ring for now */ + p->ring = TN_RING_TYPE_VCE1_INDEX; + break; } return 0; } @@ -286,6 +352,16 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return 0; } +static int cmp_size_smaller_first(void *priv, struct list_head *a, + struct list_head *b) +{ + struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head); + struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head); + + /* Sort A before B if A is smaller. */ + return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; +} + /** * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. @@ -299,6 +375,18 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo unsigned i; if (!error) { + /* Sort the buffer list from the smallest to largest buffer, + * which affects the order of buffers in the LRU list. + * This assures that the smallest buffers are added first + * to the LRU list, so they are likely to be later evicted + * first, instead of large buffers whose eviction is more + * expensive. + * + * This slightly lowers the number of bytes moved by TTM + * per frame under memory pressure. + */ + list_sort(NULL, &parser->validated, cmp_size_smaller_first); + ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, parser->ib.fence); @@ -316,6 +404,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo kfree(parser->track); kfree(parser->relocs); kfree(parser->relocs_ptr); + kfree(parser->vm_bos); for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); kfree(parser->chunks); @@ -343,6 +432,9 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, if (parser->ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_note_usage(rdev); + else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) || + (parser->ring == TN_RING_TYPE_VCE2_INDEX)) + radeon_vce_note_usage(rdev); radeon_cs_sync_rings(parser); r = radeon_ib_schedule(rdev, &parser->ib, NULL); @@ -352,24 +444,32 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, return r; } -static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, +static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, struct radeon_vm *vm) { - struct radeon_device *rdev = parser->rdev; - struct radeon_bo_list *lobj; - struct radeon_bo *bo; - int r; + struct radeon_device *rdev = p->rdev; + int i, r; - r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem); - if (r) { + r = radeon_vm_update_page_directory(rdev, vm); + if (r) return r; - } - list_for_each_entry(lobj, &parser->validated, tv.head) { - bo = lobj->bo; - r = radeon_vm_bo_update(parser->rdev, vm, bo, &bo->tbo.mem); - if (r) { + + r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, + &rdev->ring_tmp_bo.bo->tbo.mem); + if (r) + return r; + + for (i = 0; i < p->nrelocs; i++) { + struct radeon_bo *bo; + + /* ignore duplicates */ + if (p->relocs_ptr[i] != &p->relocs[i]) + continue; + + bo = p->relocs[i].robj; + r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem); + if (r) return r; - } } return 0; } @@ -401,20 +501,13 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (parser->ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_note_usage(rdev); - mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); - r = radeon_vm_alloc_pt(rdev, vm); - if (r) { - goto out; - } r = radeon_bo_vm_update_pte(parser, vm); if (r) { goto out; } radeon_cs_sync_rings(parser); radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence); - radeon_semaphore_sync_to(parser->ib.semaphore, - radeon_vm_grab_id(rdev, vm, parser->ring)); if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { @@ -423,14 +516,8 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, r = radeon_ib_schedule(rdev, &parser->ib, NULL); } - if (!r) { - radeon_vm_fence(rdev, vm, parser->ib.fence); - } - out: - radeon_vm_add_to_lru(rdev, vm); mutex_unlock(&vm->mutex); - mutex_unlock(&rdev->vm_manager.lock); return r; } @@ -698,9 +785,9 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p, /* FIXME: we assume reloc size is 4 dwords */ if (nomm) { *cs_reloc = p->relocs; - (*cs_reloc)->lobj.gpu_offset = + (*cs_reloc)->gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32; - (*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0]; + (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0]; } else *cs_reloc = p->relocs_ptr[(idx / 4)]; return 0; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b012cbbc3ed..7db44de90d6 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1191,14 +1191,12 @@ int radeon_device_init(struct radeon_device *rdev, r = radeon_gem_init(rdev); if (r) return r; - /* initialize vm here */ - mutex_init(&rdev->vm_manager.lock); + /* Adjust VM size here. * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. */ rdev->vm_manager.max_pfn = 1 << 20; - INIT_LIST_HEAD(&rdev->vm_manager.lru_vm); /* Set asic functions */ r = radeon_asic_init(rdev); @@ -1445,10 +1443,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) /* evict vram memory */ radeon_bo_evict_vram(rdev); - mutex_lock(&rdev->ring_lock); /* wait for gpu to finish processing current batch */ for (i = 0; i < RADEON_NUM_RINGS; i++) { - r = radeon_fence_wait_empty_locked(rdev, i); + r = radeon_fence_wait_empty(rdev, i); if (r) { /* delay GPU reset to resume */ force_completion = true; @@ -1457,7 +1454,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) if (force_completion) { radeon_fence_driver_force_completion(rdev); } - mutex_unlock(&rdev->ring_lock); radeon_save_bios_scratch_regs(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index d680608f6f5..fbd8b930f2b 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -571,6 +571,8 @@ static void radeon_crtc_init(struct drm_device *dev, int index) radeon_crtc->max_cursor_width = CURSOR_WIDTH; radeon_crtc->max_cursor_height = CURSOR_HEIGHT; } + dev->mode_config.cursor_width = radeon_crtc->max_cursor_width; + dev->mode_config.cursor_height = radeon_crtc->max_cursor_height; #if 0 radeon_crtc->mode_set.crtc = &radeon_crtc->base; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index ec8c388eec1..4392b7c95ee 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -78,9 +78,11 @@ * 2.34.0 - Add CIK tiling mode array query * 2.35.0 - Add CIK macrotile mode array query * 2.36.0 - Fix CIK DCE tiling setup + * 2.37.0 - allow GS ring setup on r6xx/r7xx + * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN) */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 36 +#define KMS_DRIVER_MINOR 38 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index c37cb79a948..a77b1c13ea4 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -288,7 +288,6 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) * @rdev: radeon device pointer * @target_seq: sequence number(s) we want to wait for * @intr: use interruptable sleep - * @lock_ring: whether the ring should be locked or not * * Wait for the requested sequence number(s) to be written by any ring * (all asics). Sequnce number array is indexed by ring id. @@ -299,7 +298,7 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) * -EDEADLK is returned when a GPU lockup has been detected. */ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, - bool intr, bool lock_ring) + bool intr) { uint64_t last_seq[RADEON_NUM_RINGS]; bool signaled; @@ -358,9 +357,6 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, if (i != RADEON_NUM_RINGS) continue; - if (lock_ring) - mutex_lock(&rdev->ring_lock); - for (i = 0; i < RADEON_NUM_RINGS; ++i) { if (!target_seq[i]) continue; @@ -378,14 +374,9 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, /* remember that we need an reset */ rdev->needs_reset = true; - if (lock_ring) - mutex_unlock(&rdev->ring_lock); wake_up_all(&rdev->fence_queue); return -EDEADLK; } - - if (lock_ring) - mutex_unlock(&rdev->ring_lock); } } return 0; @@ -416,7 +407,7 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) return 0; - r = radeon_fence_wait_seq(fence->rdev, seq, intr, true); + r = radeon_fence_wait_seq(fence->rdev, seq, intr); if (r) return r; @@ -464,7 +455,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, if (num_rings == 0) return -ENOENT; - r = radeon_fence_wait_seq(rdev, seq, intr, true); + r = radeon_fence_wait_seq(rdev, seq, intr); if (r) { return r; } @@ -472,37 +463,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, } /** - * radeon_fence_wait_locked - wait for a fence to signal - * - * @fence: radeon fence object - * - * Wait for the requested fence to signal (all asics). - * Returns 0 if the fence has passed, error for all other cases. - */ -int radeon_fence_wait_locked(struct radeon_fence *fence) -{ - uint64_t seq[RADEON_NUM_RINGS] = {}; - int r; - - if (fence == NULL) { - WARN(1, "Querying an invalid fence : %p !\n", fence); - return -EINVAL; - } - - seq[fence->ring] = fence->seq; - if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) - return 0; - - r = radeon_fence_wait_seq(fence->rdev, seq, false, false); - if (r) - return r; - - fence->seq = RADEON_FENCE_SIGNALED_SEQ; - return 0; -} - -/** - * radeon_fence_wait_next_locked - wait for the next fence to signal + * radeon_fence_wait_next - wait for the next fence to signal * * @rdev: radeon device pointer * @ring: ring index the fence is associated with @@ -511,7 +472,7 @@ int radeon_fence_wait_locked(struct radeon_fence *fence) * Returns 0 if the next fence has passed, error for all other cases. * Caller must hold ring lock. */ -int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) +int radeon_fence_wait_next(struct radeon_device *rdev, int ring) { uint64_t seq[RADEON_NUM_RINGS] = {}; @@ -521,11 +482,11 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) already the last emited fence */ return -ENOENT; } - return radeon_fence_wait_seq(rdev, seq, false, false); + return radeon_fence_wait_seq(rdev, seq, false); } /** - * radeon_fence_wait_empty_locked - wait for all fences to signal + * radeon_fence_wait_empty - wait for all fences to signal * * @rdev: radeon device pointer * @ring: ring index the fence is associated with @@ -534,7 +495,7 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) * Returns 0 if the fences have passed, error for all other cases. * Caller must hold ring lock. */ -int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) +int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) { uint64_t seq[RADEON_NUM_RINGS] = {}; int r; @@ -543,7 +504,7 @@ int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) if (!seq[ring]) return 0; - r = radeon_fence_wait_seq(rdev, seq, false, false); + r = radeon_fence_wait_seq(rdev, seq, false); if (r) { if (r == -EDEADLK) return -EDEADLK; @@ -794,7 +755,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { if (!rdev->fence_drv[ring].initialized) continue; - r = radeon_fence_wait_empty_locked(rdev, ring); + r = radeon_fence_wait_empty(rdev, ring); if (r) { /* no need to trigger GPU reset as we are unloading */ radeon_fence_driver_force_completion(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index a8f9b463bf2..2e723651069 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -28,8 +28,6 @@ #include <drm/drmP.h> #include <drm/radeon_drm.h> #include "radeon.h" -#include "radeon_reg.h" -#include "radeon_trace.h" /* * GART @@ -394,959 +392,3 @@ void radeon_gart_fini(struct radeon_device *rdev) radeon_dummy_page_fini(rdev); } - -/* - * GPUVM - * GPUVM is similar to the legacy gart on older asics, however - * rather than there being a single global gart table - * for the entire GPU, there are multiple VM page tables active - * at any given time. The VM page tables can contain a mix - * vram pages and system memory pages and system memory pages - * can be mapped as snooped (cached system pages) or unsnooped - * (uncached system pages). - * Each VM has an ID associated with it and there is a page table - * associated with each VMID. When execting a command buffer, - * the kernel tells the the ring what VMID to use for that command - * buffer. VMIDs are allocated dynamically as commands are submitted. - * The userspace drivers maintain their own address space and the kernel - * sets up their pages tables accordingly when they submit their - * command buffers and a VMID is assigned. - * Cayman/Trinity support up to 8 active VMs at any given time; - * SI supports 16. - */ - -/* - * vm helpers - * - * TODO bind a default page at vm initialization for default address - */ - -/** - * radeon_vm_num_pde - return the number of page directory entries - * - * @rdev: radeon_device pointer - * - * Calculate the number of page directory entries (cayman+). - */ -static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) -{ - return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; -} - -/** - * radeon_vm_directory_size - returns the size of the page directory in bytes - * - * @rdev: radeon_device pointer - * - * Calculate the size of the page directory in bytes (cayman+). - */ -static unsigned radeon_vm_directory_size(struct radeon_device *rdev) -{ - return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); -} - -/** - * radeon_vm_manager_init - init the vm manager - * - * @rdev: radeon_device pointer - * - * Init the vm manager (cayman+). - * Returns 0 for success, error for failure. - */ -int radeon_vm_manager_init(struct radeon_device *rdev) -{ - struct radeon_vm *vm; - struct radeon_bo_va *bo_va; - int r; - unsigned size; - - if (!rdev->vm_manager.enabled) { - /* allocate enough for 2 full VM pts */ - size = radeon_vm_directory_size(rdev); - size += rdev->vm_manager.max_pfn * 8; - size *= 2; - r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - RADEON_GPU_PAGE_ALIGN(size), - RADEON_VM_PTB_ALIGN_SIZE, - RADEON_GEM_DOMAIN_VRAM); - if (r) { - dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", - (rdev->vm_manager.max_pfn * 8) >> 10); - return r; - } - - r = radeon_asic_vm_init(rdev); - if (r) - return r; - - rdev->vm_manager.enabled = true; - - r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); - if (r) - return r; - } - - /* restore page table */ - list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { - if (vm->page_directory == NULL) - continue; - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } - } - return 0; -} - -/** - * radeon_vm_free_pt - free the page table for a specific vm - * - * @rdev: radeon_device pointer - * @vm: vm to unbind - * - * Free the page table of a specific vm (cayman+). - * - * Global and local mutex must be lock! - */ -static void radeon_vm_free_pt(struct radeon_device *rdev, - struct radeon_vm *vm) -{ - struct radeon_bo_va *bo_va; - int i; - - if (!vm->page_directory) - return; - - list_del_init(&vm->list); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } - - if (vm->page_tables == NULL) - return; - - for (i = 0; i < radeon_vm_num_pdes(rdev); i++) - radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); - - kfree(vm->page_tables); -} - -/** - * radeon_vm_manager_fini - tear down the vm manager - * - * @rdev: radeon_device pointer - * - * Tear down the VM manager (cayman+). - */ -void radeon_vm_manager_fini(struct radeon_device *rdev) -{ - struct radeon_vm *vm, *tmp; - int i; - - if (!rdev->vm_manager.enabled) - return; - - mutex_lock(&rdev->vm_manager.lock); - /* free all allocated page tables */ - list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&vm->mutex); - } - for (i = 0; i < RADEON_NUM_VM; ++i) { - radeon_fence_unref(&rdev->vm_manager.active[i]); - } - radeon_asic_vm_fini(rdev); - mutex_unlock(&rdev->vm_manager.lock); - - radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); - radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); - rdev->vm_manager.enabled = false; -} - -/** - * radeon_vm_evict - evict page table to make room for new one - * - * @rdev: radeon_device pointer - * @vm: VM we want to allocate something for - * - * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). - * Returns 0 for success, -ENOMEM for failure. - * - * Global and local mutex must be locked! - */ -static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) -{ - struct radeon_vm *vm_evict; - - if (list_empty(&rdev->vm_manager.lru_vm)) - return -ENOMEM; - - vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, - struct radeon_vm, list); - if (vm_evict == vm) - return -ENOMEM; - - mutex_lock(&vm_evict->mutex); - radeon_vm_free_pt(rdev, vm_evict); - mutex_unlock(&vm_evict->mutex); - return 0; -} - -/** - * radeon_vm_alloc_pt - allocates a page table for a VM - * - * @rdev: radeon_device pointer - * @vm: vm to bind - * - * Allocate a page table for the requested vm (cayman+). - * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! - */ -int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) -{ - unsigned pd_size, pd_entries, pts_size; - struct radeon_ib ib; - int r; - - if (vm == NULL) { - return -EINVAL; - } - - if (vm->page_directory != NULL) { - return 0; - } - - pd_size = radeon_vm_directory_size(rdev); - pd_entries = radeon_vm_num_pdes(rdev); - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_directory, pd_size, - RADEON_VM_PTB_ALIGN_SIZE, false); - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - - } else if (r) { - return r; - } - - vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); - - /* Initially clear the page directory */ - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, - NULL, pd_entries * 2 + 64); - if (r) { - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - - ib.length_dw = 0; - - radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, - 0, pd_entries, 0, 0); - - radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush); - - /* allocate page table array */ - pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); - vm->page_tables = kzalloc(pts_size, GFP_KERNEL); - - if (vm->page_tables == NULL) { - DRM_ERROR("Cannot allocate memory for page table array\n"); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return -ENOMEM; - } - - return 0; -} - -/** - * radeon_vm_add_to_lru - add VMs page table to LRU list - * - * @rdev: radeon_device pointer - * @vm: vm to add to LRU - * - * Add the allocated page table to the LRU list (cayman+). - * - * Global mutex must be locked! - */ -void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) -{ - list_del_init(&vm->list); - list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); -} - -/** - * radeon_vm_grab_id - allocate the next free VMID - * - * @rdev: radeon_device pointer - * @vm: vm to allocate id for - * @ring: ring we want to submit job to - * - * Allocate an id for the vm (cayman+). - * Returns the fence we need to sync to (if any). - * - * Global and local mutex must be locked! - */ -struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, - struct radeon_vm *vm, int ring) -{ - struct radeon_fence *best[RADEON_NUM_RINGS] = {}; - unsigned choices[2] = {}; - unsigned i; - - /* check if the id is still valid */ - if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) - return NULL; - - /* we definately need to flush */ - radeon_fence_unref(&vm->last_flush); - - /* skip over VMID 0, since it is the system VM */ - for (i = 1; i < rdev->vm_manager.nvm; ++i) { - struct radeon_fence *fence = rdev->vm_manager.active[i]; - - if (fence == NULL) { - /* found a free one */ - vm->id = i; - trace_radeon_vm_grab_id(vm->id, ring); - return NULL; - } - - if (radeon_fence_is_earlier(fence, best[fence->ring])) { - best[fence->ring] = fence; - choices[fence->ring == ring ? 0 : 1] = i; - } - } - - for (i = 0; i < 2; ++i) { - if (choices[i]) { - vm->id = choices[i]; - trace_radeon_vm_grab_id(vm->id, ring); - return rdev->vm_manager.active[choices[i]]; - } - } - - /* should never happen */ - BUG(); - return NULL; -} - -/** - * radeon_vm_fence - remember fence for vm - * - * @rdev: radeon_device pointer - * @vm: vm we want to fence - * @fence: fence to remember - * - * Fence the vm (cayman+). - * Set the fence used to protect page table and id. - * - * Global and local mutex must be locked! - */ -void radeon_vm_fence(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_fence *fence) -{ - radeon_fence_unref(&rdev->vm_manager.active[vm->id]); - rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); - - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(fence); - - radeon_fence_unref(&vm->last_id_use); - vm->last_id_use = radeon_fence_ref(fence); -} - -/** - * radeon_vm_bo_find - find the bo_va for a specific vm & bo - * - * @vm: requested vm - * @bo: requested buffer object - * - * Find @bo inside the requested vm (cayman+). - * Search inside the @bos vm list for the requested vm - * Returns the found bo_va or NULL if none is found - * - * Object has to be reserved! - */ -struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - list_for_each_entry(bo_va, &bo->va, bo_list) { - if (bo_va->vm == vm) { - return bo_va; - } - } - return NULL; -} - -/** - * radeon_vm_bo_add - add a bo to a specific vm - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * - * Add @bo into the requested vm (cayman+). - * Add @bo to the list of bos associated with the vm - * Returns newly added bo_va or NULL for failure - * - * Object has to be reserved! - */ -struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); - if (bo_va == NULL) { - return NULL; - } - bo_va->vm = vm; - bo_va->bo = bo; - bo_va->soffset = 0; - bo_va->eoffset = 0; - bo_va->flags = 0; - bo_va->valid = false; - bo_va->ref_count = 1; - INIT_LIST_HEAD(&bo_va->bo_list); - INIT_LIST_HEAD(&bo_va->vm_list); - - mutex_lock(&vm->mutex); - list_add(&bo_va->vm_list, &vm->va); - list_add_tail(&bo_va->bo_list, &bo->va); - mutex_unlock(&vm->mutex); - - return bo_va; -} - -/** - * radeon_vm_bo_set_addr - set bos virtual address inside a vm - * - * @rdev: radeon_device pointer - * @bo_va: bo_va to store the address - * @soffset: requested offset of the buffer in the VM address space - * @flags: attributes of pages (read/write/valid/etc.) - * - * Set offset of @bo_va (cayman+). - * Validate and set the offset requested within the vm address space. - * Returns 0 for success, error for failure. - * - * Object has to be reserved! - */ -int radeon_vm_bo_set_addr(struct radeon_device *rdev, - struct radeon_bo_va *bo_va, - uint64_t soffset, - uint32_t flags) -{ - uint64_t size = radeon_bo_size(bo_va->bo); - uint64_t eoffset, last_offset = 0; - struct radeon_vm *vm = bo_va->vm; - struct radeon_bo_va *tmp; - struct list_head *head; - unsigned last_pfn; - - if (soffset) { - /* make sure object fit at this offset */ - eoffset = soffset + size; - if (soffset >= eoffset) { - return -EINVAL; - } - - last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; - if (last_pfn > rdev->vm_manager.max_pfn) { - dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", - last_pfn, rdev->vm_manager.max_pfn); - return -EINVAL; - } - - } else { - eoffset = last_pfn = 0; - } - - mutex_lock(&vm->mutex); - head = &vm->va; - last_offset = 0; - list_for_each_entry(tmp, &vm->va, vm_list) { - if (bo_va == tmp) { - /* skip over currently modified bo */ - continue; - } - - if (soffset >= last_offset && eoffset <= tmp->soffset) { - /* bo can be added before this one */ - break; - } - if (eoffset > tmp->soffset && soffset < tmp->eoffset) { - /* bo and tmp overlap, invalid offset */ - dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", - bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, - (unsigned)tmp->soffset, (unsigned)tmp->eoffset); - mutex_unlock(&vm->mutex); - return -EINVAL; - } - last_offset = tmp->eoffset; - head = &tmp->vm_list; - } - - bo_va->soffset = soffset; - bo_va->eoffset = eoffset; - bo_va->flags = flags; - bo_va->valid = false; - list_move(&bo_va->vm_list, head); - - mutex_unlock(&vm->mutex); - return 0; -} - -/** - * radeon_vm_map_gart - get the physical address of a gart page - * - * @rdev: radeon_device pointer - * @addr: the unmapped addr - * - * Look up the physical address of the page that the pte resolves - * to (cayman+). - * Returns the physical address of the page. - */ -uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) -{ - uint64_t result; - - /* page table offset */ - result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; - - /* in case cpu page size != gpu page size*/ - result |= addr & (~PAGE_MASK); - - return result; -} - -/** - * radeon_vm_page_flags - translate page flags to what the hw uses - * - * @flags: flags comming from userspace - * - * Translate the flags the userspace ABI uses to hw flags. - */ -static uint32_t radeon_vm_page_flags(uint32_t flags) -{ - uint32_t hw_flags = 0; - hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; - hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; - hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; - if (flags & RADEON_VM_PAGE_SYSTEM) { - hw_flags |= R600_PTE_SYSTEM; - hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; - } - return hw_flags; -} - -/** - * radeon_vm_update_pdes - make sure that page directory is valid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * - * Allocates new page tables if necessary - * and updates the page directory (cayman+). - * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! - */ -static int radeon_vm_update_pdes(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_ib *ib, - uint64_t start, uint64_t end) -{ - static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; - - uint64_t last_pde = ~0, last_pt = ~0; - unsigned count = 0; - uint64_t pt_idx; - int r; - - start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; - end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; - - /* walk over the address space and update the page directory */ - for (pt_idx = start; pt_idx <= end; ++pt_idx) { - uint64_t pde, pt; - - if (vm->page_tables[pt_idx]) - continue; - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_tables[pt_idx], - RADEON_VM_PTE_COUNT * 8, - RADEON_GPU_PAGE_SIZE, false); - - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - } else if (r) { - return r; - } - - pde = vm->pd_gpu_addr + pt_idx * 8; - - pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); - - if (((last_pde + 8 * count) != pde) || - ((last_pt + incr * count) != pt)) { - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, - last_pt, count, incr, - R600_PTE_VALID); - - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); - } - - count = 1; - last_pde = pde; - last_pt = pt; - } else { - ++count; - } - } - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, - incr, R600_PTE_VALID); - - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); - } - - return 0; -} - -/** - * radeon_vm_update_ptes - make sure that page tables are valid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * @dst: destination address to map to - * @flags: mapping flags - * - * Update the page tables in the range @start - @end (cayman+). - * - * Global and local mutex must be locked! - */ -static void radeon_vm_update_ptes(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_ib *ib, - uint64_t start, uint64_t end, - uint64_t dst, uint32_t flags) -{ - static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; - - uint64_t last_pte = ~0, last_dst = ~0; - unsigned count = 0; - uint64_t addr; - - start = start / RADEON_GPU_PAGE_SIZE; - end = end / RADEON_GPU_PAGE_SIZE; - - /* walk over the address space and update the page tables */ - for (addr = start; addr < end; ) { - uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; - unsigned nptes; - uint64_t pte; - - if ((addr & ~mask) == (end & ~mask)) - nptes = end - addr; - else - nptes = RADEON_VM_PTE_COUNT - (addr & mask); - - pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); - pte += (addr & mask) * 8; - - if ((last_pte + 8 * count) != pte) { - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pte, - last_dst, count, - RADEON_GPU_PAGE_SIZE, - flags); - } - - count = nptes; - last_pte = pte; - last_dst = dst; - } else { - count += nptes; - } - - addr += nptes; - dst += nptes * RADEON_GPU_PAGE_SIZE; - } - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pte, - last_dst, count, - RADEON_GPU_PAGE_SIZE, flags); - } -} - -/** - * radeon_vm_bo_update - map a bo into the vm page table - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * @mem: ttm mem - * - * Fill in the page table entries for @bo (cayman+). - * Returns 0 for success, -EINVAL for failure. - * - * Object have to be reserved & global and local mutex must be locked! - */ -int radeon_vm_bo_update(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_bo *bo, - struct ttm_mem_reg *mem) -{ - struct radeon_ib ib; - struct radeon_bo_va *bo_va; - unsigned nptes, npdes, ndw; - uint64_t addr; - int r; - - /* nothing to do if vm isn't bound */ - if (vm->page_directory == NULL) - return 0; - - bo_va = radeon_vm_bo_find(vm, bo); - if (bo_va == NULL) { - dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); - return -EINVAL; - } - - if (!bo_va->soffset) { - dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", - bo, vm); - return -EINVAL; - } - - if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) - return 0; - - bo_va->flags &= ~RADEON_VM_PAGE_VALID; - bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; - if (mem) { - addr = mem->start << PAGE_SHIFT; - if (mem->mem_type != TTM_PL_SYSTEM) { - bo_va->flags |= RADEON_VM_PAGE_VALID; - bo_va->valid = true; - } - if (mem->mem_type == TTM_PL_TT) { - bo_va->flags |= RADEON_VM_PAGE_SYSTEM; - } else { - addr += rdev->vm_manager.vram_base_offset; - } - } else { - addr = 0; - bo_va->valid = false; - } - - trace_radeon_vm_bo_update(bo_va); - - nptes = radeon_bo_ngpu_pages(bo); - - /* assume two extra pdes in case the mapping overlaps the borders */ - npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; - - /* padding, etc. */ - ndw = 64; - - if (RADEON_VM_BLOCK_SIZE > 11) - /* reserve space for one header for every 2k dwords */ - ndw += (nptes >> 11) * 4; - else - /* reserve space for one header for - every (1 << BLOCK_SIZE) entries */ - ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; - - /* reserve space for pte addresses */ - ndw += nptes * 2; - - /* reserve space for one header for every 2k dwords */ - ndw += (npdes >> 11) * 4; - - /* reserve space for pde addresses */ - ndw += npdes * 2; - - /* reserve space for clearing new page tables */ - ndw += npdes * 2 * RADEON_VM_PTE_COUNT; - - /* update too big for an IB */ - if (ndw > 0xfffff) - return -ENOMEM; - - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); - if (r) - return r; - ib.length_dw = 0; - - r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); - if (r) { - radeon_ib_free(rdev, &ib); - return r; - } - - radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, - addr, radeon_vm_page_flags(bo_va->flags)); - - radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - return r; - } - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush); - - return 0; -} - -/** - * radeon_vm_bo_rmv - remove a bo to a specific vm - * - * @rdev: radeon_device pointer - * @bo_va: requested bo_va - * - * Remove @bo_va->bo from the requested vm (cayman+). - * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and - * remove the ptes for @bo_va in the page table. - * Returns 0 for success. - * - * Object have to be reserved! - */ -int radeon_vm_bo_rmv(struct radeon_device *rdev, - struct radeon_bo_va *bo_va) -{ - int r = 0; - - mutex_lock(&rdev->vm_manager.lock); - mutex_lock(&bo_va->vm->mutex); - if (bo_va->soffset) { - r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); - } - mutex_unlock(&rdev->vm_manager.lock); - list_del(&bo_va->vm_list); - mutex_unlock(&bo_va->vm->mutex); - list_del(&bo_va->bo_list); - - kfree(bo_va); - return r; -} - -/** - * radeon_vm_bo_invalidate - mark the bo as invalid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * - * Mark @bo as invalid (cayman+). - */ -void radeon_vm_bo_invalidate(struct radeon_device *rdev, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - list_for_each_entry(bo_va, &bo->va, bo_list) { - bo_va->valid = false; - } -} - -/** - * radeon_vm_init - initialize a vm instance - * - * @rdev: radeon_device pointer - * @vm: requested vm - * - * Init @vm fields (cayman+). - */ -void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) -{ - vm->id = 0; - vm->fence = NULL; - vm->last_flush = NULL; - vm->last_id_use = NULL; - mutex_init(&vm->mutex); - INIT_LIST_HEAD(&vm->list); - INIT_LIST_HEAD(&vm->va); -} - -/** - * radeon_vm_fini - tear down a vm instance - * - * @rdev: radeon_device pointer - * @vm: requested vm - * - * Tear down @vm (cayman+). - * Unbind the VM and remove all bos from the vm bo list - */ -void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) -{ - struct radeon_bo_va *bo_va, *tmp; - int r; - - mutex_lock(&rdev->vm_manager.lock); - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&rdev->vm_manager.lock); - - if (!list_empty(&vm->va)) { - dev_err(rdev->dev, "still active bo inside vm\n"); - } - list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { - list_del_init(&bo_va->vm_list); - r = radeon_bo_reserve(bo_va->bo, false); - if (!r) { - list_del_init(&bo_va->bo_list); - radeon_bo_unreserve(bo_va->bo); - kfree(bo_va); - } - } - radeon_fence_unref(&vm->fence); - radeon_fence_unref(&vm->last_flush); - radeon_fence_unref(&vm->last_id_use); - mutex_unlock(&vm->mutex); -} diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index b96c819024b..d09650c1d72 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -344,18 +344,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, } robj = gem_to_radeon_bo(gobj); r = radeon_bo_wait(robj, &cur_placement, true); - switch (cur_placement) { - case TTM_PL_VRAM: - args->domain = RADEON_GEM_DOMAIN_VRAM; - break; - case TTM_PL_TT: - args->domain = RADEON_GEM_DOMAIN_GTT; - break; - case TTM_PL_SYSTEM: - args->domain = RADEON_GEM_DOMAIN_CPU; - default: - break; - } + args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_unreference_unlocked(gobj); r = radeon_gem_handle_lockup(rdev, r); return r; @@ -533,6 +522,42 @@ out: return r; } +int radeon_gem_op_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct drm_radeon_gem_op *args = data; + struct drm_gem_object *gobj; + struct radeon_bo *robj; + int r; + + gobj = drm_gem_object_lookup(dev, filp, args->handle); + if (gobj == NULL) { + return -ENOENT; + } + robj = gem_to_radeon_bo(gobj); + r = radeon_bo_reserve(robj, false); + if (unlikely(r)) + goto out; + + switch (args->op) { + case RADEON_GEM_OP_GET_INITIAL_DOMAIN: + args->value = robj->initial_domain; + break; + case RADEON_GEM_OP_SET_INITIAL_DOMAIN: + robj->initial_domain = args->value & (RADEON_GEM_DOMAIN_VRAM | + RADEON_GEM_DOMAIN_GTT | + RADEON_GEM_DOMAIN_CPU); + break; + default: + r = -EINVAL; + } + + radeon_bo_unreserve(robj); +out: + drm_gem_object_unreference_unlocked(gobj); + return r; +} + int radeon_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 114d1672d61..37b1deacb5b 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -433,6 +433,9 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file case RADEON_CS_RING_UVD: *value = rdev->ring[R600_RING_TYPE_UVD_INDEX].ready; break; + case RADEON_CS_RING_VCE: + *value = rdev->ring[TN_RING_TYPE_VCE1_INDEX].ready; + break; default: return -EINVAL; } @@ -477,6 +480,27 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file else *value = rdev->pm.default_sclk * 10; break; + case RADEON_INFO_VCE_FW_VERSION: + *value = rdev->vce.fw_version; + break; + case RADEON_INFO_VCE_FB_VERSION: + *value = rdev->vce.fb_version; + break; + case RADEON_INFO_NUM_BYTES_MOVED: + value = (uint32_t*)&value64; + value_size = sizeof(uint64_t); + value64 = atomic64_read(&rdev->num_bytes_moved); + break; + case RADEON_INFO_VRAM_USAGE: + value = (uint32_t*)&value64; + value_size = sizeof(uint64_t); + value64 = atomic64_read(&rdev->vram_usage); + break; + case RADEON_INFO_GTT_USAGE: + value = (uint32_t*)&value64; + value_size = sizeof(uint64_t); + value64 = atomic64_read(&rdev->gtt_usage); + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; @@ -535,7 +559,13 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) return -ENOMEM; } - radeon_vm_init(rdev, &fpriv->vm); + r = radeon_vm_init(rdev, &fpriv->vm); + if (r) + return r; + + r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); + if (r) + return r; /* map the ib pool buffer read only into * virtual address space */ @@ -544,6 +574,8 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET, RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); + + radeon_bo_unreserve(rdev->ring_tmp_bo.bo); if (r) { radeon_vm_fini(rdev, &fpriv->vm); kfree(fpriv); @@ -610,6 +642,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev, if (rdev->cmask_filp == file_priv) rdev->cmask_filp = NULL; radeon_uvd_free_handles(rdev, file_priv); + radeon_vce_free_handles(rdev, file_priv); } /* @@ -804,5 +837,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 08595cf90b0..ca79431b2c1 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -56,11 +56,36 @@ static void radeon_bo_clear_va(struct radeon_bo *bo) } } +static void radeon_update_memory_usage(struct radeon_bo *bo, + unsigned mem_type, int sign) +{ + struct radeon_device *rdev = bo->rdev; + u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT; + + switch (mem_type) { + case TTM_PL_TT: + if (sign > 0) + atomic64_add(size, &rdev->gtt_usage); + else + atomic64_sub(size, &rdev->gtt_usage); + break; + case TTM_PL_VRAM: + if (sign > 0) + atomic64_add(size, &rdev->vram_usage); + else + atomic64_sub(size, &rdev->vram_usage); + break; + } +} + static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct radeon_bo *bo; bo = container_of(tbo, struct radeon_bo, tbo); + + radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1); + mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); mutex_unlock(&bo->rdev->gem.mutex); @@ -145,6 +170,9 @@ int radeon_bo_create(struct radeon_device *rdev, bo->surface_reg = -1; INIT_LIST_HEAD(&bo->list); INIT_LIST_HEAD(&bo->va); + bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM | + RADEON_GEM_DOMAIN_GTT | + RADEON_GEM_DOMAIN_CPU); radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are uninterruptible */ down_read(&rdev->pm.mclk_lock); @@ -338,39 +366,105 @@ void radeon_bo_fini(struct radeon_device *rdev) arch_phys_wc_del(rdev->mc.vram_mtrr); } -void radeon_bo_list_add_object(struct radeon_bo_list *lobj, - struct list_head *head) +/* Returns how many bytes TTM can move per IB. + */ +static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev) { - if (lobj->written) { - list_add(&lobj->tv.head, head); - } else { - list_add_tail(&lobj->tv.head, head); - } + u64 real_vram_size = rdev->mc.real_vram_size; + u64 vram_usage = atomic64_read(&rdev->vram_usage); + + /* This function is based on the current VRAM usage. + * + * - If all of VRAM is free, allow relocating the number of bytes that + * is equal to 1/4 of the size of VRAM for this IB. + + * - If more than one half of VRAM is occupied, only allow relocating + * 1 MB of data for this IB. + * + * - From 0 to one half of used VRAM, the threshold decreases + * linearly. + * __________________ + * 1/4 of -|\ | + * VRAM | \ | + * | \ | + * | \ | + * | \ | + * | \ | + * | \ | + * | \________|1 MB + * |----------------| + * VRAM 0 % 100 % + * used used + * + * Note: It's a threshold, not a limit. The threshold must be crossed + * for buffer relocations to stop, so any buffer of an arbitrary size + * can be moved as long as the threshold isn't crossed before + * the relocation takes place. We don't want to disable buffer + * relocations completely. + * + * The idea is that buffers should be placed in VRAM at creation time + * and TTM should only do a minimum number of relocations during + * command submission. In practice, you need to submit at least + * a dozen IBs to move all buffers to VRAM if they are in GTT. + * + * Also, things can get pretty crazy under memory pressure and actual + * VRAM usage can change a lot, so playing safe even at 50% does + * consistently increase performance. + */ + + u64 half_vram = real_vram_size >> 1; + u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage; + u64 bytes_moved_threshold = half_free_vram >> 1; + return max(bytes_moved_threshold, 1024*1024ull); } -int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, +int radeon_bo_list_validate(struct radeon_device *rdev, + struct ww_acquire_ctx *ticket, struct list_head *head, int ring) { - struct radeon_bo_list *lobj; + struct radeon_cs_reloc *lobj; struct radeon_bo *bo; - u32 domain; int r; + u64 bytes_moved = 0, initial_bytes_moved; + u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev); r = ttm_eu_reserve_buffers(ticket, head); if (unlikely(r != 0)) { return r; } + list_for_each_entry(lobj, head, tv.head) { - bo = lobj->bo; + bo = lobj->robj; if (!bo->pin_count) { - domain = lobj->domain; - + u32 domain = lobj->domain; + u32 current_domain = + radeon_mem_type_to_domain(bo->tbo.mem.mem_type); + + /* Check if this buffer will be moved and don't move it + * if we have moved too many buffers for this IB already. + * + * Note that this allows moving at least one buffer of + * any size, because it doesn't take the current "bo" + * into account. We don't want to disallow buffer moves + * completely. + */ + if (current_domain != RADEON_GEM_DOMAIN_CPU && + (domain & current_domain) == 0 && /* will be moved */ + bytes_moved > bytes_moved_threshold) { + /* don't move it */ + domain = current_domain; + } + retry: radeon_ttm_placement_from_domain(bo, domain); if (ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_force_into_uvd_segment(bo); - r = ttm_bo_validate(&bo->tbo, &bo->placement, - true, false); + + initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved); + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + bytes_moved += atomic64_read(&rdev->num_bytes_moved) - + initial_bytes_moved; + if (unlikely(r)) { if (r != -ERESTARTSYS && domain != lobj->alt_domain) { domain = lobj->alt_domain; @@ -564,14 +658,23 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, } void radeon_bo_move_notify(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem) + struct ttm_mem_reg *new_mem) { struct radeon_bo *rbo; + if (!radeon_ttm_bo_is_radeon_bo(bo)) return; + rbo = container_of(bo, struct radeon_bo, tbo); radeon_bo_check_tiling(rbo, 0, 1); radeon_vm_bo_invalidate(rbo->rdev, rbo); + + /* update statistics */ + if (!new_mem) + return; + + radeon_update_memory_usage(rbo, bo->mem.mem_type, -1); + radeon_update_memory_usage(rbo, new_mem->mem_type, 1); } int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 209b1115026..9e7b25a0629 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -138,9 +138,8 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev); extern void radeon_bo_force_delete(struct radeon_device *rdev); extern int radeon_bo_init(struct radeon_device *rdev); extern void radeon_bo_fini(struct radeon_device *rdev); -extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, - struct list_head *head); -extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, +extern int radeon_bo_list_validate(struct radeon_device *rdev, + struct ww_acquire_ctx *ticket, struct list_head *head, int ring); extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, struct vm_area_struct *vma); @@ -151,7 +150,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo, extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, bool force_drop); extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem); + struct ttm_mem_reg *new_mem); extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); @@ -181,7 +180,7 @@ extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev, extern int radeon_sa_bo_new(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager, struct radeon_sa_bo **sa_bo, - unsigned size, unsigned align, bool block); + unsigned size, unsigned align); extern void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo, struct radeon_fence *fence); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 8e8153e471c..ee738a52463 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -260,7 +260,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) if (!ring->ready) { continue; } - r = radeon_fence_wait_empty_locked(rdev, i); + r = radeon_fence_wait_empty(rdev, i); if (r) { /* needs a GPU reset dont reset here */ mutex_unlock(&rdev->ring_lock); @@ -826,6 +826,9 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) /* no need to reprogram if nothing changed unless we are on BTC+ */ if (rdev->pm.dpm.current_ps == rdev->pm.dpm.requested_ps) { + /* vce just modifies an existing state so force a change */ + if (ps->vce_active != rdev->pm.dpm.vce_active) + goto force; if ((rdev->family < CHIP_BARTS) || (rdev->flags & RADEON_IS_IGP)) { /* for pre-BTC and APUs if the num crtcs changed but state is the same, * all we need to do is update the display configuration. @@ -862,16 +865,21 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) } } +force: if (radeon_dpm == 1) { printk("switching from power state:\n"); radeon_dpm_print_power_state(rdev, rdev->pm.dpm.current_ps); printk("switching to power state:\n"); radeon_dpm_print_power_state(rdev, rdev->pm.dpm.requested_ps); } + mutex_lock(&rdev->ddev->struct_mutex); down_write(&rdev->pm.mclk_lock); mutex_lock(&rdev->ring_lock); + /* update whether vce is active */ + ps->vce_active = rdev->pm.dpm.vce_active; + ret = radeon_dpm_pre_set_power_state(rdev); if (ret) goto done; @@ -888,7 +896,7 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) for (i = 0; i < RADEON_NUM_RINGS; i++) { struct radeon_ring *ring = &rdev->ring[i]; if (ring->ready) - radeon_fence_wait_empty_locked(rdev, i); + radeon_fence_wait_empty(rdev, i); } /* program the new power state */ @@ -935,8 +943,6 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) if (enable) { mutex_lock(&rdev->pm.mutex); rdev->pm.dpm.uvd_active = true; - /* disable this for now */ -#if 0 if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0)) dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD; else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0)) @@ -946,7 +952,6 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2)) dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2; else -#endif dpm_state = POWER_STATE_TYPE_INTERNAL_UVD; rdev->pm.dpm.state = dpm_state; mutex_unlock(&rdev->pm.mutex); @@ -960,6 +965,23 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) } } +void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable) +{ + if (enable) { + mutex_lock(&rdev->pm.mutex); + rdev->pm.dpm.vce_active = true; + /* XXX select vce level based on ring/task */ + rdev->pm.dpm.vce_level = RADEON_VCE_LEVEL_AC_ALL; + mutex_unlock(&rdev->pm.mutex); + } else { + mutex_lock(&rdev->pm.mutex); + rdev->pm.dpm.vce_active = false; + mutex_unlock(&rdev->pm.mutex); + } + + radeon_pm_compute_clocks(rdev); +} + static void radeon_pm_suspend_old(struct radeon_device *rdev) { mutex_lock(&rdev->pm.mutex); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 1b783f0e6d3..8b0dfdd2379 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -63,7 +63,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, { int r; - r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); + r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256); if (r) { dev_err(rdev->dev, "failed to get a new IB (%d)\n", r); return r; @@ -139,12 +139,19 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, } /* 64 dwords should be enough for fence too */ - r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8); + r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_SYNCS * 8); if (r) { dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } + /* grab a vm id if necessary */ + if (ib->vm) { + struct radeon_fence *vm_id_fence; + vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring); + radeon_semaphore_sync_to(ib->semaphore, vm_id_fence); + } + /* sync with other rings */ r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); if (r) { @@ -153,11 +160,9 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, return r; } - /* if we can't remember our last VM flush then flush now! */ - /* XXX figure out why we have to flush for every IB */ - if (ib->vm /*&& !ib->vm->last_flush*/) { - radeon_ring_vm_flush(rdev, ib->ring, ib->vm); - } + if (ib->vm) + radeon_vm_flush(rdev, ib->vm, ib->ring); + if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); @@ -172,10 +177,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (const_ib) { const_ib->fence = radeon_fence_ref(ib->fence); } - /* we just flushed the VM, remember that */ - if (ib->vm && !ib->vm->last_flush) { - ib->vm->last_flush = radeon_fence_ref(ib->fence); - } + + if (ib->vm) + radeon_vm_fence(rdev, ib->vm, ib->fence); + radeon_ring_unlock_commit(rdev, ring); return 0; } @@ -342,13 +347,17 @@ bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev, */ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) { - ring->rptr = radeon_ring_get_rptr(rdev, ring); + uint32_t rptr = radeon_ring_get_rptr(rdev, ring); + /* This works because ring_size is a power of 2 */ - ring->ring_free_dw = (ring->rptr + (ring->ring_size / 4)); + ring->ring_free_dw = rptr + (ring->ring_size / 4); ring->ring_free_dw -= ring->wptr; ring->ring_free_dw &= ring->ptr_mask; if (!ring->ring_free_dw) { + /* this is an empty ring */ ring->ring_free_dw = ring->ring_size / 4; + /* update lockup info to avoid false positive */ + radeon_ring_lockup_update(rdev, ring); } } @@ -372,19 +381,13 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi /* Align requested size with padding so unlock_commit can * pad safely */ radeon_ring_free_size(rdev, ring); - if (ring->ring_free_dw == (ring->ring_size / 4)) { - /* This is an empty ring update lockup info to avoid - * false positive. - */ - radeon_ring_lockup_update(ring); - } ndw = (ndw + ring->align_mask) & ~ring->align_mask; while (ndw > (ring->ring_free_dw - 1)) { radeon_ring_free_size(rdev, ring); if (ndw < ring->ring_free_dw) { break; } - r = radeon_fence_wait_next_locked(rdev, ring->idx); + r = radeon_fence_wait_next(rdev, ring->idx); if (r) return r; } @@ -478,39 +481,17 @@ void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *rin } /** - * radeon_ring_force_activity - add some nop packets to the ring - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Add some nop packets to the ring to force activity (all asics). - * Used for lockup detection to see if the rptr is advancing. - */ -void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring) -{ - int r; - - radeon_ring_free_size(rdev, ring); - if (ring->rptr == ring->wptr) { - r = radeon_ring_alloc(rdev, ring, 1); - if (!r) { - radeon_ring_write(ring, ring->nop); - radeon_ring_commit(rdev, ring); - } - } -} - -/** * radeon_ring_lockup_update - update lockup variables * * @ring: radeon_ring structure holding ring information * * Update the last rptr value and timestamp (all asics). */ -void radeon_ring_lockup_update(struct radeon_ring *ring) +void radeon_ring_lockup_update(struct radeon_device *rdev, + struct radeon_ring *ring) { - ring->last_rptr = ring->rptr; - ring->last_activity = jiffies; + atomic_set(&ring->last_rptr, radeon_ring_get_rptr(rdev, ring)); + atomic64_set(&ring->last_activity, jiffies_64); } /** @@ -518,40 +499,23 @@ void radeon_ring_lockup_update(struct radeon_ring *ring) * @rdev: radeon device structure * @ring: radeon_ring structure holding ring information * - * We don't need to initialize the lockup tracking information as we will either - * have CP rptr to a different value of jiffies wrap around which will force - * initialization of the lockup tracking informations. - * - * A possible false positivie is if we get call after while and last_cp_rptr == - * the current CP rptr, even if it's unlikely it might happen. To avoid this - * if the elapsed time since last call is bigger than 2 second than we return - * false and update the tracking information. Due to this the caller must call - * radeon_ring_test_lockup several time in less than 2sec for lockup to be reported - * the fencing code should be cautious about that. - * - * Caller should write to the ring to force CP to do something so we don't get - * false positive when CP is just gived nothing to do. - * - **/ + */ bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring) { - unsigned long cjiffies, elapsed; + uint32_t rptr = radeon_ring_get_rptr(rdev, ring); + uint64_t last = atomic64_read(&ring->last_activity); + uint64_t elapsed; - cjiffies = jiffies; - if (!time_after(cjiffies, ring->last_activity)) { - /* likely a wrap around */ - radeon_ring_lockup_update(ring); + if (rptr != atomic_read(&ring->last_rptr)) { + /* ring is still working, no lockup */ + radeon_ring_lockup_update(rdev, ring); return false; } - ring->rptr = radeon_ring_get_rptr(rdev, ring); - if (ring->rptr != ring->last_rptr) { - /* CP is still working no lockup */ - radeon_ring_lockup_update(ring); - return false; - } - elapsed = jiffies_to_msecs(cjiffies - ring->last_activity); + + elapsed = jiffies_to_msecs(jiffies_64 - last); if (radeon_lockup_timeout && elapsed >= radeon_lockup_timeout) { - dev_err(rdev->dev, "GPU lockup CP stall for more than %lumsec\n", elapsed); + dev_err(rdev->dev, "ring %d stalled for more than %llumsec\n", + ring->idx, elapsed); return true; } /* give a chance to the GPU ... */ @@ -709,7 +673,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig if (radeon_debugfs_ring_init(rdev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); } - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return 0; } @@ -780,8 +744,6 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", ring->wptr, ring->wptr); - seq_printf(m, "driver's copy of the rptr: 0x%08x [%5d]\n", - ring->rptr, ring->rptr); seq_printf(m, "last semaphore signal addr : 0x%016llx\n", ring->last_semaphore_signal_addr); seq_printf(m, "last semaphore wait addr : 0x%016llx\n", @@ -814,6 +776,8 @@ static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX; static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX; +static int si_vce1_index = TN_RING_TYPE_VCE1_INDEX; +static int si_vce2_index = TN_RING_TYPE_VCE2_INDEX; static struct drm_info_list radeon_debugfs_ring_info_list[] = { {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index}, @@ -822,6 +786,8 @@ static struct drm_info_list radeon_debugfs_ring_info_list[] = { {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index}, {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index}, {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index}, + {"radeon_ring_vce1", radeon_debugfs_ring_info, 0, &si_vce1_index}, + {"radeon_ring_vce2", radeon_debugfs_ring_info, 0, &si_vce2_index}, }; static int radeon_debugfs_sa_info(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index c0625805cdd..adcf3e2f07d 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -312,7 +312,7 @@ static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager, int radeon_sa_bo_new(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager, struct radeon_sa_bo **sa_bo, - unsigned size, unsigned align, bool block) + unsigned size, unsigned align) { struct radeon_fence *fences[RADEON_NUM_RINGS]; unsigned tries[RADEON_NUM_RINGS]; @@ -353,14 +353,11 @@ int radeon_sa_bo_new(struct radeon_device *rdev, r = radeon_fence_wait_any(rdev, fences, false); spin_lock(&sa_manager->wq.lock); /* if we have nothing to wait for block */ - if (r == -ENOENT && block) { + if (r == -ENOENT) { r = wait_event_interruptible_locked( sa_manager->wq, radeon_sa_event(sa_manager, size, align) ); - - } else if (r == -ENOENT) { - r = -ENOMEM; } } while (!r); diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 2b42aa1914f..dbd6bcde92d 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -34,14 +34,15 @@ int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) { + uint32_t *cpu_addr; int i, r; *semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL); if (*semaphore == NULL) { return -ENOMEM; } - r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, - &(*semaphore)->sa_bo, 8, 8, true); + r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo, + 8 * RADEON_NUM_SYNCS, 8); if (r) { kfree(*semaphore); *semaphore = NULL; @@ -49,7 +50,10 @@ int radeon_semaphore_create(struct radeon_device *rdev, } (*semaphore)->waiters = 0; (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo); - *((uint64_t*)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0; + + cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo); + for (i = 0; i < RADEON_NUM_SYNCS; ++i) + cpu_addr[i] = 0; for (i = 0; i < RADEON_NUM_RINGS; ++i) (*semaphore)->sync_to[i] = NULL; @@ -125,6 +129,7 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, int ring) { + unsigned count = 0; int i, r; for (i = 0; i < RADEON_NUM_RINGS; ++i) { @@ -140,6 +145,14 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, return -EINVAL; } + if (++count > RADEON_NUM_SYNCS) { + /* not enough room, wait manually */ + r = radeon_fence_wait(fence, false); + if (r) + return r; + continue; + } + /* allocate enough space for sync command */ r = radeon_ring_alloc(rdev, &rdev->ring[i], 16); if (r) { @@ -150,7 +163,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { /* signaling wasn't successful wait manually */ radeon_ring_undo(&rdev->ring[i]); - radeon_fence_wait_locked(fence); + r = radeon_fence_wait(fence, false); + if (r) + return r; continue; } @@ -158,12 +173,16 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { /* waiting wasn't successful wait manually */ radeon_ring_undo(&rdev->ring[i]); - radeon_fence_wait_locked(fence); + r = radeon_fence_wait(fence, false); + if (r) + return r; continue; } radeon_ring_commit(rdev, &rdev->ring[i]); radeon_fence_note_sync(fence, ring); + + semaphore->gpu_addr += 8; } return 0; diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 12e8099a082..3a13e0d1055 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -257,20 +257,36 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_fence **fence) { + uint32_t handle = ring->idx ^ 0xdeafbeef; int r; if (ring->idx == R600_RING_TYPE_UVD_INDEX) { - r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); + r = radeon_uvd_get_create_msg(rdev, ring->idx, handle, NULL); if (r) { DRM_ERROR("Failed to get dummy create msg\n"); return r; } - r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence); + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, handle, fence); if (r) { DRM_ERROR("Failed to get dummy destroy msg\n"); return r; } + + } else if (ring->idx == TN_RING_TYPE_VCE1_INDEX || + ring->idx == TN_RING_TYPE_VCE2_INDEX) { + r = radeon_vce_get_create_msg(rdev, ring->idx, handle, NULL); + if (r) { + DRM_ERROR("Failed to get dummy create msg\n"); + return r; + } + + r = radeon_vce_get_destroy_msg(rdev, ring->idx, handle, fence); + if (r) { + DRM_ERROR("Failed to get dummy destroy msg\n"); + return r; + } + } else { r = radeon_ring_lock(rdev, ring, 64); if (r) { @@ -486,6 +502,16 @@ out_cleanup: printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); } +static bool radeon_test_sync_possible(struct radeon_ring *ringA, + struct radeon_ring *ringB) +{ + if (ringA->idx == TN_RING_TYPE_VCE2_INDEX && + ringB->idx == TN_RING_TYPE_VCE1_INDEX) + return false; + + return true; +} + void radeon_test_syncing(struct radeon_device *rdev) { int i, j, k; @@ -500,6 +526,9 @@ void radeon_test_syncing(struct radeon_device *rdev) if (!ringB->ready) continue; + if (!radeon_test_sync_possible(ringA, ringB)) + continue; + DRM_INFO("Testing syncing between rings %d and %d...\n", i, j); radeon_test_ring_sync(rdev, ringA, ringB); @@ -511,6 +540,12 @@ void radeon_test_syncing(struct radeon_device *rdev) if (!ringC->ready) continue; + if (!radeon_test_sync_possible(ringA, ringC)) + continue; + + if (!radeon_test_sync_possible(ringB, ringC)) + continue; + DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k); radeon_test_ring_sync2(rdev, ringA, ringB, ringC); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 77f5b0c3edb..60dfce889ec 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -406,8 +406,14 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, if (r) { memcpy: r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); + if (r) { + return r; + } } - return r; + + /* update statistics */ + atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &rdev->num_bytes_moved); + return 0; } static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 6781fee1eaa..6a2e3ff6837 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -453,7 +453,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, } reloc = p->relocs_ptr[(idx / 4)]; - start = reloc->lobj.gpu_offset; + start = reloc->gpu_offset; end = start + radeon_bo_size(reloc->robj); start += offset; @@ -805,8 +805,7 @@ void radeon_uvd_note_usage(struct radeon_device *rdev) (rdev->pm.dpm.hd != hd)) { rdev->pm.dpm.sd = sd; rdev->pm.dpm.hd = hd; - /* disable this for now */ - /*streams_changed = true;*/ + streams_changed = true; } } diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c new file mode 100644 index 00000000000..76e9904bc53 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -0,0 +1,699 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * Authors: Christian König <christian.koenig@amd.com> + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include <drm/drmP.h> +#include <drm/drm.h> + +#include "radeon.h" +#include "radeon_asic.h" +#include "sid.h" + +/* 1 second timeout */ +#define VCE_IDLE_TIMEOUT_MS 1000 + +/* Firmware Names */ +#define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin" + +MODULE_FIRMWARE(FIRMWARE_BONAIRE); + +static void radeon_vce_idle_work_handler(struct work_struct *work); + +/** + * radeon_vce_init - allocate memory, load vce firmware + * + * @rdev: radeon_device pointer + * + * First step to get VCE online, allocate memory and load the firmware + */ +int radeon_vce_init(struct radeon_device *rdev) +{ + static const char *fw_version = "[ATI LIB=VCEFW,"; + static const char *fb_version = "[ATI LIB=VCEFWSTATS,"; + unsigned long size; + const char *fw_name, *c; + uint8_t start, mid, end; + int i, r; + + INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler); + + switch (rdev->family) { + case CHIP_BONAIRE: + case CHIP_KAVERI: + case CHIP_KABINI: + fw_name = FIRMWARE_BONAIRE; + break; + + default: + return -EINVAL; + } + + r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev); + if (r) { + dev_err(rdev->dev, "radeon_vce: Can't load firmware \"%s\"\n", + fw_name); + return r; + } + + /* search for firmware version */ + + size = rdev->vce_fw->size - strlen(fw_version) - 9; + c = rdev->vce_fw->data; + for (;size > 0; --size, ++c) + if (strncmp(c, fw_version, strlen(fw_version)) == 0) + break; + + if (size == 0) + return -EINVAL; + + c += strlen(fw_version); + if (sscanf(c, "%2hhd.%2hhd.%2hhd]", &start, &mid, &end) != 3) + return -EINVAL; + + /* search for feedback version */ + + size = rdev->vce_fw->size - strlen(fb_version) - 3; + c = rdev->vce_fw->data; + for (;size > 0; --size, ++c) + if (strncmp(c, fb_version, strlen(fb_version)) == 0) + break; + + if (size == 0) + return -EINVAL; + + c += strlen(fb_version); + if (sscanf(c, "%2u]", &rdev->vce.fb_version) != 1) + return -EINVAL; + + DRM_INFO("Found VCE firmware/feedback version %hhd.%hhd.%hhd / %d!\n", + start, mid, end, rdev->vce.fb_version); + + rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8); + + /* we can only work with this fw version for now */ + if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) + return -EINVAL; + + /* allocate firmware, stack and heap BO */ + + size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + + RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; + r = radeon_bo_create(rdev, size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->vce.vcpu_bo); + if (r) { + dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r); + return r; + } + + r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); + if (r) { + radeon_bo_unref(&rdev->vce.vcpu_bo); + dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r); + return r; + } + + r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, + &rdev->vce.gpu_addr); + radeon_bo_unreserve(rdev->vce.vcpu_bo); + if (r) { + radeon_bo_unref(&rdev->vce.vcpu_bo); + dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r); + return r; + } + + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + atomic_set(&rdev->vce.handles[i], 0); + rdev->vce.filp[i] = NULL; + } + + return 0; +} + +/** + * radeon_vce_fini - free memory + * + * @rdev: radeon_device pointer + * + * Last step on VCE teardown, free firmware memory + */ +void radeon_vce_fini(struct radeon_device *rdev) +{ + if (rdev->vce.vcpu_bo == NULL) + return; + + radeon_bo_unref(&rdev->vce.vcpu_bo); + + release_firmware(rdev->vce_fw); +} + +/** + * radeon_vce_suspend - unpin VCE fw memory + * + * @rdev: radeon_device pointer + * + */ +int radeon_vce_suspend(struct radeon_device *rdev) +{ + int i; + + if (rdev->vce.vcpu_bo == NULL) + return 0; + + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) + if (atomic_read(&rdev->vce.handles[i])) + break; + + if (i == RADEON_MAX_VCE_HANDLES) + return 0; + + /* TODO: suspending running encoding sessions isn't supported */ + return -EINVAL; +} + +/** + * radeon_vce_resume - pin VCE fw memory + * + * @rdev: radeon_device pointer + * + */ +int radeon_vce_resume(struct radeon_device *rdev) +{ + void *cpu_addr; + int r; + + if (rdev->vce.vcpu_bo == NULL) + return -EINVAL; + + r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); + if (r) { + dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r); + return r; + } + + r = radeon_bo_kmap(rdev->vce.vcpu_bo, &cpu_addr); + if (r) { + radeon_bo_unreserve(rdev->vce.vcpu_bo); + dev_err(rdev->dev, "(%d) VCE map failed\n", r); + return r; + } + + memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); + + radeon_bo_kunmap(rdev->vce.vcpu_bo); + + radeon_bo_unreserve(rdev->vce.vcpu_bo); + + return 0; +} + +/** + * radeon_vce_idle_work_handler - power off VCE + * + * @work: pointer to work structure + * + * power of VCE when it's not used any more + */ +static void radeon_vce_idle_work_handler(struct work_struct *work) +{ + struct radeon_device *rdev = + container_of(work, struct radeon_device, vce.idle_work.work); + + if ((radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE1_INDEX) == 0) && + (radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE2_INDEX) == 0)) { + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + radeon_dpm_enable_vce(rdev, false); + } else { + radeon_set_vce_clocks(rdev, 0, 0); + } + } else { + schedule_delayed_work(&rdev->vce.idle_work, + msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); + } +} + +/** + * radeon_vce_note_usage - power up VCE + * + * @rdev: radeon_device pointer + * + * Make sure VCE is powerd up when we want to use it + */ +void radeon_vce_note_usage(struct radeon_device *rdev) +{ + bool streams_changed = false; + bool set_clocks = !cancel_delayed_work_sync(&rdev->vce.idle_work); + set_clocks &= schedule_delayed_work(&rdev->vce.idle_work, + msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); + + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + /* XXX figure out if the streams changed */ + streams_changed = false; + } + + if (set_clocks || streams_changed) { + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + radeon_dpm_enable_vce(rdev, true); + } else { + radeon_set_vce_clocks(rdev, 53300, 40000); + } + } +} + +/** + * radeon_vce_free_handles - free still open VCE handles + * + * @rdev: radeon_device pointer + * @filp: drm file pointer + * + * Close all VCE handles still open by this file pointer + */ +void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp) +{ + int i, r; + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + uint32_t handle = atomic_read(&rdev->vce.handles[i]); + if (!handle || rdev->vce.filp[i] != filp) + continue; + + radeon_vce_note_usage(rdev); + + r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX, + handle, NULL); + if (r) + DRM_ERROR("Error destroying VCE handle (%d)!\n", r); + + rdev->vce.filp[i] = NULL; + atomic_set(&rdev->vce.handles[i], 0); + } +} + +/** + * radeon_vce_get_create_msg - generate a VCE create msg + * + * @rdev: radeon_device pointer + * @ring: ring we should submit the msg to + * @handle: VCE session handle to use + * @fence: optional fence to return + * + * Open up a stream for HW test + */ +int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence) +{ + const unsigned ib_size_dw = 1024; + struct radeon_ib ib; + uint64_t dummy; + int i, r; + + r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + + dummy = ib.gpu_addr + 1024; + + /* stitch together an VCE create msg */ + ib.length_dw = 0; + ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ + ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ + ib.ptr[ib.length_dw++] = handle; + + ib.ptr[ib.length_dw++] = 0x00000030; /* len */ + ib.ptr[ib.length_dw++] = 0x01000001; /* create cmd */ + ib.ptr[ib.length_dw++] = 0x00000000; + ib.ptr[ib.length_dw++] = 0x00000042; + ib.ptr[ib.length_dw++] = 0x0000000a; + ib.ptr[ib.length_dw++] = 0x00000001; + ib.ptr[ib.length_dw++] = 0x00000080; + ib.ptr[ib.length_dw++] = 0x00000060; + ib.ptr[ib.length_dw++] = 0x00000100; + ib.ptr[ib.length_dw++] = 0x00000100; + ib.ptr[ib.length_dw++] = 0x0000000c; + ib.ptr[ib.length_dw++] = 0x00000000; + + ib.ptr[ib.length_dw++] = 0x00000014; /* len */ + ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ + ib.ptr[ib.length_dw++] = upper_32_bits(dummy); + ib.ptr[ib.length_dw++] = dummy; + ib.ptr[ib.length_dw++] = 0x00000001; + + for (i = ib.length_dw; i < ib_size_dw; ++i) + ib.ptr[i] = 0x0; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + } + + if (fence) + *fence = radeon_fence_ref(ib.fence); + + radeon_ib_free(rdev, &ib); + + return r; +} + +/** + * radeon_vce_get_destroy_msg - generate a VCE destroy msg + * + * @rdev: radeon_device pointer + * @ring: ring we should submit the msg to + * @handle: VCE session handle to use + * @fence: optional fence to return + * + * Close up a stream for HW test or if userspace failed to do so + */ +int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence) +{ + const unsigned ib_size_dw = 1024; + struct radeon_ib ib; + uint64_t dummy; + int i, r; + + r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + + dummy = ib.gpu_addr + 1024; + + /* stitch together an VCE destroy msg */ + ib.length_dw = 0; + ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ + ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ + ib.ptr[ib.length_dw++] = handle; + + ib.ptr[ib.length_dw++] = 0x00000014; /* len */ + ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ + ib.ptr[ib.length_dw++] = upper_32_bits(dummy); + ib.ptr[ib.length_dw++] = dummy; + ib.ptr[ib.length_dw++] = 0x00000001; + + ib.ptr[ib.length_dw++] = 0x00000008; /* len */ + ib.ptr[ib.length_dw++] = 0x02000001; /* destroy cmd */ + + for (i = ib.length_dw; i < ib_size_dw; ++i) + ib.ptr[i] = 0x0; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + } + + if (fence) + *fence = radeon_fence_ref(ib.fence); + + radeon_ib_free(rdev, &ib); + + return r; +} + +/** + * radeon_vce_cs_reloc - command submission relocation + * + * @p: parser context + * @lo: address of lower dword + * @hi: address of higher dword + * + * Patch relocation inside command stream with real buffer address + */ +int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi) +{ + struct radeon_cs_chunk *relocs_chunk; + uint64_t offset; + unsigned idx; + + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; + offset = radeon_get_ib_value(p, lo); + idx = radeon_get_ib_value(p, hi); + + if (idx >= relocs_chunk->length_dw) { + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", + idx, relocs_chunk->length_dw); + return -EINVAL; + } + + offset += p->relocs_ptr[(idx / 4)]->gpu_offset; + + p->ib.ptr[lo] = offset & 0xFFFFFFFF; + p->ib.ptr[hi] = offset >> 32; + + return 0; +} + +/** + * radeon_vce_cs_parse - parse and validate the command stream + * + * @p: parser context + * + */ +int radeon_vce_cs_parse(struct radeon_cs_parser *p) +{ + uint32_t handle = 0; + bool destroy = false; + int i, r; + + while (p->idx < p->chunks[p->chunk_ib_idx].length_dw) { + uint32_t len = radeon_get_ib_value(p, p->idx); + uint32_t cmd = radeon_get_ib_value(p, p->idx + 1); + + if ((len < 8) || (len & 3)) { + DRM_ERROR("invalid VCE command length (%d)!\n", len); + return -EINVAL; + } + + switch (cmd) { + case 0x00000001: // session + handle = radeon_get_ib_value(p, p->idx + 2); + break; + + case 0x00000002: // task info + case 0x01000001: // create + case 0x04000001: // config extension + case 0x04000002: // pic control + case 0x04000005: // rate control + case 0x04000007: // motion estimation + case 0x04000008: // rdo + break; + + case 0x03000001: // encode + r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9); + if (r) + return r; + + r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11); + if (r) + return r; + break; + + case 0x02000001: // destroy + destroy = true; + break; + + case 0x05000001: // context buffer + case 0x05000004: // video bitstream buffer + case 0x05000005: // feedback buffer + r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2); + if (r) + return r; + break; + + default: + DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); + return -EINVAL; + } + + p->idx += len / 4; + } + + if (destroy) { + /* IB contains a destroy msg, free the handle */ + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) + atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0); + + return 0; + } + + /* create or encode, validate the handle */ + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + if (atomic_read(&p->rdev->vce.handles[i]) == handle) + return 0; + } + + /* handle not found try to alloc a new one */ + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) { + p->rdev->vce.filp[i] = p->filp; + return 0; + } + } + + DRM_ERROR("No more free VCE handles!\n"); + return -EINVAL; +} + +/** + * radeon_vce_semaphore_emit - emit a semaphore command + * + * @rdev: radeon_device pointer + * @ring: engine to use + * @semaphore: address of semaphore + * @emit_wait: true=emit wait, false=emit signal + * + */ +bool radeon_vce_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + uint64_t addr = semaphore->gpu_addr; + + radeon_ring_write(ring, VCE_CMD_SEMAPHORE); + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); + radeon_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0)); + if (!emit_wait) + radeon_ring_write(ring, VCE_CMD_END); + + return true; +} + +/** + * radeon_vce_ib_execute - execute indirect buffer + * + * @rdev: radeon_device pointer + * @ib: the IB to execute + * + */ +void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + radeon_ring_write(ring, VCE_CMD_IB); + radeon_ring_write(ring, ib->gpu_addr); + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr)); + radeon_ring_write(ring, ib->length_dw); +} + +/** + * radeon_vce_fence_emit - add a fence command to the ring + * + * @rdev: radeon_device pointer + * @fence: the fence + * + */ +void radeon_vce_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr; + + radeon_ring_write(ring, VCE_CMD_FENCE); + radeon_ring_write(ring, addr); + radeon_ring_write(ring, upper_32_bits(addr)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, VCE_CMD_TRAP); + radeon_ring_write(ring, VCE_CMD_END); +} + +/** + * radeon_vce_ring_test - test if VCE ring is working + * + * @rdev: radeon_device pointer + * @ring: the engine to test on + * + */ +int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + uint32_t rptr = vce_v1_0_get_rptr(rdev, ring); + unsigned i; + int r; + + r = radeon_ring_lock(rdev, ring, 16); + if (r) { + DRM_ERROR("radeon: vce failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + radeon_ring_write(ring, VCE_CMD_END); + radeon_ring_unlock_commit(rdev, ring); + + for (i = 0; i < rdev->usec_timeout; i++) { + if (vce_v1_0_get_rptr(rdev, ring) != rptr) + break; + DRM_UDELAY(1); + } + + if (i < rdev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("radeon: ring %d test failed\n", + ring->idx); + r = -ETIMEDOUT; + } + + return r; +} + +/** + * radeon_vce_ib_test - test if VCE IBs are working + * + * @rdev: radeon_device pointer + * @ring: the engine to test on + * + */ +int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + struct radeon_fence *fence = NULL; + int r; + + r = radeon_vce_get_create_msg(rdev, ring->idx, 1, NULL); + if (r) { + DRM_ERROR("radeon: failed to get create msg (%d).\n", r); + goto error; + } + + r = radeon_vce_get_destroy_msg(rdev, ring->idx, 1, &fence); + if (r) { + DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); + goto error; + } + + r = radeon_fence_wait(fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + } else { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + } +error: + radeon_fence_unref(&fence); + return r; +} diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c new file mode 100644 index 00000000000..2aae6ce49d3 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -0,0 +1,966 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#include <drm/drmP.h> +#include <drm/radeon_drm.h> +#include "radeon.h" +#include "radeon_trace.h" + +/* + * GPUVM + * GPUVM is similar to the legacy gart on older asics, however + * rather than there being a single global gart table + * for the entire GPU, there are multiple VM page tables active + * at any given time. The VM page tables can contain a mix + * vram pages and system memory pages and system memory pages + * can be mapped as snooped (cached system pages) or unsnooped + * (uncached system pages). + * Each VM has an ID associated with it and there is a page table + * associated with each VMID. When execting a command buffer, + * the kernel tells the the ring what VMID to use for that command + * buffer. VMIDs are allocated dynamically as commands are submitted. + * The userspace drivers maintain their own address space and the kernel + * sets up their pages tables accordingly when they submit their + * command buffers and a VMID is assigned. + * Cayman/Trinity support up to 8 active VMs at any given time; + * SI supports 16. + */ + +/** + * radeon_vm_num_pde - return the number of page directory entries + * + * @rdev: radeon_device pointer + * + * Calculate the number of page directory entries (cayman+). + */ +static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) +{ + return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; +} + +/** + * radeon_vm_directory_size - returns the size of the page directory in bytes + * + * @rdev: radeon_device pointer + * + * Calculate the size of the page directory in bytes (cayman+). + */ +static unsigned radeon_vm_directory_size(struct radeon_device *rdev) +{ + return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); +} + +/** + * radeon_vm_manager_init - init the vm manager + * + * @rdev: radeon_device pointer + * + * Init the vm manager (cayman+). + * Returns 0 for success, error for failure. + */ +int radeon_vm_manager_init(struct radeon_device *rdev) +{ + int r; + + if (!rdev->vm_manager.enabled) { + r = radeon_asic_vm_init(rdev); + if (r) + return r; + + rdev->vm_manager.enabled = true; + } + return 0; +} + +/** + * radeon_vm_manager_fini - tear down the vm manager + * + * @rdev: radeon_device pointer + * + * Tear down the VM manager (cayman+). + */ +void radeon_vm_manager_fini(struct radeon_device *rdev) +{ + int i; + + if (!rdev->vm_manager.enabled) + return; + + for (i = 0; i < RADEON_NUM_VM; ++i) + radeon_fence_unref(&rdev->vm_manager.active[i]); + radeon_asic_vm_fini(rdev); + rdev->vm_manager.enabled = false; +} + +/** + * radeon_vm_get_bos - add the vm BOs to a validation list + * + * @vm: vm providing the BOs + * @head: head of validation list + * + * Add the page directory to the list of BOs to + * validate for command submission (cayman+). + */ +struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, + struct radeon_vm *vm, + struct list_head *head) +{ + struct radeon_cs_reloc *list; + unsigned i, idx, size; + + size = (radeon_vm_num_pdes(rdev) + 1) * sizeof(struct radeon_cs_reloc); + list = kmalloc(size, GFP_KERNEL); + if (!list) + return NULL; + + /* add the vm page table to the list */ + list[0].gobj = NULL; + list[0].robj = vm->page_directory; + list[0].domain = RADEON_GEM_DOMAIN_VRAM; + list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM; + list[0].tv.bo = &vm->page_directory->tbo; + list[0].tiling_flags = 0; + list[0].handle = 0; + list_add(&list[0].tv.head, head); + + for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { + if (!vm->page_tables[i].bo) + continue; + + list[idx].gobj = NULL; + list[idx].robj = vm->page_tables[i].bo; + list[idx].domain = RADEON_GEM_DOMAIN_VRAM; + list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM; + list[idx].tv.bo = &list[idx].robj->tbo; + list[idx].tiling_flags = 0; + list[idx].handle = 0; + list_add(&list[idx++].tv.head, head); + } + + return list; +} + +/** + * radeon_vm_grab_id - allocate the next free VMID + * + * @rdev: radeon_device pointer + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * + * Allocate an id for the vm (cayman+). + * Returns the fence we need to sync to (if any). + * + * Global and local mutex must be locked! + */ +struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, + struct radeon_vm *vm, int ring) +{ + struct radeon_fence *best[RADEON_NUM_RINGS] = {}; + unsigned choices[2] = {}; + unsigned i; + + /* check if the id is still valid */ + if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) + return NULL; + + /* we definately need to flush */ + radeon_fence_unref(&vm->last_flush); + + /* skip over VMID 0, since it is the system VM */ + for (i = 1; i < rdev->vm_manager.nvm; ++i) { + struct radeon_fence *fence = rdev->vm_manager.active[i]; + + if (fence == NULL) { + /* found a free one */ + vm->id = i; + trace_radeon_vm_grab_id(vm->id, ring); + return NULL; + } + + if (radeon_fence_is_earlier(fence, best[fence->ring])) { + best[fence->ring] = fence; + choices[fence->ring == ring ? 0 : 1] = i; + } + } + + for (i = 0; i < 2; ++i) { + if (choices[i]) { + vm->id = choices[i]; + trace_radeon_vm_grab_id(vm->id, ring); + return rdev->vm_manager.active[choices[i]]; + } + } + + /* should never happen */ + BUG(); + return NULL; +} + +/** + * radeon_vm_flush - hardware flush the vm + * + * @rdev: radeon_device pointer + * @vm: vm we want to flush + * @ring: ring to use for flush + * + * Flush the vm (cayman+). + * + * Global and local mutex must be locked! + */ +void radeon_vm_flush(struct radeon_device *rdev, + struct radeon_vm *vm, + int ring) +{ + uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); + + /* if we can't remember our last VM flush then flush now! */ + /* XXX figure out why we have to flush all the time */ + if (!vm->last_flush || true || pd_addr != vm->pd_gpu_addr) { + vm->pd_gpu_addr = pd_addr; + radeon_ring_vm_flush(rdev, ring, vm); + } +} + +/** + * radeon_vm_fence - remember fence for vm + * + * @rdev: radeon_device pointer + * @vm: vm we want to fence + * @fence: fence to remember + * + * Fence the vm (cayman+). + * Set the fence used to protect page table and id. + * + * Global and local mutex must be locked! + */ +void radeon_vm_fence(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_fence *fence) +{ + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(fence); + + radeon_fence_unref(&rdev->vm_manager.active[vm->id]); + rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); + + radeon_fence_unref(&vm->last_id_use); + vm->last_id_use = radeon_fence_ref(fence); + + /* we just flushed the VM, remember that */ + if (!vm->last_flush) + vm->last_flush = radeon_fence_ref(fence); +} + +/** + * radeon_vm_bo_find - find the bo_va for a specific vm & bo + * + * @vm: requested vm + * @bo: requested buffer object + * + * Find @bo inside the requested vm (cayman+). + * Search inside the @bos vm list for the requested vm + * Returns the found bo_va or NULL if none is found + * + * Object has to be reserved! + */ +struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + list_for_each_entry(bo_va, &bo->va, bo_list) { + if (bo_va->vm == vm) { + return bo_va; + } + } + return NULL; +} + +/** + * radeon_vm_bo_add - add a bo to a specific vm + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Add @bo into the requested vm (cayman+). + * Add @bo to the list of bos associated with the vm + * Returns newly added bo_va or NULL for failure + * + * Object has to be reserved! + */ +struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + if (bo_va == NULL) { + return NULL; + } + bo_va->vm = vm; + bo_va->bo = bo; + bo_va->soffset = 0; + bo_va->eoffset = 0; + bo_va->flags = 0; + bo_va->valid = false; + bo_va->ref_count = 1; + INIT_LIST_HEAD(&bo_va->bo_list); + INIT_LIST_HEAD(&bo_va->vm_list); + + mutex_lock(&vm->mutex); + list_add(&bo_va->vm_list, &vm->va); + list_add_tail(&bo_va->bo_list, &bo->va); + mutex_unlock(&vm->mutex); + + return bo_va; +} + +/** + * radeon_vm_clear_bo - initially clear the page dir/table + * + * @rdev: radeon_device pointer + * @bo: bo to clear + */ +static int radeon_vm_clear_bo(struct radeon_device *rdev, + struct radeon_bo *bo) +{ + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + struct list_head head; + struct radeon_ib ib; + unsigned entries; + uint64_t addr; + int r; + + memset(&tv, 0, sizeof(tv)); + tv.bo = &bo->tbo; + + INIT_LIST_HEAD(&head); + list_add(&tv.head, &head); + + r = ttm_eu_reserve_buffers(&ticket, &head); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + if (r) + goto error; + + addr = radeon_bo_gpu_offset(bo); + entries = radeon_bo_size(bo) / 8; + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, + NULL, entries * 2 + 64); + if (r) + goto error; + + ib.length_dw = 0; + + radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0); + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) + goto error; + + ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); + radeon_ib_free(rdev, &ib); + + return 0; + +error: + ttm_eu_backoff_reservation(&ticket, &head); + return r; +} + +/** + * radeon_vm_bo_set_addr - set bos virtual address inside a vm + * + * @rdev: radeon_device pointer + * @bo_va: bo_va to store the address + * @soffset: requested offset of the buffer in the VM address space + * @flags: attributes of pages (read/write/valid/etc.) + * + * Set offset of @bo_va (cayman+). + * Validate and set the offset requested within the vm address space. + * Returns 0 for success, error for failure. + * + * Object has to be reserved! + */ +int radeon_vm_bo_set_addr(struct radeon_device *rdev, + struct radeon_bo_va *bo_va, + uint64_t soffset, + uint32_t flags) +{ + uint64_t size = radeon_bo_size(bo_va->bo); + uint64_t eoffset, last_offset = 0; + struct radeon_vm *vm = bo_va->vm; + struct radeon_bo_va *tmp; + struct list_head *head; + unsigned last_pfn, pt_idx; + int r; + + if (soffset) { + /* make sure object fit at this offset */ + eoffset = soffset + size; + if (soffset >= eoffset) { + return -EINVAL; + } + + last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; + if (last_pfn > rdev->vm_manager.max_pfn) { + dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", + last_pfn, rdev->vm_manager.max_pfn); + return -EINVAL; + } + + } else { + eoffset = last_pfn = 0; + } + + mutex_lock(&vm->mutex); + head = &vm->va; + last_offset = 0; + list_for_each_entry(tmp, &vm->va, vm_list) { + if (bo_va == tmp) { + /* skip over currently modified bo */ + continue; + } + + if (soffset >= last_offset && eoffset <= tmp->soffset) { + /* bo can be added before this one */ + break; + } + if (eoffset > tmp->soffset && soffset < tmp->eoffset) { + /* bo and tmp overlap, invalid offset */ + dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", + bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, + (unsigned)tmp->soffset, (unsigned)tmp->eoffset); + mutex_unlock(&vm->mutex); + return -EINVAL; + } + last_offset = tmp->eoffset; + head = &tmp->vm_list; + } + + bo_va->soffset = soffset; + bo_va->eoffset = eoffset; + bo_va->flags = flags; + bo_va->valid = false; + list_move(&bo_va->vm_list, head); + + soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + + if (eoffset > vm->max_pde_used) + vm->max_pde_used = eoffset; + + radeon_bo_unreserve(bo_va->bo); + + /* walk over the address space and allocate the page tables */ + for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) { + struct radeon_bo *pt; + + if (vm->page_tables[pt_idx].bo) + continue; + + /* drop mutex to allocate and clear page table */ + mutex_unlock(&vm->mutex); + + r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, + RADEON_GPU_PAGE_SIZE, false, + RADEON_GEM_DOMAIN_VRAM, NULL, &pt); + if (r) + return r; + + r = radeon_vm_clear_bo(rdev, pt); + if (r) { + radeon_bo_unref(&pt); + radeon_bo_reserve(bo_va->bo, false); + return r; + } + + /* aquire mutex again */ + mutex_lock(&vm->mutex); + if (vm->page_tables[pt_idx].bo) { + /* someone else allocated the pt in the meantime */ + mutex_unlock(&vm->mutex); + radeon_bo_unref(&pt); + mutex_lock(&vm->mutex); + continue; + } + + vm->page_tables[pt_idx].addr = 0; + vm->page_tables[pt_idx].bo = pt; + } + + mutex_unlock(&vm->mutex); + return radeon_bo_reserve(bo_va->bo, false); +} + +/** + * radeon_vm_map_gart - get the physical address of a gart page + * + * @rdev: radeon_device pointer + * @addr: the unmapped addr + * + * Look up the physical address of the page that the pte resolves + * to (cayman+). + * Returns the physical address of the page. + */ +uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) +{ + uint64_t result; + + /* page table offset */ + result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; + + /* in case cpu page size != gpu page size*/ + result |= addr & (~PAGE_MASK); + + return result; +} + +/** + * radeon_vm_page_flags - translate page flags to what the hw uses + * + * @flags: flags comming from userspace + * + * Translate the flags the userspace ABI uses to hw flags. + */ +static uint32_t radeon_vm_page_flags(uint32_t flags) +{ + uint32_t hw_flags = 0; + hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; + hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; + hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; + if (flags & RADEON_VM_PAGE_SYSTEM) { + hw_flags |= R600_PTE_SYSTEM; + hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; + } + return hw_flags; +} + +/** + * radeon_vm_update_pdes - make sure that page directory is valid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * + * Allocates new page tables if necessary + * and updates the page directory (cayman+). + * Returns 0 for success, error for failure. + * + * Global and local mutex must be locked! + */ +int radeon_vm_update_page_directory(struct radeon_device *rdev, + struct radeon_vm *vm) +{ + static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; + + uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); + uint64_t last_pde = ~0, last_pt = ~0; + unsigned count = 0, pt_idx, ndw; + struct radeon_ib ib; + int r; + + /* padding, etc. */ + ndw = 64; + + /* assume the worst case */ + ndw += vm->max_pde_used * 12; + + /* update too big for an IB */ + if (ndw > 0xfffff) + return -ENOMEM; + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); + if (r) + return r; + ib.length_dw = 0; + + /* walk over the address space and update the page directory */ + for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { + struct radeon_bo *bo = vm->page_tables[pt_idx].bo; + uint64_t pde, pt; + + if (bo == NULL) + continue; + + pt = radeon_bo_gpu_offset(bo); + if (vm->page_tables[pt_idx].addr == pt) + continue; + vm->page_tables[pt_idx].addr = pt; + + pde = pd_addr + pt_idx * 8; + if (((last_pde + 8 * count) != pde) || + ((last_pt + incr * count) != pt)) { + + if (count) { + radeon_asic_vm_set_page(rdev, &ib, last_pde, + last_pt, count, incr, + R600_PTE_VALID); + } + + count = 1; + last_pde = pde; + last_pt = pt; + } else { + ++count; + } + } + + if (count) + radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count, + incr, R600_PTE_VALID); + + if (ib.length_dw != 0) { + radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + return r; + } + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_fence_unref(&vm->last_flush); + } + radeon_ib_free(rdev, &ib); + + return 0; +} + +/** + * radeon_vm_update_ptes - make sure that page tables are valid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * @dst: destination address to map to + * @flags: mapping flags + * + * Update the page tables in the range @start - @end (cayman+). + * + * Global and local mutex must be locked! + */ +static void radeon_vm_update_ptes(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_ib *ib, + uint64_t start, uint64_t end, + uint64_t dst, uint32_t flags) +{ + static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; + + uint64_t last_pte = ~0, last_dst = ~0; + unsigned count = 0; + uint64_t addr; + + start = start / RADEON_GPU_PAGE_SIZE; + end = end / RADEON_GPU_PAGE_SIZE; + + /* walk over the address space and update the page tables */ + for (addr = start; addr < end; ) { + uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; + unsigned nptes; + uint64_t pte; + + if ((addr & ~mask) == (end & ~mask)) + nptes = end - addr; + else + nptes = RADEON_VM_PTE_COUNT - (addr & mask); + + pte = radeon_bo_gpu_offset(vm->page_tables[pt_idx].bo); + pte += (addr & mask) * 8; + + if ((last_pte + 8 * count) != pte) { + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pte, + last_dst, count, + RADEON_GPU_PAGE_SIZE, + flags); + } + + count = nptes; + last_pte = pte; + last_dst = dst; + } else { + count += nptes; + } + + addr += nptes; + dst += nptes * RADEON_GPU_PAGE_SIZE; + } + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pte, + last_dst, count, + RADEON_GPU_PAGE_SIZE, flags); + } +} + +/** + * radeon_vm_bo_update - map a bo into the vm page table + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * @mem: ttm mem + * + * Fill in the page table entries for @bo (cayman+). + * Returns 0 for success, -EINVAL for failure. + * + * Object have to be reserved and mutex must be locked! + */ +int radeon_vm_bo_update(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo, + struct ttm_mem_reg *mem) +{ + struct radeon_ib ib; + struct radeon_bo_va *bo_va; + unsigned nptes, ndw; + uint64_t addr; + int r; + + bo_va = radeon_vm_bo_find(vm, bo); + if (bo_va == NULL) { + dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); + return -EINVAL; + } + + if (!bo_va->soffset) { + dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", + bo, vm); + return -EINVAL; + } + + if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) + return 0; + + bo_va->flags &= ~RADEON_VM_PAGE_VALID; + bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; + if (mem) { + addr = mem->start << PAGE_SHIFT; + if (mem->mem_type != TTM_PL_SYSTEM) { + bo_va->flags |= RADEON_VM_PAGE_VALID; + bo_va->valid = true; + } + if (mem->mem_type == TTM_PL_TT) { + bo_va->flags |= RADEON_VM_PAGE_SYSTEM; + } else { + addr += rdev->vm_manager.vram_base_offset; + } + } else { + addr = 0; + bo_va->valid = false; + } + + trace_radeon_vm_bo_update(bo_va); + + nptes = radeon_bo_ngpu_pages(bo); + + /* padding, etc. */ + ndw = 64; + + if (RADEON_VM_BLOCK_SIZE > 11) + /* reserve space for one header for every 2k dwords */ + ndw += (nptes >> 11) * 4; + else + /* reserve space for one header for + every (1 << BLOCK_SIZE) entries */ + ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; + + /* reserve space for pte addresses */ + ndw += nptes * 2; + + /* update too big for an IB */ + if (ndw > 0xfffff) + return -ENOMEM; + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); + if (r) + return r; + ib.length_dw = 0; + + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, + addr, radeon_vm_page_flags(bo_va->flags)); + + radeon_semaphore_sync_to(ib.semaphore, vm->fence); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + return r; + } + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_ib_free(rdev, &ib); + radeon_fence_unref(&vm->last_flush); + + return 0; +} + +/** + * radeon_vm_bo_rmv - remove a bo to a specific vm + * + * @rdev: radeon_device pointer + * @bo_va: requested bo_va + * + * Remove @bo_va->bo from the requested vm (cayman+). + * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and + * remove the ptes for @bo_va in the page table. + * Returns 0 for success. + * + * Object have to be reserved! + */ +int radeon_vm_bo_rmv(struct radeon_device *rdev, + struct radeon_bo_va *bo_va) +{ + int r = 0; + + mutex_lock(&bo_va->vm->mutex); + if (bo_va->soffset) + r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); + + list_del(&bo_va->vm_list); + mutex_unlock(&bo_va->vm->mutex); + list_del(&bo_va->bo_list); + + kfree(bo_va); + return r; +} + +/** + * radeon_vm_bo_invalidate - mark the bo as invalid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Mark @bo as invalid (cayman+). + */ +void radeon_vm_bo_invalidate(struct radeon_device *rdev, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + list_for_each_entry(bo_va, &bo->va, bo_list) { + bo_va->valid = false; + } +} + +/** + * radeon_vm_init - initialize a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Init @vm fields (cayman+). + */ +int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) +{ + unsigned pd_size, pd_entries, pts_size; + int r; + + vm->id = 0; + vm->fence = NULL; + vm->last_flush = NULL; + vm->last_id_use = NULL; + mutex_init(&vm->mutex); + INIT_LIST_HEAD(&vm->va); + + pd_size = radeon_vm_directory_size(rdev); + pd_entries = radeon_vm_num_pdes(rdev); + + /* allocate page table array */ + pts_size = pd_entries * sizeof(struct radeon_vm_pt); + vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + if (vm->page_tables == NULL) { + DRM_ERROR("Cannot allocate memory for page table array\n"); + return -ENOMEM; + } + + r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false, + RADEON_GEM_DOMAIN_VRAM, NULL, + &vm->page_directory); + if (r) + return r; + + r = radeon_vm_clear_bo(rdev, vm->page_directory); + if (r) { + radeon_bo_unref(&vm->page_directory); + vm->page_directory = NULL; + return r; + } + + return 0; +} + +/** + * radeon_vm_fini - tear down a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Tear down @vm (cayman+). + * Unbind the VM and remove all bos from the vm bo list + */ +void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) +{ + struct radeon_bo_va *bo_va, *tmp; + int i, r; + + if (!list_empty(&vm->va)) { + dev_err(rdev->dev, "still active bo inside vm\n"); + } + list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { + list_del_init(&bo_va->vm_list); + r = radeon_bo_reserve(bo_va->bo, false); + if (!r) { + list_del_init(&bo_va->bo_list); + radeon_bo_unreserve(bo_va->bo); + kfree(bo_va); + } + } + + + for (i = 0; i < radeon_vm_num_pdes(rdev); i++) + radeon_bo_unref(&vm->page_tables[i].bo); + kfree(vm->page_tables); + + radeon_bo_unref(&vm->page_directory); + + radeon_fence_unref(&vm->fence); + radeon_fence_unref(&vm->last_flush); + radeon_fence_unref(&vm->last_id_use); + + mutex_destroy(&vm->mutex); +} diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600 index 20bfbda7b3f..ec0c6829c1d 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/r600 +++ b/drivers/gpu/drm/radeon/reg_srcs/r600 @@ -18,6 +18,7 @@ r600 0x9400 0x00028A3C VGT_GROUP_VECT_1_FMT_CNTL 0x00028A40 VGT_GS_MODE 0x00028A6C VGT_GS_OUT_PRIM_TYPE +0x00028B38 VGT_GS_MAX_VERT_OUT 0x000088C8 VGT_GS_PER_ES 0x000088E8 VGT_GS_PER_VS 0x000088D4 VGT_GS_VERTEX_REUSE diff --git a/drivers/gpu/drm/radeon/rs780_dpm.c b/drivers/gpu/drm/radeon/rs780_dpm.c index 8512085b0ae..02f7710de47 100644 --- a/drivers/gpu/drm/radeon/rs780_dpm.c +++ b/drivers/gpu/drm/radeon/rs780_dpm.c @@ -807,9 +807,6 @@ static int rs780_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -859,6 +856,10 @@ int rs780_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rs780_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c index bebf31c4d84..e7045b08571 100644 --- a/drivers/gpu/drm/radeon/rv6xx_dpm.c +++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c @@ -1891,9 +1891,6 @@ static int rv6xx_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -1943,6 +1940,10 @@ int rv6xx_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv6xx_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c index 80c595aba35..da041a43d82 100644 --- a/drivers/gpu/drm/radeon/rv770_dpm.c +++ b/drivers/gpu/drm/radeon/rv770_dpm.c @@ -2174,7 +2174,6 @@ static void rv7xx_parse_pplib_clock_info(struct radeon_device *rdev, struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev); struct rv7xx_ps *ps = rv770_get_ps(rps); u32 sclk, mclk; - u16 vddc; struct rv7xx_pl *pl; switch (index) { @@ -2214,8 +2213,8 @@ static void rv7xx_parse_pplib_clock_info(struct radeon_device *rdev, /* patch up vddc if necessary */ if (pl->vddc == 0xff01) { - if (radeon_atom_get_max_vddc(rdev, 0, 0, &vddc) == 0) - pl->vddc = vddc; + if (pi->max_vddc) + pl->vddc = pi->max_vddc; } if (rps->class & ATOM_PPLIB_CLASSIFICATION_ACPI) { @@ -2282,9 +2281,6 @@ int rv7xx_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -2362,6 +2358,10 @@ int rv770_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv7xx_parse_power_table(rdev); if (ret) return ret; @@ -2527,14 +2527,7 @@ u32 rv770_dpm_get_mclk(struct radeon_device *rdev, bool low) bool rv770_dpm_vblank_too_short(struct radeon_device *rdev) { u32 vblank_time = r600_dpm_get_vblank_time(rdev); - u32 switch_limit = 300; - - /* quirks */ - /* ASUS K70AF */ - if ((rdev->pdev->device == 0x9553) && - (rdev->pdev->subsystem_vendor == 0x1043) && - (rdev->pdev->subsystem_device == 0x1c42)) - switch_limit = 200; + u32 switch_limit = 200; /* 300 */ /* RV770 */ /* mclk switching doesn't seem to work reliably on desktop RV770s */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 09ec4f6c53b..8008cb8d532 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3434,8 +3434,6 @@ static int si_cp_resume(struct radeon_device *rdev) WREG32(CP_RB0_BASE, ring->gpu_addr >> 8); - ring->rptr = RREG32(CP_RB0_RPTR); - /* ring1 - compute only */ /* Set ring buffer size */ ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; @@ -3460,8 +3458,6 @@ static int si_cp_resume(struct radeon_device *rdev) WREG32(CP_RB1_BASE, ring->gpu_addr >> 8); - ring->rptr = RREG32(CP_RB1_RPTR); - /* ring2 - compute only */ /* Set ring buffer size */ ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; @@ -3486,8 +3482,6 @@ static int si_cp_resume(struct radeon_device *rdev) WREG32(CP_RB2_BASE, ring->gpu_addr >> 8); - ring->rptr = RREG32(CP_RB2_RPTR); - /* start the rings */ si_cp_start(rdev); rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; @@ -3872,11 +3866,9 @@ bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) if (!(reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } @@ -6338,6 +6330,10 @@ restart_ih: break; } break; + case 124: /* UVD */ + DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); + radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); + break; case 146: case 147: addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR); diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index 59be2cfcbb4..cf0fdad8c27 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -49,11 +49,9 @@ bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) mask = RADEON_RESET_DMA1; if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 0471501338f..9a3567bedaa 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2395,7 +2395,7 @@ static int si_populate_sq_ramping_values(struct radeon_device *rdev, if (SISLANDS_DPM2_SQ_RAMP_STI_SIZE > (STI_SIZE_MASK >> STI_SIZE_SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_LTI_RATIO <= (LTI_RATIO_MASK >> LTI_RATIO_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_LTI_RATIO > (LTI_RATIO_MASK >> LTI_RATIO_SHIFT)) enable_sq_ramping = false; for (i = 0; i < state->performance_level_count; i++) { @@ -6271,9 +6271,6 @@ static int si_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -6350,6 +6347,10 @@ int si_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = si_parse_power_table(rdev); if (ret) return ret; @@ -6472,7 +6473,8 @@ void si_dpm_fini(struct radeon_device *rdev) void si_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, struct seq_file *m) { - struct radeon_ps *rps = rdev->pm.dpm.current_ps; + struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev); + struct radeon_ps *rps = &eg_pi->current_rps; struct ni_ps *ps = ni_get_ps(rps); struct rv7xx_pl *pl; u32 current_index = diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 9239a6d2912..683532f8493 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1798,4 +1798,51 @@ #define DMA_PACKET_CONSTANT_FILL 0xd #define DMA_PACKET_NOP 0xf +#define VCE_STATUS 0x20004 +#define VCE_VCPU_CNTL 0x20014 +#define VCE_CLK_EN (1 << 0) +#define VCE_VCPU_CACHE_OFFSET0 0x20024 +#define VCE_VCPU_CACHE_SIZE0 0x20028 +#define VCE_VCPU_CACHE_OFFSET1 0x2002c +#define VCE_VCPU_CACHE_SIZE1 0x20030 +#define VCE_VCPU_CACHE_OFFSET2 0x20034 +#define VCE_VCPU_CACHE_SIZE2 0x20038 +#define VCE_SOFT_RESET 0x20120 +#define VCE_ECPU_SOFT_RESET (1 << 0) +#define VCE_FME_SOFT_RESET (1 << 2) +#define VCE_RB_BASE_LO2 0x2016c +#define VCE_RB_BASE_HI2 0x20170 +#define VCE_RB_SIZE2 0x20174 +#define VCE_RB_RPTR2 0x20178 +#define VCE_RB_WPTR2 0x2017c +#define VCE_RB_BASE_LO 0x20180 +#define VCE_RB_BASE_HI 0x20184 +#define VCE_RB_SIZE 0x20188 +#define VCE_RB_RPTR 0x2018c +#define VCE_RB_WPTR 0x20190 +#define VCE_CLOCK_GATING_A 0x202f8 +#define VCE_CLOCK_GATING_B 0x202fc +#define VCE_UENC_CLOCK_GATING 0x205bc +#define VCE_UENC_REG_CLOCK_GATING 0x205c0 +#define VCE_FW_REG_STATUS 0x20e10 +# define VCE_FW_REG_STATUS_BUSY (1 << 0) +# define VCE_FW_REG_STATUS_PASS (1 << 3) +# define VCE_FW_REG_STATUS_DONE (1 << 11) +#define VCE_LMI_FW_START_KEYSEL 0x20e18 +#define VCE_LMI_FW_PERIODIC_CTRL 0x20e20 +#define VCE_LMI_CTRL2 0x20e74 +#define VCE_LMI_CTRL 0x20e98 +#define VCE_LMI_VM_CTRL 0x20ea0 +#define VCE_LMI_SWAP_CNTL 0x20eb4 +#define VCE_LMI_SWAP_CNTL1 0x20eb8 +#define VCE_LMI_CACHE_CTRL 0x20ef4 + +#define VCE_CMD_NO_OP 0x00000000 +#define VCE_CMD_END 0x00000001 +#define VCE_CMD_IB 0x00000002 +#define VCE_CMD_FENCE 0x00000003 +#define VCE_CMD_TRAP 0x00000004 +#define VCE_CMD_IB_AUTO 0x00000005 +#define VCE_CMD_SEMAPHORE 0x00000006 + #endif diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c index f121efe12dc..3f0e8d7b8db 100644 --- a/drivers/gpu/drm/radeon/sumo_dpm.c +++ b/drivers/gpu/drm/radeon/sumo_dpm.c @@ -1484,9 +1484,6 @@ static int sumo_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -1772,6 +1769,10 @@ int sumo_dpm_init(struct radeon_device *rdev) sumo_construct_boot_and_acpi_state(rdev); + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = sumo_parse_power_table(rdev); if (ret) return ret; @@ -1807,7 +1808,7 @@ void sumo_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev struct seq_file *m) { struct sumo_power_info *pi = sumo_get_pi(rdev); - struct radeon_ps *rps = rdev->pm.dpm.current_ps; + struct radeon_ps *rps = &pi->current_rps; struct sumo_ps *ps = sumo_get_ps(rps); struct sumo_pl *pl; u32 current_index = diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index 2d447192d6f..2a2822c0332 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -1694,9 +1694,6 @@ static int trinity_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -1895,6 +1892,10 @@ int trinity_dpm_init(struct radeon_device *rdev) trinity_construct_boot_state(rdev); + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = trinity_parse_power_table(rdev); if (ret) return ret; @@ -1926,7 +1927,8 @@ void trinity_dpm_print_power_state(struct radeon_device *rdev, void trinity_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, struct seq_file *m) { - struct radeon_ps *rps = rdev->pm.dpm.current_ps; + struct trinity_power_info *pi = trinity_get_pi(rdev); + struct radeon_ps *rps = &pi->current_rps; struct trinity_ps *ps = trinity_get_ps(rps); struct trinity_pl *pl; u32 current_index = diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index d4a68af1a27..0a243f0e5d6 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -262,7 +262,7 @@ int uvd_v1_0_start(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ WREG32(UVD_RBC_RB_RPTR, 0x0); - ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR); + ring->wptr = RREG32(UVD_RBC_RB_RPTR); WREG32(UVD_RBC_RB_WPTR, ring->wptr); /* set the ring address */ diff --git a/drivers/gpu/drm/radeon/uvd_v2_2.c b/drivers/gpu/drm/radeon/uvd_v2_2.c index 824550db3fe..d1771004cb5 100644 --- a/drivers/gpu/drm/radeon/uvd_v2_2.c +++ b/drivers/gpu/drm/radeon/uvd_v2_2.c @@ -57,7 +57,6 @@ void uvd_v2_2_fence_emit(struct radeon_device *rdev, radeon_ring_write(ring, 0); radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); radeon_ring_write(ring, 2); - return; } /** diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c new file mode 100644 index 00000000000..b44d9c842f7 --- /dev/null +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -0,0 +1,187 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * Authors: Christian König <christian.koenig@amd.com> + */ + +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "radeon.h" +#include "radeon_asic.h" +#include "sid.h" + +/** + * vce_v1_0_get_rptr - get read pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Returns the current hardware read pointer + */ +uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) + return RREG32(VCE_RB_RPTR); + else + return RREG32(VCE_RB_RPTR2); +} + +/** + * vce_v1_0_get_wptr - get write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Returns the current hardware write pointer + */ +uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) + return RREG32(VCE_RB_WPTR); + else + return RREG32(VCE_RB_WPTR2); +} + +/** + * vce_v1_0_set_wptr - set write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Commits the write pointer to the hardware + */ +void vce_v1_0_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) + WREG32(VCE_RB_WPTR, ring->wptr); + else + WREG32(VCE_RB_WPTR2, ring->wptr); +} + +/** + * vce_v1_0_start - start VCE block + * + * @rdev: radeon_device pointer + * + * Setup and start the VCE block + */ +int vce_v1_0_start(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + int i, j, r; + + /* set BUSY flag */ + WREG32_P(VCE_STATUS, 1, ~1); + + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + WREG32(VCE_RB_RPTR, ring->wptr); + WREG32(VCE_RB_WPTR, ring->wptr); + WREG32(VCE_RB_BASE_LO, ring->gpu_addr); + WREG32(VCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32(VCE_RB_SIZE, ring->ring_size / 4); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + WREG32(VCE_RB_RPTR2, ring->wptr); + WREG32(VCE_RB_WPTR2, ring->wptr); + WREG32(VCE_RB_BASE_LO2, ring->gpu_addr); + WREG32(VCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32(VCE_RB_SIZE2, ring->ring_size / 4); + + WREG32_P(VCE_VCPU_CNTL, VCE_CLK_EN, ~VCE_CLK_EN); + + WREG32_P(VCE_SOFT_RESET, + VCE_ECPU_SOFT_RESET | + VCE_FME_SOFT_RESET, ~( + VCE_ECPU_SOFT_RESET | + VCE_FME_SOFT_RESET)); + + mdelay(100); + + WREG32_P(VCE_SOFT_RESET, 0, ~( + VCE_ECPU_SOFT_RESET | + VCE_FME_SOFT_RESET)); + + for (i = 0; i < 10; ++i) { + uint32_t status; + for (j = 0; j < 100; ++j) { + status = RREG32(VCE_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n"); + WREG32_P(VCE_SOFT_RESET, VCE_ECPU_SOFT_RESET, ~VCE_ECPU_SOFT_RESET); + mdelay(10); + WREG32_P(VCE_SOFT_RESET, 0, ~VCE_ECPU_SOFT_RESET); + mdelay(10); + r = -1; + } + + /* clear BUSY flag */ + WREG32_P(VCE_STATUS, 0, ~1); + + if (r) { + DRM_ERROR("VCE not responding, giving up!!!\n"); + return r; + } + + return 0; +} + +int vce_v1_0_init(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + int r; + + r = vce_v1_0_start(rdev); + if (r) + return r; + + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + ring->ready = true; + r = radeon_ring_test(rdev, TN_RING_TYPE_VCE1_INDEX, ring); + if (r) { + ring->ready = false; + return r; + } + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + ring->ready = true; + r = radeon_ring_test(rdev, TN_RING_TYPE_VCE2_INDEX, ring); + if (r) { + ring->ready = false; + return r; + } + + DRM_INFO("VCE initialized successfully.\n"); + + return 0; +} diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c new file mode 100644 index 00000000000..1ac7bb825a1 --- /dev/null +++ b/drivers/gpu/drm/radeon/vce_v2_0.c @@ -0,0 +1,181 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * Authors: Christian König <christian.koenig@amd.com> + */ + +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "radeon.h" +#include "radeon_asic.h" +#include "cikd.h" + +static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated) +{ + u32 tmp; + + if (gated) { + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp |= 0xe70000; + WREG32(VCE_CLOCK_GATING_B, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp |= 0xff000000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3fc; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + + WREG32(VCE_CGTT_CLK_OVERRIDE, 0); + } else { + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp |= 0xe7; + tmp &= ~0xe70000; + WREG32(VCE_CLOCK_GATING_B, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp |= 0x1fe000; + tmp &= ~0xff000000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp |= 0x3fc; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + } +} + +static void vce_v2_0_set_dyn_cg(struct radeon_device *rdev, bool gated) +{ + u32 orig, tmp; + + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp &= ~0x00060006; + if (gated) { + tmp |= 0xe10000; + } else { + tmp |= 0xe1; + tmp &= ~0xe10000; + } + WREG32(VCE_CLOCK_GATING_B, tmp); + + orig = tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp &= ~0x1fe000; + tmp &= ~0xff000000; + if (tmp != orig) + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + orig = tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3fc; + if (tmp != orig) + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + + if (gated) + WREG32(VCE_CGTT_CLK_OVERRIDE, 0); +} + +static void vce_v2_0_disable_cg(struct radeon_device *rdev) +{ + WREG32(VCE_CGTT_CLK_OVERRIDE, 7); +} + +void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable) +{ + bool sw_cg = false; + + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_VCE_MGCG)) { + if (sw_cg) + vce_v2_0_set_sw_cg(rdev, true); + else + vce_v2_0_set_dyn_cg(rdev, true); + } else { + vce_v2_0_disable_cg(rdev); + + if (sw_cg) + vce_v2_0_set_sw_cg(rdev, false); + else + vce_v2_0_set_dyn_cg(rdev, false); + } +} + +static void vce_v2_0_init_cg(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = RREG32(VCE_CLOCK_GATING_A); + tmp &= ~(CGC_CLK_GATE_DLY_TIMER_MASK | CGC_CLK_GATER_OFF_DLY_TIMER_MASK); + tmp |= (CGC_CLK_GATE_DLY_TIMER(0) | CGC_CLK_GATER_OFF_DLY_TIMER(4)); + tmp |= CGC_UENC_WAIT_AWAKE; + WREG32(VCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp &= ~(CLOCK_ON_DELAY_MASK | CLOCK_OFF_DELAY_MASK); + tmp |= (CLOCK_ON_DELAY(0) | CLOCK_OFF_DELAY(4)); + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp |= 0x10; + tmp &= ~0x100000; + WREG32(VCE_CLOCK_GATING_B, tmp); +} + +int vce_v2_0_resume(struct radeon_device *rdev) +{ + uint64_t addr = rdev->vce.gpu_addr; + uint32_t size; + + WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16)); + WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000); + WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F); + WREG32(VCE_CLOCK_GATING_B, 0xf7); + + WREG32(VCE_LMI_CTRL, 0x00398000); + WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1); + WREG32(VCE_LMI_SWAP_CNTL, 0); + WREG32(VCE_LMI_SWAP_CNTL1, 0); + WREG32(VCE_LMI_VM_CTRL, 0); + + size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size); + WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE0, size); + + addr += size; + size = RADEON_VCE_STACK_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE1, size); + + addr += size; + size = RADEON_VCE_HEAP_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE2, size); + + WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100); + + WREG32_P(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, + ~VCE_SYS_INT_TRAP_INTERRUPT_EN); + + vce_v2_0_init_cg(rdev); + + return 0; +} diff --git a/drivers/gpu/drm/tegra/bus.c b/drivers/gpu/drm/tegra/bus.c index e38e5967d77..71cef5c13dc 100644 --- a/drivers/gpu/drm/tegra/bus.c +++ b/drivers/gpu/drm/tegra/bus.c @@ -63,7 +63,7 @@ int drm_host1x_init(struct drm_driver *driver, struct host1x_device *device) return 0; err_free: - drm_dev_free(drm); + drm_dev_unref(drm); return ret; } diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c index 3302f99e749..764be36397f 100644 --- a/drivers/gpu/drm/ttm/ttm_agp_backend.c +++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c @@ -126,6 +126,7 @@ struct ttm_tt *ttm_agp_tt_create(struct ttm_bo_device *bdev, agp_be->ttm.func = &ttm_agp_func; if (ttm_tt_init(&agp_be->ttm, bdev, size, page_flags, dummy_read_page)) { + kfree(agp_be); return NULL; } diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index 37079859afc..53b51c4e671 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -292,7 +292,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, if (ret == 0) { ref = drm_hash_entry(hash, struct ttm_ref_object, hash); - if (!kref_get_unless_zero(&ref->kref)) { + if (kref_get_unless_zero(&ref->kref)) { rcu_read_unlock(); break; } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 9af99084b34..75f31909004 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -380,6 +380,9 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm) pgoff_t i; struct page **page = ttm->pages; + if (ttm->page_flags & TTM_PAGE_FLAG_SG) + return; + for (i = 0; i < ttm->num_pages; ++i) { (*page)->mapping = NULL; (*page++)->index = 0; diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index dbadd49e4c4..377176372da 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -421,7 +421,7 @@ static int udl_user_framebuffer_dirty(struct drm_framebuffer *fb, clips[i].x2 - clips[i].x1, clips[i].y2 - clips[i].y1); if (ret) - goto unlock; + break; } if (ufb->obj->base.import_attach) { diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index 8d67b943ac0..be4fcd0f0e0 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -60,7 +60,7 @@ int udl_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args) { - args->pitch = args->width * ((args->bpp + 1) / 8); + args->pitch = args->width * DIV_ROUND_UP(args->bpp, 8); args->size = args->pitch * args->height; return udl_gem_create(file, dev, args->size, &args->handle); diff --git a/drivers/gpu/drm/vmwgfx/svga3d_reg.h b/drivers/gpu/drm/vmwgfx/svga3d_reg.h index d95335cb90b..bb594c11605 100644 --- a/drivers/gpu/drm/vmwgfx/svga3d_reg.h +++ b/drivers/gpu/drm/vmwgfx/svga3d_reg.h @@ -1223,9 +1223,19 @@ typedef enum { #define SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL 1129 #define SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE 1130 - +#define SVGA_3D_CMD_GB_SCREEN_DMA 1131 +#define SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH 1132 +#define SVGA_3D_CMD_GB_MOB_FENCE 1133 +#define SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 1134 #define SVGA_3D_CMD_DEFINE_GB_MOB64 1135 #define SVGA_3D_CMD_REDEFINE_GB_MOB64 1136 +#define SVGA_3D_CMD_NOP_ERROR 1137 + +#define SVGA_3D_CMD_RESERVED1 1138 +#define SVGA_3D_CMD_RESERVED2 1139 +#define SVGA_3D_CMD_RESERVED3 1140 +#define SVGA_3D_CMD_RESERVED4 1141 +#define SVGA_3D_CMD_RESERVED5 1142 #define SVGA_3D_CMD_MAX 1142 #define SVGA_3D_CMD_FUTURE_MAX 3000 @@ -1973,8 +1983,7 @@ struct { uint32 sizeInBytes; uint32 validSizeInBytes; SVGAMobFormat ptDepth; -} -__attribute__((__packed__)) +} __packed SVGA3dCmdSetOTableBase; /* SVGA_3D_CMD_SET_OTABLE_BASE */ typedef @@ -1984,15 +1993,13 @@ struct { uint32 sizeInBytes; uint32 validSizeInBytes; SVGAMobFormat ptDepth; -} -__attribute__((__packed__)) +} __packed SVGA3dCmdSetOTableBase64; /* SVGA_3D_CMD_SET_OTABLE_BASE64 */ typedef struct { SVGAOTableType type; -} -__attribute__((__packed__)) +} __packed SVGA3dCmdReadbackOTable; /* SVGA_3D_CMD_READBACK_OTABLE */ /* @@ -2005,8 +2012,7 @@ struct SVGA3dCmdDefineGBMob { SVGAMobFormat ptDepth; PPN base; uint32 sizeInBytes; -} -__attribute__((__packed__)) +} __packed SVGA3dCmdDefineGBMob; /* SVGA_3D_CMD_DEFINE_GB_MOB */ @@ -2017,8 +2023,7 @@ SVGA3dCmdDefineGBMob; /* SVGA_3D_CMD_DEFINE_GB_MOB */ typedef struct SVGA3dCmdDestroyGBMob { SVGAMobId mobid; -} -__attribute__((__packed__)) +} __packed SVGA3dCmdDestroyGBMob; /* SVGA_3D_CMD_DESTROY_GB_MOB */ /* @@ -2031,8 +2036,7 @@ struct SVGA3dCmdRedefineGBMob { SVGAMobFormat ptDepth; PPN base; uint32 sizeInBytes; -} -__attribute__((__packed__)) +} __packed SVGA3dCmdRedefineGBMob; /* SVGA_3D_CMD_REDEFINE_GB_MOB */ /* @@ -2045,8 +2049,7 @@ struct SVGA3dCmdDefineGBMob64 { SVGAMobFormat ptDepth; PPN64 base; uint32 sizeInBytes; -} -__attribute__((__packed__)) +} __packed SVGA3dCmdDefineGBMob64; /* SVGA_3D_CMD_DEFINE_GB_MOB64 */ /* @@ -2059,8 +2062,7 @@ struct SVGA3dCmdRedefineGBMob64 { SVGAMobFormat ptDepth; PPN64 base; uint32 sizeInBytes; -} -__attribute__((__packed__)) +} __packed SVGA3dCmdRedefineGBMob64; /* SVGA_3D_CMD_REDEFINE_GB_MOB64 */ /* @@ -2070,8 +2072,7 @@ SVGA3dCmdRedefineGBMob64; /* SVGA_3D_CMD_REDEFINE_GB_MOB64 */ typedef struct SVGA3dCmdUpdateGBMobMapping { SVGAMobId mobid; -} -__attribute__((__packed__)) +} __packed SVGA3dCmdUpdateGBMobMapping; /* SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING */ /* @@ -2087,7 +2088,8 @@ struct SVGA3dCmdDefineGBSurface { uint32 multisampleCount; SVGA3dTextureFilter autogenFilter; SVGA3dSize size; -} SVGA3dCmdDefineGBSurface; /* SVGA_3D_CMD_DEFINE_GB_SURFACE */ +} __packed +SVGA3dCmdDefineGBSurface; /* SVGA_3D_CMD_DEFINE_GB_SURFACE */ /* * Destroy a guest-backed surface. @@ -2096,7 +2098,8 @@ struct SVGA3dCmdDefineGBSurface { typedef struct SVGA3dCmdDestroyGBSurface { uint32 sid; -} SVGA3dCmdDestroyGBSurface; /* SVGA_3D_CMD_DESTROY_GB_SURFACE */ +} __packed +SVGA3dCmdDestroyGBSurface; /* SVGA_3D_CMD_DESTROY_GB_SURFACE */ /* * Bind a guest-backed surface to an object. @@ -2106,7 +2109,8 @@ typedef struct SVGA3dCmdBindGBSurface { uint32 sid; SVGAMobId mobid; -} SVGA3dCmdBindGBSurface; /* SVGA_3D_CMD_BIND_GB_SURFACE */ +} __packed +SVGA3dCmdBindGBSurface; /* SVGA_3D_CMD_BIND_GB_SURFACE */ /* * Conditionally bind a mob to a guest backed surface if testMobid @@ -2123,7 +2127,7 @@ struct{ SVGAMobId testMobid; SVGAMobId mobid; uint32 flags; -} +} __packed SVGA3dCmdCondBindGBSurface; /* SVGA_3D_CMD_COND_BIND_GB_SURFACE */ /* @@ -2135,7 +2139,8 @@ typedef struct SVGA3dCmdUpdateGBImage { SVGA3dSurfaceImageId image; SVGA3dBox box; -} SVGA3dCmdUpdateGBImage; /* SVGA_3D_CMD_UPDATE_GB_IMAGE */ +} __packed +SVGA3dCmdUpdateGBImage; /* SVGA_3D_CMD_UPDATE_GB_IMAGE */ /* * Update an entire guest-backed surface. @@ -2145,7 +2150,8 @@ struct SVGA3dCmdUpdateGBImage { typedef struct SVGA3dCmdUpdateGBSurface { uint32 sid; -} SVGA3dCmdUpdateGBSurface; /* SVGA_3D_CMD_UPDATE_GB_SURFACE */ +} __packed +SVGA3dCmdUpdateGBSurface; /* SVGA_3D_CMD_UPDATE_GB_SURFACE */ /* * Readback an image in a guest-backed surface. @@ -2155,7 +2161,8 @@ struct SVGA3dCmdUpdateGBSurface { typedef struct SVGA3dCmdReadbackGBImage { SVGA3dSurfaceImageId image; -} SVGA3dCmdReadbackGBImage; /* SVGA_3D_CMD_READBACK_GB_IMAGE*/ +} __packed +SVGA3dCmdReadbackGBImage; /* SVGA_3D_CMD_READBACK_GB_IMAGE*/ /* * Readback an entire guest-backed surface. @@ -2165,7 +2172,8 @@ struct SVGA3dCmdReadbackGBImage { typedef struct SVGA3dCmdReadbackGBSurface { uint32 sid; -} SVGA3dCmdReadbackGBSurface; /* SVGA_3D_CMD_READBACK_GB_SURFACE */ +} __packed +SVGA3dCmdReadbackGBSurface; /* SVGA_3D_CMD_READBACK_GB_SURFACE */ /* * Readback a sub rect of an image in a guest-backed surface. After @@ -2179,7 +2187,7 @@ struct SVGA3dCmdReadbackGBImagePartial { SVGA3dSurfaceImageId image; SVGA3dBox box; uint32 invertBox; -} +} __packed SVGA3dCmdReadbackGBImagePartial; /* SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL */ /* @@ -2190,7 +2198,8 @@ SVGA3dCmdReadbackGBImagePartial; /* SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL */ typedef struct SVGA3dCmdInvalidateGBImage { SVGA3dSurfaceImageId image; -} SVGA3dCmdInvalidateGBImage; /* SVGA_3D_CMD_INVALIDATE_GB_IMAGE */ +} __packed +SVGA3dCmdInvalidateGBImage; /* SVGA_3D_CMD_INVALIDATE_GB_IMAGE */ /* * Invalidate an entire guest-backed surface. @@ -2200,7 +2209,8 @@ struct SVGA3dCmdInvalidateGBImage { typedef struct SVGA3dCmdInvalidateGBSurface { uint32 sid; -} SVGA3dCmdInvalidateGBSurface; /* SVGA_3D_CMD_INVALIDATE_GB_SURFACE */ +} __packed +SVGA3dCmdInvalidateGBSurface; /* SVGA_3D_CMD_INVALIDATE_GB_SURFACE */ /* * Invalidate a sub rect of an image in a guest-backed surface. After @@ -2214,7 +2224,7 @@ struct SVGA3dCmdInvalidateGBImagePartial { SVGA3dSurfaceImageId image; SVGA3dBox box; uint32 invertBox; -} +} __packed SVGA3dCmdInvalidateGBImagePartial; /* SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL */ /* @@ -2224,7 +2234,8 @@ SVGA3dCmdInvalidateGBImagePartial; /* SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL */ typedef struct SVGA3dCmdDefineGBContext { uint32 cid; -} SVGA3dCmdDefineGBContext; /* SVGA_3D_CMD_DEFINE_GB_CONTEXT */ +} __packed +SVGA3dCmdDefineGBContext; /* SVGA_3D_CMD_DEFINE_GB_CONTEXT */ /* * Destroy a guest-backed context. @@ -2233,7 +2244,8 @@ struct SVGA3dCmdDefineGBContext { typedef struct SVGA3dCmdDestroyGBContext { uint32 cid; -} SVGA3dCmdDestroyGBContext; /* SVGA_3D_CMD_DESTROY_GB_CONTEXT */ +} __packed +SVGA3dCmdDestroyGBContext; /* SVGA_3D_CMD_DESTROY_GB_CONTEXT */ /* * Bind a guest-backed context. @@ -2252,7 +2264,8 @@ struct SVGA3dCmdBindGBContext { uint32 cid; SVGAMobId mobid; uint32 validContents; -} SVGA3dCmdBindGBContext; /* SVGA_3D_CMD_BIND_GB_CONTEXT */ +} __packed +SVGA3dCmdBindGBContext; /* SVGA_3D_CMD_BIND_GB_CONTEXT */ /* * Readback a guest-backed context. @@ -2262,7 +2275,8 @@ struct SVGA3dCmdBindGBContext { typedef struct SVGA3dCmdReadbackGBContext { uint32 cid; -} SVGA3dCmdReadbackGBContext; /* SVGA_3D_CMD_READBACK_GB_CONTEXT */ +} __packed +SVGA3dCmdReadbackGBContext; /* SVGA_3D_CMD_READBACK_GB_CONTEXT */ /* * Invalidate a guest-backed context. @@ -2270,7 +2284,8 @@ struct SVGA3dCmdReadbackGBContext { typedef struct SVGA3dCmdInvalidateGBContext { uint32 cid; -} SVGA3dCmdInvalidateGBContext; /* SVGA_3D_CMD_INVALIDATE_GB_CONTEXT */ +} __packed +SVGA3dCmdInvalidateGBContext; /* SVGA_3D_CMD_INVALIDATE_GB_CONTEXT */ /* * Define a guest-backed shader. @@ -2281,7 +2296,8 @@ struct SVGA3dCmdDefineGBShader { uint32 shid; SVGA3dShaderType type; uint32 sizeInBytes; -} SVGA3dCmdDefineGBShader; /* SVGA_3D_CMD_DEFINE_GB_SHADER */ +} __packed +SVGA3dCmdDefineGBShader; /* SVGA_3D_CMD_DEFINE_GB_SHADER */ /* * Bind a guest-backed shader. @@ -2291,7 +2307,8 @@ typedef struct SVGA3dCmdBindGBShader { uint32 shid; SVGAMobId mobid; uint32 offsetInBytes; -} SVGA3dCmdBindGBShader; /* SVGA_3D_CMD_BIND_GB_SHADER */ +} __packed +SVGA3dCmdBindGBShader; /* SVGA_3D_CMD_BIND_GB_SHADER */ /* * Destroy a guest-backed shader. @@ -2299,7 +2316,8 @@ typedef struct SVGA3dCmdBindGBShader { typedef struct SVGA3dCmdDestroyGBShader { uint32 shid; -} SVGA3dCmdDestroyGBShader; /* SVGA_3D_CMD_DESTROY_GB_SHADER */ +} __packed +SVGA3dCmdDestroyGBShader; /* SVGA_3D_CMD_DESTROY_GB_SHADER */ typedef struct { @@ -2314,14 +2332,16 @@ struct { * Note that FLOAT and INT constants are 4-dwords in length, while * BOOL constants are 1-dword in length. */ -} SVGA3dCmdSetGBShaderConstInline; +} __packed +SVGA3dCmdSetGBShaderConstInline; /* SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE */ typedef struct { uint32 cid; SVGA3dQueryType type; -} SVGA3dCmdBeginGBQuery; /* SVGA_3D_CMD_BEGIN_GB_QUERY */ +} __packed +SVGA3dCmdBeginGBQuery; /* SVGA_3D_CMD_BEGIN_GB_QUERY */ typedef struct { @@ -2329,7 +2349,8 @@ struct { SVGA3dQueryType type; SVGAMobId mobid; uint32 offset; -} SVGA3dCmdEndGBQuery; /* SVGA_3D_CMD_END_GB_QUERY */ +} __packed +SVGA3dCmdEndGBQuery; /* SVGA_3D_CMD_END_GB_QUERY */ /* @@ -2346,21 +2367,22 @@ struct { SVGA3dQueryType type; SVGAMobId mobid; uint32 offset; -} SVGA3dCmdWaitForGBQuery; /* SVGA_3D_CMD_WAIT_FOR_GB_QUERY */ +} __packed +SVGA3dCmdWaitForGBQuery; /* SVGA_3D_CMD_WAIT_FOR_GB_QUERY */ typedef struct { SVGAMobId mobid; uint32 fbOffset; uint32 initalized; -} +} __packed SVGA3dCmdEnableGart; /* SVGA_3D_CMD_ENABLE_GART */ typedef struct { SVGAMobId mobid; uint32 gartOffset; -} +} __packed SVGA3dCmdMapMobIntoGart; /* SVGA_3D_CMD_MAP_MOB_INTO_GART */ @@ -2368,7 +2390,7 @@ typedef struct { uint32 gartOffset; uint32 numPages; -} +} __packed SVGA3dCmdUnmapGartRange; /* SVGA_3D_CMD_UNMAP_GART_RANGE */ @@ -2385,27 +2407,27 @@ struct { int32 xRoot; int32 yRoot; uint32 flags; -} +} __packed SVGA3dCmdDefineGBScreenTarget; /* SVGA_3D_CMD_DEFINE_GB_SCREENTARGET */ typedef struct { uint32 stid; -} +} __packed SVGA3dCmdDestroyGBScreenTarget; /* SVGA_3D_CMD_DESTROY_GB_SCREENTARGET */ typedef struct { uint32 stid; SVGA3dSurfaceImageId image; -} +} __packed SVGA3dCmdBindGBScreenTarget; /* SVGA_3D_CMD_BIND_GB_SCREENTARGET */ typedef struct { uint32 stid; SVGA3dBox box; -} +} __packed SVGA3dCmdUpdateGBScreenTarget; /* SVGA_3D_CMD_UPDATE_GB_SCREENTARGET */ /* @@ -2583,4 +2605,28 @@ typedef union { float f; } SVGA3dDevCapResult; +typedef enum { + SVGA3DCAPS_RECORD_UNKNOWN = 0, + SVGA3DCAPS_RECORD_DEVCAPS_MIN = 0x100, + SVGA3DCAPS_RECORD_DEVCAPS = 0x100, + SVGA3DCAPS_RECORD_DEVCAPS_MAX = 0x1ff, +} SVGA3dCapsRecordType; + +typedef +struct SVGA3dCapsRecordHeader { + uint32 length; + SVGA3dCapsRecordType type; +} +SVGA3dCapsRecordHeader; + +typedef +struct SVGA3dCapsRecord { + SVGA3dCapsRecordHeader header; + uint32 data[1]; +} +SVGA3dCapsRecord; + + +typedef uint32 SVGA3dCapPair[2]; + #endif /* _SVGA3D_REG_H_ */ diff --git a/drivers/gpu/drm/vmwgfx/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/svga3d_surfacedefs.h index 8369c3ba10f..ef338509614 100644 --- a/drivers/gpu/drm/vmwgfx/svga3d_surfacedefs.h +++ b/drivers/gpu/drm/vmwgfx/svga3d_surfacedefs.h @@ -38,8 +38,11 @@ #define DIV_ROUND_UP(x, y) (((x) + (y) - 1) / (y)) #define max_t(type, x, y) ((x) > (y) ? (x) : (y)) +#define min_t(type, x, y) ((x) < (y) ? (x) : (y)) #define surf_size_struct SVGA3dSize #define u32 uint32 +#define u64 uint64_t +#define U32_MAX ((u32)~0U) #endif /* __KERNEL__ */ @@ -704,8 +707,8 @@ static const struct svga3d_surface_desc svga3d_surface_descs[] = { static inline u32 clamped_umul32(u32 a, u32 b) { - uint64_t tmp = (uint64_t) a*b; - return (tmp > (uint64_t) ((u32) -1)) ? (u32) -1 : tmp; + u64 tmp = (u64) a*b; + return (tmp > (u64) U32_MAX) ? U32_MAX : tmp; } static inline const struct svga3d_surface_desc * @@ -834,7 +837,7 @@ svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format, bool cubemap) { const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format); - u32 total_size = 0; + u64 total_size = 0; u32 mip; for (mip = 0; mip < num_mip_levels; mip++) { @@ -847,7 +850,7 @@ svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format, if (cubemap) total_size *= SVGA3D_MAX_SURFACE_FACES; - return total_size; + return (u32) min_t(u64, total_size, (u64) U32_MAX); } diff --git a/drivers/gpu/drm/vmwgfx/svga_reg.h b/drivers/gpu/drm/vmwgfx/svga_reg.h index 71defa4d2d7..11323dd5196 100644 --- a/drivers/gpu/drm/vmwgfx/svga_reg.h +++ b/drivers/gpu/drm/vmwgfx/svga_reg.h @@ -169,10 +169,17 @@ enum { SVGA_REG_TRACES = 45, /* Enable trace-based updates even when FIFO is on */ SVGA_REG_GMRS_MAX_PAGES = 46, /* Maximum number of 4KB pages for all GMRs */ SVGA_REG_MEMORY_SIZE = 47, /* Total dedicated device memory excluding FIFO */ + SVGA_REG_COMMAND_LOW = 48, /* Lower 32 bits and submits commands */ + SVGA_REG_COMMAND_HIGH = 49, /* Upper 32 bits of command buffer PA */ SVGA_REG_MAX_PRIMARY_BOUNDING_BOX_MEM = 50, /* Max primary memory */ SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB = 51, /* Suggested limit on mob mem */ SVGA_REG_DEV_CAP = 52, /* Write dev cap index, read value */ - SVGA_REG_TOP = 53, /* Must be 1 more than the last register */ + SVGA_REG_CMD_PREPEND_LOW = 53, + SVGA_REG_CMD_PREPEND_HIGH = 54, + SVGA_REG_SCREENTARGET_MAX_WIDTH = 55, + SVGA_REG_SCREENTARGET_MAX_HEIGHT = 56, + SVGA_REG_MOB_MAX_SIZE = 57, + SVGA_REG_TOP = 58, /* Must be 1 more than the last register */ SVGA_PALETTE_BASE = 1024, /* Base of SVGA color map */ /* Next 768 (== 256*3) registers exist for colormap */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c index 82c41daebc0..1e80152674b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c @@ -37,7 +37,7 @@ struct vmw_user_context { -typedef int (*vmw_scrub_func)(struct vmw_ctx_bindinfo *); +typedef int (*vmw_scrub_func)(struct vmw_ctx_bindinfo *, bool); static void vmw_user_context_free(struct vmw_resource *res); static struct vmw_resource * @@ -50,9 +50,11 @@ static int vmw_gb_context_unbind(struct vmw_resource *res, bool readback, struct ttm_validate_buffer *val_buf); static int vmw_gb_context_destroy(struct vmw_resource *res); -static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi); -static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi); -static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi); +static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi, bool rebind); +static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi, + bool rebind); +static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi, bool rebind); +static void vmw_context_binding_state_scrub(struct vmw_ctx_binding_state *cbs); static void vmw_context_binding_state_kill(struct vmw_ctx_binding_state *cbs); static uint64_t vmw_user_context_size; @@ -111,10 +113,14 @@ static void vmw_hw_context_destroy(struct vmw_resource *res) if (res->func->destroy == vmw_gb_context_destroy) { mutex_lock(&dev_priv->cmdbuf_mutex); + mutex_lock(&dev_priv->binding_mutex); + (void) vmw_context_binding_state_kill + (&container_of(res, struct vmw_user_context, res)->cbs); (void) vmw_gb_context_destroy(res); if (dev_priv->pinned_bo != NULL && !dev_priv->query_cid_valid) __vmw_execbuf_release_pinned_bo(dev_priv, NULL); + mutex_unlock(&dev_priv->binding_mutex); mutex_unlock(&dev_priv->cmdbuf_mutex); return; } @@ -328,7 +334,7 @@ static int vmw_gb_context_unbind(struct vmw_resource *res, BUG_ON(bo->mem.mem_type != VMW_PL_MOB); mutex_lock(&dev_priv->binding_mutex); - vmw_context_binding_state_kill(&uctx->cbs); + vmw_context_binding_state_scrub(&uctx->cbs); submit_size = sizeof(*cmd2) + (readback ? sizeof(*cmd1) : 0); @@ -378,10 +384,6 @@ static int vmw_gb_context_destroy(struct vmw_resource *res) SVGA3dCmdHeader header; SVGA3dCmdDestroyGBContext body; } *cmd; - struct vmw_user_context *uctx = - container_of(res, struct vmw_user_context, res); - - BUG_ON(!list_empty(&uctx->cbs.list)); if (likely(res->id == -1)) return 0; @@ -528,8 +530,9 @@ out_unlock: * vmw_context_scrub_shader - scrub a shader binding from a context. * * @bi: single binding information. + * @rebind: Whether to issue a bind instead of scrub command. */ -static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi) +static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi, bool rebind) { struct vmw_private *dev_priv = bi->ctx->dev_priv; struct { @@ -548,7 +551,7 @@ static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi) cmd->header.size = sizeof(cmd->body); cmd->body.cid = bi->ctx->id; cmd->body.type = bi->i1.shader_type; - cmd->body.shid = SVGA3D_INVALID_ID; + cmd->body.shid = ((rebind) ? bi->res->id : SVGA3D_INVALID_ID); vmw_fifo_commit(dev_priv, sizeof(*cmd)); return 0; @@ -559,8 +562,10 @@ static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi) * from a context. * * @bi: single binding information. + * @rebind: Whether to issue a bind instead of scrub command. */ -static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi) +static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi, + bool rebind) { struct vmw_private *dev_priv = bi->ctx->dev_priv; struct { @@ -579,7 +584,7 @@ static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi) cmd->header.size = sizeof(cmd->body); cmd->body.cid = bi->ctx->id; cmd->body.type = bi->i1.rt_type; - cmd->body.target.sid = SVGA3D_INVALID_ID; + cmd->body.target.sid = ((rebind) ? bi->res->id : SVGA3D_INVALID_ID); cmd->body.target.face = 0; cmd->body.target.mipmap = 0; vmw_fifo_commit(dev_priv, sizeof(*cmd)); @@ -591,11 +596,13 @@ static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi) * vmw_context_scrub_texture - scrub a texture binding from a context. * * @bi: single binding information. + * @rebind: Whether to issue a bind instead of scrub command. * * TODO: Possibly complement this function with a function that takes * a list of texture bindings and combines them to a single command. */ -static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi) +static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi, + bool rebind) { struct vmw_private *dev_priv = bi->ctx->dev_priv; struct { @@ -619,7 +626,7 @@ static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi) cmd->body.c.cid = bi->ctx->id; cmd->body.s1.stage = bi->i1.texture_stage; cmd->body.s1.name = SVGA3D_TS_BIND_TEXTURE; - cmd->body.s1.value = (uint32) SVGA3D_INVALID_ID; + cmd->body.s1.value = ((rebind) ? bi->res->id : SVGA3D_INVALID_ID); vmw_fifo_commit(dev_priv, sizeof(*cmd)); return 0; @@ -692,6 +699,7 @@ int vmw_context_binding_add(struct vmw_ctx_binding_state *cbs, vmw_context_binding_drop(loc); loc->bi = *bi; + loc->bi.scrubbed = false; list_add_tail(&loc->ctx_list, &cbs->list); INIT_LIST_HEAD(&loc->res_list); @@ -727,12 +735,11 @@ static void vmw_context_binding_transfer(struct vmw_ctx_binding_state *cbs, if (loc->bi.ctx != NULL) vmw_context_binding_drop(loc); - loc->bi = *bi; - list_add_tail(&loc->ctx_list, &cbs->list); - if (bi->res != NULL) + if (bi->res != NULL) { + loc->bi = *bi; + list_add_tail(&loc->ctx_list, &cbs->list); list_add_tail(&loc->res_list, &bi->res->binding_head); - else - INIT_LIST_HEAD(&loc->res_list); + } } /** @@ -746,7 +753,10 @@ static void vmw_context_binding_transfer(struct vmw_ctx_binding_state *cbs, */ static void vmw_context_binding_kill(struct vmw_ctx_binding *cb) { - (void) vmw_scrub_funcs[cb->bi.bt](&cb->bi); + if (!cb->bi.scrubbed) { + (void) vmw_scrub_funcs[cb->bi.bt](&cb->bi, false); + cb->bi.scrubbed = true; + } vmw_context_binding_drop(cb); } @@ -768,6 +778,27 @@ static void vmw_context_binding_state_kill(struct vmw_ctx_binding_state *cbs) } /** + * vmw_context_binding_state_scrub - Scrub all bindings associated with a + * struct vmw_ctx_binding state structure. + * + * @cbs: Pointer to the context binding state tracker. + * + * Emits commands to scrub all bindings associated with the + * context binding state tracker. + */ +static void vmw_context_binding_state_scrub(struct vmw_ctx_binding_state *cbs) +{ + struct vmw_ctx_binding *entry; + + list_for_each_entry(entry, &cbs->list, ctx_list) { + if (!entry->bi.scrubbed) { + (void) vmw_scrub_funcs[entry->bi.bt](&entry->bi, false); + entry->bi.scrubbed = true; + } + } +} + +/** * vmw_context_binding_res_list_kill - Kill all bindings on a * resource binding list * @@ -785,6 +816,27 @@ void vmw_context_binding_res_list_kill(struct list_head *head) } /** + * vmw_context_binding_res_list_scrub - Scrub all bindings on a + * resource binding list + * + * @head: list head of resource binding list + * + * Scrub all bindings associated with a specific resource. Typically + * called before the resource is evicted. + */ +void vmw_context_binding_res_list_scrub(struct list_head *head) +{ + struct vmw_ctx_binding *entry; + + list_for_each_entry(entry, head, res_list) { + if (!entry->bi.scrubbed) { + (void) vmw_scrub_funcs[entry->bi.bt](&entry->bi, false); + entry->bi.scrubbed = true; + } + } +} + +/** * vmw_context_binding_state_transfer - Commit staged binding info * * @ctx: Pointer to context to commit the staged binding info to. @@ -803,3 +855,50 @@ void vmw_context_binding_state_transfer(struct vmw_resource *ctx, list_for_each_entry_safe(entry, next, &from->list, ctx_list) vmw_context_binding_transfer(&uctx->cbs, &entry->bi); } + +/** + * vmw_context_rebind_all - Rebind all scrubbed bindings of a context + * + * @ctx: The context resource + * + * Walks through the context binding list and rebinds all scrubbed + * resources. + */ +int vmw_context_rebind_all(struct vmw_resource *ctx) +{ + struct vmw_ctx_binding *entry; + struct vmw_user_context *uctx = + container_of(ctx, struct vmw_user_context, res); + struct vmw_ctx_binding_state *cbs = &uctx->cbs; + int ret; + + list_for_each_entry(entry, &cbs->list, ctx_list) { + if (likely(!entry->bi.scrubbed)) + continue; + + if (WARN_ON(entry->bi.res == NULL || entry->bi.res->id == + SVGA3D_INVALID_ID)) + continue; + + ret = vmw_scrub_funcs[entry->bi.bt](&entry->bi, true); + if (unlikely(ret != 0)) + return ret; + + entry->bi.scrubbed = false; + } + + return 0; +} + +/** + * vmw_context_binding_list - Return a list of context bindings + * + * @ctx: The context resource + * + * Returns the current list of bindings of the given context. Note that + * this list becomes stale as soon as the dev_priv::binding_mutex is unlocked. + */ +struct list_head *vmw_context_binding_list(struct vmw_resource *ctx) +{ + return &(container_of(ctx, struct vmw_user_context, res)->cbs.list); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 9893328f8fd..0083cbf99ed 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -667,6 +667,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) dev_priv->memory_size = 512*1024*1024; } dev_priv->max_mob_pages = 0; + dev_priv->max_mob_size = 0; if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) { uint64_t mem_size = vmw_read(dev_priv, @@ -676,6 +677,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) dev_priv->prim_bb_mem = vmw_read(dev_priv, SVGA_REG_MAX_PRIMARY_BOUNDING_BOX_MEM); + dev_priv->max_mob_size = + vmw_read(dev_priv, SVGA_REG_MOB_MAX_SIZE); } else dev_priv->prim_bb_mem = dev_priv->vram_size; @@ -941,6 +944,7 @@ static void vmw_postclose(struct drm_device *dev, drm_master_put(&vmw_fp->locked_master); } + vmw_compat_shader_man_destroy(vmw_fp->shman); ttm_object_file_release(&vmw_fp->tfile); kfree(vmw_fp); } @@ -960,11 +964,17 @@ static int vmw_driver_open(struct drm_device *dev, struct drm_file *file_priv) if (unlikely(vmw_fp->tfile == NULL)) goto out_no_tfile; + vmw_fp->shman = vmw_compat_shader_man_create(dev_priv); + if (IS_ERR(vmw_fp->shman)) + goto out_no_shman; + file_priv->driver_priv = vmw_fp; dev_priv->bdev.dev_mapping = dev->dev_mapping; return 0; +out_no_shman: + ttm_object_file_release(&vmw_fp->tfile); out_no_tfile: kfree(vmw_fp); return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 554e7fa3308..9e4be172598 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -75,10 +75,14 @@ #define VMW_RES_FENCE ttm_driver_type3 #define VMW_RES_SHADER ttm_driver_type4 +struct vmw_compat_shader_manager; + struct vmw_fpriv { struct drm_master *locked_master; struct ttm_object_file *tfile; struct list_head fence_events; + bool gb_aware; + struct vmw_compat_shader_manager *shman; }; struct vmw_dma_buffer { @@ -272,6 +276,7 @@ struct vmw_ctx_bindinfo { struct vmw_resource *ctx; struct vmw_resource *res; enum vmw_ctx_binding_type bt; + bool scrubbed; union { SVGA3dShaderType shader_type; SVGA3dRenderTargetType rt_type; @@ -318,7 +323,7 @@ struct vmw_sw_context{ struct drm_open_hash res_ht; bool res_ht_initialized; bool kernel; /**< is the called made from the kernel */ - struct ttm_object_file *tfile; + struct vmw_fpriv *fp; struct list_head validate_nodes; struct vmw_relocation relocs[VMWGFX_MAX_RELOCATIONS]; uint32_t cur_reloc; @@ -336,6 +341,7 @@ struct vmw_sw_context{ bool needs_post_query_barrier; struct vmw_resource *error_resource; struct vmw_ctx_binding_state staged_bindings; + struct list_head staged_shaders; }; struct vmw_legacy_display; @@ -380,6 +386,7 @@ struct vmw_private { uint32_t max_gmr_ids; uint32_t max_gmr_pages; uint32_t max_mob_pages; + uint32_t max_mob_size; uint32_t memory_size; bool has_gmr; bool has_mob; @@ -569,6 +576,8 @@ struct vmw_user_resource_conv; extern void vmw_resource_unreference(struct vmw_resource **p_res); extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); +extern struct vmw_resource * +vmw_resource_reference_unless_doomed(struct vmw_resource *res); extern int vmw_resource_validate(struct vmw_resource *res); extern int vmw_resource_reserve(struct vmw_resource *res, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); @@ -957,6 +966,9 @@ extern void vmw_context_binding_state_transfer(struct vmw_resource *res, struct vmw_ctx_binding_state *cbs); extern void vmw_context_binding_res_list_kill(struct list_head *head); +extern void vmw_context_binding_res_list_scrub(struct list_head *head); +extern int vmw_context_rebind_all(struct vmw_resource *ctx); +extern struct list_head *vmw_context_binding_list(struct vmw_resource *ctx); /* * Surface management - vmwgfx_surface.c @@ -991,6 +1003,28 @@ extern int vmw_shader_define_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_shader_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int vmw_compat_shader_lookup(struct vmw_compat_shader_manager *man, + SVGA3dShaderType shader_type, + u32 *user_key); +extern void vmw_compat_shaders_commit(struct vmw_compat_shader_manager *man, + struct list_head *list); +extern void vmw_compat_shaders_revert(struct vmw_compat_shader_manager *man, + struct list_head *list); +extern int vmw_compat_shader_remove(struct vmw_compat_shader_manager *man, + u32 user_key, + SVGA3dShaderType shader_type, + struct list_head *list); +extern int vmw_compat_shader_add(struct vmw_compat_shader_manager *man, + u32 user_key, const void *bytecode, + SVGA3dShaderType shader_type, + size_t size, + struct ttm_object_file *tfile, + struct list_head *list); +extern struct vmw_compat_shader_manager * +vmw_compat_shader_man_create(struct vmw_private *dev_priv); +extern void +vmw_compat_shader_man_destroy(struct vmw_compat_shader_manager *man); + /** * Inline helper functions diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 7a5f1eb55c5..efb575a7996 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -114,8 +114,10 @@ static void vmw_resource_list_unreserve(struct list_head *list, * persistent context binding tracker. */ if (unlikely(val->staged_bindings)) { - vmw_context_binding_state_transfer - (val->res, val->staged_bindings); + if (!backoff) { + vmw_context_binding_state_transfer + (val->res, val->staged_bindings); + } kfree(val->staged_bindings); val->staged_bindings = NULL; } @@ -178,6 +180,44 @@ static int vmw_resource_val_add(struct vmw_sw_context *sw_context, } /** + * vmw_resource_context_res_add - Put resources previously bound to a context on + * the validation list + * + * @dev_priv: Pointer to a device private structure + * @sw_context: Pointer to a software context used for this command submission + * @ctx: Pointer to the context resource + * + * This function puts all resources that were previously bound to @ctx on + * the resource validation list. This is part of the context state reemission + */ +static int vmw_resource_context_res_add(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + struct vmw_resource *ctx) +{ + struct list_head *binding_list; + struct vmw_ctx_binding *entry; + int ret = 0; + struct vmw_resource *res; + + mutex_lock(&dev_priv->binding_mutex); + binding_list = vmw_context_binding_list(ctx); + + list_for_each_entry(entry, binding_list, ctx_list) { + res = vmw_resource_reference_unless_doomed(entry->bi.res); + if (unlikely(res == NULL)) + continue; + + ret = vmw_resource_val_add(sw_context, entry->bi.res, NULL); + vmw_resource_unreference(&res); + if (unlikely(ret != 0)) + break; + } + + mutex_unlock(&dev_priv->binding_mutex); + return ret; +} + +/** * vmw_resource_relocation_add - Add a relocation to the relocation list * * @list: Pointer to head of relocation list. @@ -233,8 +273,12 @@ static void vmw_resource_relocations_apply(uint32_t *cb, { struct vmw_resource_relocation *rel; - list_for_each_entry(rel, list, head) - cb[rel->offset] = rel->res->id; + list_for_each_entry(rel, list, head) { + if (likely(rel->res != NULL)) + cb[rel->offset] = rel->res->id; + else + cb[rel->offset] = SVGA_3D_CMD_NOP; + } } static int vmw_cmd_invalid(struct vmw_private *dev_priv, @@ -379,22 +423,27 @@ static int vmw_resources_validate(struct vmw_sw_context *sw_context) } /** - * vmw_cmd_res_check - Check that a resource is present and if so, put it + * vmw_cmd_compat_res_check - Check that a resource is present and if so, put it * on the resource validate list unless it's already there. * * @dev_priv: Pointer to a device private structure. * @sw_context: Pointer to the software context. * @res_type: Resource type. * @converter: User-space visisble type specific information. - * @id: Pointer to the location in the command buffer currently being + * @id: user-space resource id handle. + * @id_loc: Pointer to the location in the command buffer currently being * parsed from where the user-space resource id handle is located. + * @p_val: Pointer to pointer to resource validalidation node. Populated + * on exit. */ -static int vmw_cmd_res_check(struct vmw_private *dev_priv, - struct vmw_sw_context *sw_context, - enum vmw_res_type res_type, - const struct vmw_user_resource_conv *converter, - uint32_t *id, - struct vmw_resource_val_node **p_val) +static int +vmw_cmd_compat_res_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + enum vmw_res_type res_type, + const struct vmw_user_resource_conv *converter, + uint32_t id, + uint32_t *id_loc, + struct vmw_resource_val_node **p_val) { struct vmw_res_cache_entry *rcache = &sw_context->res_cache[res_type]; @@ -402,7 +451,7 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv, struct vmw_resource_val_node *node; int ret; - if (*id == SVGA3D_INVALID_ID) { + if (id == SVGA3D_INVALID_ID) { if (p_val) *p_val = NULL; if (res_type == vmw_res_context) { @@ -417,7 +466,7 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv, * resource */ - if (likely(rcache->valid && *id == rcache->handle)) { + if (likely(rcache->valid && id == rcache->handle)) { const struct vmw_resource *res = rcache->res; rcache->node->first_usage = false; @@ -426,28 +475,28 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv, return vmw_resource_relocation_add (&sw_context->res_relocations, res, - id - sw_context->buf_start); + id_loc - sw_context->buf_start); } ret = vmw_user_resource_lookup_handle(dev_priv, - sw_context->tfile, - *id, + sw_context->fp->tfile, + id, converter, &res); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use resource 0x%08x.\n", - (unsigned) *id); + (unsigned) id); dump_stack(); return ret; } rcache->valid = true; rcache->res = res; - rcache->handle = *id; + rcache->handle = id; ret = vmw_resource_relocation_add(&sw_context->res_relocations, res, - id - sw_context->buf_start); + id_loc - sw_context->buf_start); if (unlikely(ret != 0)) goto out_no_reloc; @@ -459,7 +508,11 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv, if (p_val) *p_val = node; - if (node->first_usage && res_type == vmw_res_context) { + if (dev_priv->has_mob && node->first_usage && + res_type == vmw_res_context) { + ret = vmw_resource_context_res_add(dev_priv, sw_context, res); + if (unlikely(ret != 0)) + goto out_no_reloc; node->staged_bindings = kzalloc(sizeof(*node->staged_bindings), GFP_KERNEL); if (node->staged_bindings == NULL) { @@ -481,6 +534,59 @@ out_no_reloc: } /** + * vmw_cmd_res_check - Check that a resource is present and if so, put it + * on the resource validate list unless it's already there. + * + * @dev_priv: Pointer to a device private structure. + * @sw_context: Pointer to the software context. + * @res_type: Resource type. + * @converter: User-space visisble type specific information. + * @id_loc: Pointer to the location in the command buffer currently being + * parsed from where the user-space resource id handle is located. + * @p_val: Pointer to pointer to resource validalidation node. Populated + * on exit. + */ +static int +vmw_cmd_res_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + enum vmw_res_type res_type, + const struct vmw_user_resource_conv *converter, + uint32_t *id_loc, + struct vmw_resource_val_node **p_val) +{ + return vmw_cmd_compat_res_check(dev_priv, sw_context, res_type, + converter, *id_loc, id_loc, p_val); +} + +/** + * vmw_rebind_contexts - Rebind all resources previously bound to + * referenced contexts. + * + * @sw_context: Pointer to the software context. + * + * Rebind context binding points that have been scrubbed because of eviction. + */ +static int vmw_rebind_contexts(struct vmw_sw_context *sw_context) +{ + struct vmw_resource_val_node *val; + int ret; + + list_for_each_entry(val, &sw_context->resource_list, head) { + if (likely(!val->staged_bindings)) + continue; + + ret = vmw_context_rebind_all(val->res); + if (unlikely(ret != 0)) { + if (ret != -ERESTARTSYS) + DRM_ERROR("Failed to rebind context.\n"); + return ret; + } + } + + return 0; +} + +/** * vmw_cmd_cid_check - Check a command header for valid context information. * * @dev_priv: Pointer to a device private structure. @@ -496,7 +602,7 @@ static int vmw_cmd_cid_check(struct vmw_private *dev_priv, { struct vmw_cid_cmd { SVGA3dCmdHeader header; - __le32 cid; + uint32_t cid; } *cmd; cmd = container_of(header, struct vmw_cid_cmd, header); @@ -767,7 +873,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use MOB buffer.\n"); return -EINVAL; @@ -828,7 +934,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use GMR region.\n"); return -EINVAL; @@ -1127,7 +1233,8 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv, srf = vmw_res_to_srf(sw_context->res_cache[vmw_res_surface].res); - vmw_kms_cursor_snoop(srf, sw_context->tfile, &vmw_bo->base, header); + vmw_kms_cursor_snoop(srf, sw_context->fp->tfile, &vmw_bo->base, + header); out_no_surface: vmw_dmabuf_unreference(&vmw_bo); @@ -1478,6 +1585,98 @@ static int vmw_cmd_invalidate_gb_surface(struct vmw_private *dev_priv, &cmd->body.sid, NULL); } + +/** + * vmw_cmd_shader_define - Validate an SVGA_3D_CMD_SHADER_DEFINE + * command + * + * @dev_priv: Pointer to a device private struct. + * @sw_context: The software context being used for this batch. + * @header: Pointer to the command header in the command stream. + */ +static int vmw_cmd_shader_define(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_shader_define_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdDefineShader body; + } *cmd; + int ret; + size_t size; + + cmd = container_of(header, struct vmw_shader_define_cmd, + header); + + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context, + user_context_converter, &cmd->body.cid, + NULL); + if (unlikely(ret != 0)) + return ret; + + if (unlikely(!dev_priv->has_mob)) + return 0; + + size = cmd->header.size - sizeof(cmd->body); + ret = vmw_compat_shader_add(sw_context->fp->shman, + cmd->body.shid, cmd + 1, + cmd->body.type, size, + sw_context->fp->tfile, + &sw_context->staged_shaders); + if (unlikely(ret != 0)) + return ret; + + return vmw_resource_relocation_add(&sw_context->res_relocations, + NULL, &cmd->header.id - + sw_context->buf_start); + + return 0; +} + +/** + * vmw_cmd_shader_destroy - Validate an SVGA_3D_CMD_SHADER_DESTROY + * command + * + * @dev_priv: Pointer to a device private struct. + * @sw_context: The software context being used for this batch. + * @header: Pointer to the command header in the command stream. + */ +static int vmw_cmd_shader_destroy(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_shader_destroy_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdDestroyShader body; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_shader_destroy_cmd, + header); + + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context, + user_context_converter, &cmd->body.cid, + NULL); + if (unlikely(ret != 0)) + return ret; + + if (unlikely(!dev_priv->has_mob)) + return 0; + + ret = vmw_compat_shader_remove(sw_context->fp->shman, + cmd->body.shid, + cmd->body.type, + &sw_context->staged_shaders); + if (unlikely(ret != 0)) + return ret; + + return vmw_resource_relocation_add(&sw_context->res_relocations, + NULL, &cmd->header.id - + sw_context->buf_start); + + return 0; +} + /** * vmw_cmd_set_shader - Validate an SVGA_3D_CMD_SET_SHADER * command @@ -1509,10 +1708,18 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, if (dev_priv->has_mob) { struct vmw_ctx_bindinfo bi; struct vmw_resource_val_node *res_node; - - ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_shader, - user_shader_converter, - &cmd->body.shid, &res_node); + u32 shid = cmd->body.shid; + + if (shid != SVGA3D_INVALID_ID) + (void) vmw_compat_shader_lookup(sw_context->fp->shman, + cmd->body.type, + &shid); + + ret = vmw_cmd_compat_res_check(dev_priv, sw_context, + vmw_res_shader, + user_shader_converter, + shid, + &cmd->body.shid, &res_node); if (unlikely(ret != 0)) return ret; @@ -1527,6 +1734,39 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, } /** + * vmw_cmd_set_shader_const - Validate an SVGA_3D_CMD_SET_SHADER_CONST + * command + * + * @dev_priv: Pointer to a device private struct. + * @sw_context: The software context being used for this batch. + * @header: Pointer to the command header in the command stream. + */ +static int vmw_cmd_set_shader_const(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_set_shader_const_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdSetShaderConst body; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_set_shader_const_cmd, + header); + + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context, + user_context_converter, &cmd->body.cid, + NULL); + if (unlikely(ret != 0)) + return ret; + + if (dev_priv->has_mob) + header->id = SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE; + + return 0; +} + +/** * vmw_cmd_bind_gb_shader - Validate an SVGA_3D_CMD_BIND_GB_SHADER * command * @@ -1595,7 +1835,7 @@ static int vmw_cmd_check_not_3d(struct vmw_private *dev_priv, return 0; } -static const struct vmw_cmd_entry const vmw_cmd_entries[SVGA_3D_CMD_MAX] = { +static const struct vmw_cmd_entry vmw_cmd_entries[SVGA_3D_CMD_MAX] = { VMW_CMD_DEF(SVGA_3D_CMD_SURFACE_DEFINE, &vmw_cmd_invalid, false, false, false), VMW_CMD_DEF(SVGA_3D_CMD_SURFACE_DESTROY, &vmw_cmd_invalid, @@ -1634,14 +1874,14 @@ static const struct vmw_cmd_entry const vmw_cmd_entries[SVGA_3D_CMD_MAX] = { true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_PRESENT, &vmw_cmd_present_check, false, false, false), - VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DEFINE, &vmw_cmd_cid_check, - true, true, false), - VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DESTROY, &vmw_cmd_cid_check, - true, true, false), + VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DEFINE, &vmw_cmd_shader_define, + true, false, false), + VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DESTROY, &vmw_cmd_shader_destroy, + true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER, &vmw_cmd_set_shader, true, false, false), - VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_cid_check, - true, true, false), + VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_set_shader_const, + true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_DRAW_PRIMITIVES, &vmw_cmd_draw, true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_SETSCISSORRECT, &vmw_cmd_cid_check, @@ -1792,6 +2032,9 @@ static int vmw_cmd_check(struct vmw_private *dev_priv, goto out_invalid; entry = &vmw_cmd_entries[cmd_id]; + if (unlikely(!entry->func)) + goto out_invalid; + if (unlikely(!entry->user_allow && !sw_context->kernel)) goto out_privileged; @@ -2171,7 +2414,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, } else sw_context->kernel = true; - sw_context->tfile = vmw_fpriv(file_priv)->tfile; + sw_context->fp = vmw_fpriv(file_priv); sw_context->cur_reloc = 0; sw_context->cur_val_buf = 0; sw_context->fence_flags = 0; @@ -2188,16 +2431,17 @@ int vmw_execbuf_process(struct drm_file *file_priv, goto out_unlock; sw_context->res_ht_initialized = true; } + INIT_LIST_HEAD(&sw_context->staged_shaders); INIT_LIST_HEAD(&resource_list); ret = vmw_cmd_check_all(dev_priv, sw_context, kernel_commands, command_size); if (unlikely(ret != 0)) - goto out_err; + goto out_err_nores; ret = vmw_resources_reserve(sw_context); if (unlikely(ret != 0)) - goto out_err; + goto out_err_nores; ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes); if (unlikely(ret != 0)) @@ -2225,6 +2469,12 @@ int vmw_execbuf_process(struct drm_file *file_priv, goto out_err; } + if (dev_priv->has_mob) { + ret = vmw_rebind_contexts(sw_context); + if (unlikely(ret != 0)) + goto out_unlock_binding; + } + cmd = vmw_fifo_reserve(dev_priv, command_size); if (unlikely(cmd == NULL)) { DRM_ERROR("Failed reserving fifo space for commands.\n"); @@ -2276,6 +2526,8 @@ int vmw_execbuf_process(struct drm_file *file_priv, } list_splice_init(&sw_context->resource_list, &resource_list); + vmw_compat_shaders_commit(sw_context->fp->shman, + &sw_context->staged_shaders); mutex_unlock(&dev_priv->cmdbuf_mutex); /* @@ -2289,10 +2541,11 @@ int vmw_execbuf_process(struct drm_file *file_priv, out_unlock_binding: mutex_unlock(&dev_priv->binding_mutex); out_err: - vmw_resource_relocations_free(&sw_context->res_relocations); - vmw_free_relocations(sw_context); ttm_eu_backoff_reservation(&ticket, &sw_context->validate_nodes); +out_err_nores: vmw_resource_list_unreserve(&sw_context->resource_list, true); + vmw_resource_relocations_free(&sw_context->res_relocations); + vmw_free_relocations(sw_context); vmw_clear_validations(sw_context); if (unlikely(dev_priv->pinned_bo != NULL && !dev_priv->query_cid_valid)) @@ -2301,6 +2554,8 @@ out_unlock: list_splice_init(&sw_context->resource_list, &resource_list); error_resource = sw_context->error_resource; sw_context->error_resource = NULL; + vmw_compat_shaders_revert(sw_context->fp->shman, + &sw_context->staged_shaders); mutex_unlock(&dev_priv->cmdbuf_mutex); /* diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index 116c4973676..47b70949bf3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -29,12 +29,18 @@ #include <drm/vmwgfx_drm.h> #include "vmwgfx_kms.h" +struct svga_3d_compat_cap { + SVGA3dCapsRecordHeader header; + SVGA3dCapPair pairs[SVGA3D_DEVCAP_MAX]; +}; + int vmw_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct vmw_private *dev_priv = vmw_priv(dev); struct drm_vmw_getparam_arg *param = (struct drm_vmw_getparam_arg *)data; + struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); switch (param->param) { case DRM_VMW_PARAM_NUM_STREAMS: @@ -60,6 +66,11 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, __le32 __iomem *fifo_mem = dev_priv->mmio_virt; const struct vmw_fifo_state *fifo = &dev_priv->fifo; + if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS)) { + param->value = SVGA3D_HWVERSION_WS8_B1; + break; + } + param->value = ioread32(fifo_mem + ((fifo->capabilities & @@ -69,19 +80,31 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, break; } case DRM_VMW_PARAM_MAX_SURF_MEMORY: - param->value = dev_priv->memory_size; + if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS) && + !vmw_fp->gb_aware) + param->value = dev_priv->max_mob_pages * PAGE_SIZE / 2; + else + param->value = dev_priv->memory_size; break; case DRM_VMW_PARAM_3D_CAPS_SIZE: - if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) - param->value = SVGA3D_DEVCAP_MAX; + if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS) && + vmw_fp->gb_aware) + param->value = SVGA3D_DEVCAP_MAX * sizeof(uint32_t); + else if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) + param->value = sizeof(struct svga_3d_compat_cap) + + sizeof(uint32_t); else param->value = (SVGA_FIFO_3D_CAPS_LAST - - SVGA_FIFO_3D_CAPS + 1); - param->value *= sizeof(uint32_t); + SVGA_FIFO_3D_CAPS + 1) * + sizeof(uint32_t); break; case DRM_VMW_PARAM_MAX_MOB_MEMORY: + vmw_fp->gb_aware = true; param->value = dev_priv->max_mob_pages * PAGE_SIZE; break; + case DRM_VMW_PARAM_MAX_MOB_SIZE: + param->value = dev_priv->max_mob_size; + break; default: DRM_ERROR("Illegal vmwgfx get param request: %d\n", param->param); @@ -91,6 +114,38 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, return 0; } +static int vmw_fill_compat_cap(struct vmw_private *dev_priv, void *bounce, + size_t size) +{ + struct svga_3d_compat_cap *compat_cap = + (struct svga_3d_compat_cap *) bounce; + unsigned int i; + size_t pair_offset = offsetof(struct svga_3d_compat_cap, pairs); + unsigned int max_size; + + if (size < pair_offset) + return -EINVAL; + + max_size = (size - pair_offset) / sizeof(SVGA3dCapPair); + + if (max_size > SVGA3D_DEVCAP_MAX) + max_size = SVGA3D_DEVCAP_MAX; + + compat_cap->header.length = + (pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32); + compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS; + + mutex_lock(&dev_priv->hw_mutex); + for (i = 0; i < max_size; ++i) { + vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); + compat_cap->pairs[i][0] = i; + compat_cap->pairs[i][1] = vmw_read(dev_priv, SVGA_REG_DEV_CAP); + } + mutex_unlock(&dev_priv->hw_mutex); + + return 0; +} + int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -104,41 +159,49 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, void *bounce; int ret; bool gb_objects = !!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS); + struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); if (unlikely(arg->pad64 != 0)) { DRM_ERROR("Illegal GET_3D_CAP argument.\n"); return -EINVAL; } - if (gb_objects) - size = SVGA3D_DEVCAP_MAX; + if (gb_objects && vmw_fp->gb_aware) + size = SVGA3D_DEVCAP_MAX * sizeof(uint32_t); + else if (gb_objects) + size = sizeof(struct svga_3d_compat_cap) + sizeof(uint32_t); else - size = (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1); - - size *= sizeof(uint32_t); + size = (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1) * + sizeof(uint32_t); if (arg->max_size < size) size = arg->max_size; - bounce = vmalloc(size); + bounce = vzalloc(size); if (unlikely(bounce == NULL)) { DRM_ERROR("Failed to allocate bounce buffer for 3D caps.\n"); return -ENOMEM; } - if (gb_objects) { - int i; + if (gb_objects && vmw_fp->gb_aware) { + int i, num; uint32_t *bounce32 = (uint32_t *) bounce; + num = size / sizeof(uint32_t); + if (num > SVGA3D_DEVCAP_MAX) + num = SVGA3D_DEVCAP_MAX; + mutex_lock(&dev_priv->hw_mutex); - for (i = 0; i < SVGA3D_DEVCAP_MAX; ++i) { + for (i = 0; i < num; ++i) { vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); *bounce32++ = vmw_read(dev_priv, SVGA_REG_DEV_CAP); } mutex_unlock(&dev_priv->hw_mutex); - + } else if (gb_objects) { + ret = vmw_fill_compat_cap(dev_priv, bounce, size); + if (unlikely(ret != 0)) + goto out_err; } else { - fifo_mem = dev_priv->mmio_virt; memcpy_fromio(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size); } @@ -146,6 +209,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, ret = copy_to_user(buffer, bounce, size); if (ret) ret = -EFAULT; +out_err: vfree(bounce); if (unlikely(ret != 0)) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index 4910e7b8181..d4a5a19cb8c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -134,6 +134,7 @@ static int vmw_setup_otable_base(struct vmw_private *dev_priv, cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); if (unlikely(cmd == NULL)) { DRM_ERROR("Failed reserving FIFO space for OTable setup.\n"); + ret = -ENOMEM; goto out_no_fifo; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 6fdd82d42f6..2aa4bc6a4d6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -88,6 +88,11 @@ struct vmw_resource *vmw_resource_reference(struct vmw_resource *res) return res; } +struct vmw_resource * +vmw_resource_reference_unless_doomed(struct vmw_resource *res) +{ + return kref_get_unless_zero(&res->kref) ? res : NULL; +} /** * vmw_resource_release_id - release a resource id to the id manager. @@ -136,8 +141,12 @@ static void vmw_resource_release(struct kref *kref) vmw_dmabuf_unreference(&res->backup); } - if (likely(res->hw_destroy != NULL)) + if (likely(res->hw_destroy != NULL)) { res->hw_destroy(res); + mutex_lock(&dev_priv->binding_mutex); + vmw_context_binding_res_list_kill(&res->binding_head); + mutex_unlock(&dev_priv->binding_mutex); + } id = res->id; if (res->res_free != NULL) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index 1457ec4b712..ee3856578a1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -29,6 +29,8 @@ #include "vmwgfx_resource_priv.h" #include "ttm/ttm_placement.h" +#define VMW_COMPAT_SHADER_HT_ORDER 12 + struct vmw_shader { struct vmw_resource res; SVGA3dShaderType type; @@ -40,6 +42,50 @@ struct vmw_user_shader { struct vmw_shader shader; }; +/** + * enum vmw_compat_shader_state - Staging state for compat shaders + */ +enum vmw_compat_shader_state { + VMW_COMPAT_COMMITED, + VMW_COMPAT_ADD, + VMW_COMPAT_DEL +}; + +/** + * struct vmw_compat_shader - Metadata for compat shaders. + * + * @handle: The TTM handle of the guest backed shader. + * @tfile: The struct ttm_object_file the guest backed shader is registered + * with. + * @hash: Hash item for lookup. + * @head: List head for staging lists or the compat shader manager list. + * @state: Staging state. + * + * The structure is protected by the cmdbuf lock. + */ +struct vmw_compat_shader { + u32 handle; + struct ttm_object_file *tfile; + struct drm_hash_item hash; + struct list_head head; + enum vmw_compat_shader_state state; +}; + +/** + * struct vmw_compat_shader_manager - Compat shader manager. + * + * @shaders: Hash table containing staged and commited compat shaders + * @list: List of commited shaders. + * @dev_priv: Pointer to a device private structure. + * + * @shaders and @list are protected by the cmdbuf mutex for now. + */ +struct vmw_compat_shader_manager { + struct drm_open_hash shaders; + struct list_head list; + struct vmw_private *dev_priv; +}; + static void vmw_user_shader_free(struct vmw_resource *res); static struct vmw_resource * vmw_user_shader_base_to_res(struct ttm_base_object *base); @@ -258,7 +304,7 @@ static int vmw_gb_shader_destroy(struct vmw_resource *res) return 0; mutex_lock(&dev_priv->binding_mutex); - vmw_context_binding_res_list_kill(&res->binding_head); + vmw_context_binding_res_list_scrub(&res->binding_head); cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); if (unlikely(cmd == NULL)) { @@ -325,13 +371,81 @@ int vmw_shader_destroy_ioctl(struct drm_device *dev, void *data, TTM_REF_USAGE); } +static int vmw_shader_alloc(struct vmw_private *dev_priv, + struct vmw_dma_buffer *buffer, + size_t shader_size, + size_t offset, + SVGA3dShaderType shader_type, + struct ttm_object_file *tfile, + u32 *handle) +{ + struct vmw_user_shader *ushader; + struct vmw_resource *res, *tmp; + int ret; + + /* + * Approximate idr memory usage with 128 bytes. It will be limited + * by maximum number_of shaders anyway. + */ + if (unlikely(vmw_user_shader_size == 0)) + vmw_user_shader_size = + ttm_round_pot(sizeof(struct vmw_user_shader)) + 128; + + ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), + vmw_user_shader_size, + false, true); + if (unlikely(ret != 0)) { + if (ret != -ERESTARTSYS) + DRM_ERROR("Out of graphics memory for shader " + "creation.\n"); + goto out; + } + + ushader = kzalloc(sizeof(*ushader), GFP_KERNEL); + if (unlikely(ushader == NULL)) { + ttm_mem_global_free(vmw_mem_glob(dev_priv), + vmw_user_shader_size); + ret = -ENOMEM; + goto out; + } + + res = &ushader->shader.res; + ushader->base.shareable = false; + ushader->base.tfile = NULL; + + /* + * From here on, the destructor takes over resource freeing. + */ + + ret = vmw_gb_shader_init(dev_priv, res, shader_size, + offset, shader_type, buffer, + vmw_user_shader_free); + if (unlikely(ret != 0)) + goto out; + + tmp = vmw_resource_reference(res); + ret = ttm_base_object_init(tfile, &ushader->base, false, + VMW_RES_SHADER, + &vmw_user_shader_base_release, NULL); + + if (unlikely(ret != 0)) { + vmw_resource_unreference(&tmp); + goto out_err; + } + + if (handle) + *handle = ushader->base.hash.key; +out_err: + vmw_resource_unreference(&res); +out: + return ret; +} + + int vmw_shader_define_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct vmw_private *dev_priv = vmw_priv(dev); - struct vmw_user_shader *ushader; - struct vmw_resource *res; - struct vmw_resource *tmp; struct drm_vmw_shader_create_arg *arg = (struct drm_vmw_shader_create_arg *)data; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; @@ -373,69 +487,326 @@ int vmw_shader_define_ioctl(struct drm_device *dev, void *data, goto out_bad_arg; } - /* - * Approximate idr memory usage with 128 bytes. It will be limited - * by maximum number_of shaders anyway. - */ + ret = ttm_read_lock(&vmaster->lock, true); + if (unlikely(ret != 0)) + goto out_bad_arg; - if (unlikely(vmw_user_shader_size == 0)) - vmw_user_shader_size = ttm_round_pot(sizeof(*ushader)) - + 128; + ret = vmw_shader_alloc(dev_priv, buffer, arg->size, arg->offset, + shader_type, tfile, &arg->shader_handle); - ret = ttm_read_lock(&vmaster->lock, true); + ttm_read_unlock(&vmaster->lock); +out_bad_arg: + vmw_dmabuf_unreference(&buffer); + return ret; +} + +/** + * vmw_compat_shader_lookup - Look up a compat shader + * + * @man: Pointer to the compat shader manager. + * @shader_type: The shader type, that combined with the user_key identifies + * the shader. + * @user_key: On entry, this should be a pointer to the user_key. + * On successful exit, it will contain the guest-backed shader's TTM handle. + * + * Returns 0 on success. Non-zero on failure, in which case the value pointed + * to by @user_key is unmodified. + */ +int vmw_compat_shader_lookup(struct vmw_compat_shader_manager *man, + SVGA3dShaderType shader_type, + u32 *user_key) +{ + struct drm_hash_item *hash; + int ret; + unsigned long key = *user_key | (shader_type << 24); + + ret = drm_ht_find_item(&man->shaders, key, &hash); if (unlikely(ret != 0)) return ret; - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), - vmw_user_shader_size, - false, true); - if (unlikely(ret != 0)) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for shader" - " creation.\n"); - goto out_unlock; + *user_key = drm_hash_entry(hash, struct vmw_compat_shader, + hash)->handle; + + return 0; +} + +/** + * vmw_compat_shader_free - Free a compat shader. + * + * @man: Pointer to the compat shader manager. + * @entry: Pointer to a struct vmw_compat_shader. + * + * Frees a struct vmw_compat_shder entry and drops its reference to the + * guest backed shader. + */ +static void vmw_compat_shader_free(struct vmw_compat_shader_manager *man, + struct vmw_compat_shader *entry) +{ + list_del(&entry->head); + WARN_ON(drm_ht_remove_item(&man->shaders, &entry->hash)); + WARN_ON(ttm_ref_object_base_unref(entry->tfile, entry->handle, + TTM_REF_USAGE)); + kfree(entry); +} + +/** + * vmw_compat_shaders_commit - Commit a list of compat shader actions. + * + * @man: Pointer to the compat shader manager. + * @list: Caller's list of compat shader actions. + * + * This function commits a list of compat shader additions or removals. + * It is typically called when the execbuf ioctl call triggering these + * actions has commited the fifo contents to the device. + */ +void vmw_compat_shaders_commit(struct vmw_compat_shader_manager *man, + struct list_head *list) +{ + struct vmw_compat_shader *entry, *next; + + list_for_each_entry_safe(entry, next, list, head) { + list_del(&entry->head); + switch (entry->state) { + case VMW_COMPAT_ADD: + entry->state = VMW_COMPAT_COMMITED; + list_add_tail(&entry->head, &man->list); + break; + case VMW_COMPAT_DEL: + ttm_ref_object_base_unref(entry->tfile, entry->handle, + TTM_REF_USAGE); + kfree(entry); + break; + default: + BUG(); + break; + } } +} - ushader = kzalloc(sizeof(*ushader), GFP_KERNEL); - if (unlikely(ushader == NULL)) { - ttm_mem_global_free(vmw_mem_glob(dev_priv), - vmw_user_shader_size); - ret = -ENOMEM; - goto out_unlock; +/** + * vmw_compat_shaders_revert - Revert a list of compat shader actions + * + * @man: Pointer to the compat shader manager. + * @list: Caller's list of compat shader actions. + * + * This function reverts a list of compat shader additions or removals. + * It is typically called when the execbuf ioctl call triggering these + * actions failed for some reason, and the command stream was never + * submitted. + */ +void vmw_compat_shaders_revert(struct vmw_compat_shader_manager *man, + struct list_head *list) +{ + struct vmw_compat_shader *entry, *next; + int ret; + + list_for_each_entry_safe(entry, next, list, head) { + switch (entry->state) { + case VMW_COMPAT_ADD: + vmw_compat_shader_free(man, entry); + break; + case VMW_COMPAT_DEL: + ret = drm_ht_insert_item(&man->shaders, &entry->hash); + list_del(&entry->head); + list_add_tail(&entry->head, &man->list); + entry->state = VMW_COMPAT_COMMITED; + break; + default: + BUG(); + break; + } } +} - res = &ushader->shader.res; - ushader->base.shareable = false; - ushader->base.tfile = NULL; +/** + * vmw_compat_shader_remove - Stage a compat shader for removal. + * + * @man: Pointer to the compat shader manager + * @user_key: The key that is used to identify the shader. The key is + * unique to the shader type. + * @shader_type: Shader type. + * @list: Caller's list of staged shader actions. + * + * This function stages a compat shader for removal and removes the key from + * the shader manager's hash table. If the shader was previously only staged + * for addition it is completely removed (But the execbuf code may keep a + * reference if it was bound to a context between addition and removal). If + * it was previously commited to the manager, it is staged for removal. + */ +int vmw_compat_shader_remove(struct vmw_compat_shader_manager *man, + u32 user_key, SVGA3dShaderType shader_type, + struct list_head *list) +{ + struct vmw_compat_shader *entry; + struct drm_hash_item *hash; + int ret; - /* - * From here on, the destructor takes over resource freeing. - */ + ret = drm_ht_find_item(&man->shaders, user_key | (shader_type << 24), + &hash); + if (likely(ret != 0)) + return -EINVAL; - ret = vmw_gb_shader_init(dev_priv, res, arg->size, - arg->offset, shader_type, buffer, - vmw_user_shader_free); + entry = drm_hash_entry(hash, struct vmw_compat_shader, hash); + + switch (entry->state) { + case VMW_COMPAT_ADD: + vmw_compat_shader_free(man, entry); + break; + case VMW_COMPAT_COMMITED: + (void) drm_ht_remove_item(&man->shaders, &entry->hash); + list_del(&entry->head); + entry->state = VMW_COMPAT_DEL; + list_add_tail(&entry->head, list); + break; + default: + BUG(); + break; + } + + return 0; +} + +/** + * vmw_compat_shader_add - Create a compat shader and add the + * key to the manager + * + * @man: Pointer to the compat shader manager + * @user_key: The key that is used to identify the shader. The key is + * unique to the shader type. + * @bytecode: Pointer to the bytecode of the shader. + * @shader_type: Shader type. + * @tfile: Pointer to a struct ttm_object_file that the guest-backed shader is + * to be created with. + * @list: Caller's list of staged shader actions. + * + * Note that only the key is added to the shader manager's hash table. + * The shader is not yet added to the shader manager's list of shaders. + */ +int vmw_compat_shader_add(struct vmw_compat_shader_manager *man, + u32 user_key, const void *bytecode, + SVGA3dShaderType shader_type, + size_t size, + struct ttm_object_file *tfile, + struct list_head *list) +{ + struct vmw_dma_buffer *buf; + struct ttm_bo_kmap_obj map; + bool is_iomem; + struct vmw_compat_shader *compat; + u32 handle; + int ret; + + if (user_key > ((1 << 24) - 1) || (unsigned) shader_type > 16) + return -EINVAL; + + /* Allocate and pin a DMA buffer */ + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (unlikely(buf == NULL)) + return -ENOMEM; + + ret = vmw_dmabuf_init(man->dev_priv, buf, size, &vmw_sys_ne_placement, + true, vmw_dmabuf_bo_free); if (unlikely(ret != 0)) - goto out_unlock; + goto out; - tmp = vmw_resource_reference(res); - ret = ttm_base_object_init(tfile, &ushader->base, false, - VMW_RES_SHADER, - &vmw_user_shader_base_release, NULL); + ret = ttm_bo_reserve(&buf->base, false, true, false, NULL); + if (unlikely(ret != 0)) + goto no_reserve; + /* Map and copy shader bytecode. */ + ret = ttm_bo_kmap(&buf->base, 0, PAGE_ALIGN(size) >> PAGE_SHIFT, + &map); if (unlikely(ret != 0)) { - vmw_resource_unreference(&tmp); - goto out_err; + ttm_bo_unreserve(&buf->base); + goto no_reserve; } - arg->shader_handle = ushader->base.hash.key; -out_err: - vmw_resource_unreference(&res); -out_unlock: - ttm_read_unlock(&vmaster->lock); -out_bad_arg: - vmw_dmabuf_unreference(&buffer); + memcpy(ttm_kmap_obj_virtual(&map, &is_iomem), bytecode, size); + WARN_ON(is_iomem); + + ttm_bo_kunmap(&map); + ret = ttm_bo_validate(&buf->base, &vmw_sys_placement, false, true); + WARN_ON(ret != 0); + ttm_bo_unreserve(&buf->base); + + /* Create a guest-backed shader container backed by the dma buffer */ + ret = vmw_shader_alloc(man->dev_priv, buf, size, 0, shader_type, + tfile, &handle); + vmw_dmabuf_unreference(&buf); + if (unlikely(ret != 0)) + goto no_reserve; + /* + * Create a compat shader structure and stage it for insertion + * in the manager + */ + compat = kzalloc(sizeof(*compat), GFP_KERNEL); + if (compat == NULL) + goto no_compat; + + compat->hash.key = user_key | (shader_type << 24); + ret = drm_ht_insert_item(&man->shaders, &compat->hash); + if (unlikely(ret != 0)) + goto out_invalid_key; + + compat->state = VMW_COMPAT_ADD; + compat->handle = handle; + compat->tfile = tfile; + list_add_tail(&compat->head, list); + return 0; + +out_invalid_key: + kfree(compat); +no_compat: + ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE); +no_reserve: +out: return ret; +} + +/** + * vmw_compat_shader_man_create - Create a compat shader manager + * + * @dev_priv: Pointer to a device private structure. + * + * Typically done at file open time. If successful returns a pointer to a + * compat shader manager. Otherwise returns an error pointer. + */ +struct vmw_compat_shader_manager * +vmw_compat_shader_man_create(struct vmw_private *dev_priv) +{ + struct vmw_compat_shader_manager *man; + int ret; + + man = kzalloc(sizeof(*man), GFP_KERNEL); + if (man == NULL) + return ERR_PTR(-ENOMEM); + + man->dev_priv = dev_priv; + INIT_LIST_HEAD(&man->list); + ret = drm_ht_create(&man->shaders, VMW_COMPAT_SHADER_HT_ORDER); + if (ret == 0) + return man; + + kfree(man); + return ERR_PTR(ret); +} + +/** + * vmw_compat_shader_man_destroy - Destroy a compat shader manager + * + * @man: Pointer to the shader manager to destroy. + * + * Typically done at file close time. + */ +void vmw_compat_shader_man_destroy(struct vmw_compat_shader_manager *man) +{ + struct vmw_compat_shader *entry, *next; + + mutex_lock(&man->dev_priv->cmdbuf_mutex); + list_for_each_entry_safe(entry, next, &man->list, head) + vmw_compat_shader_free(man, entry); + mutex_unlock(&man->dev_priv->cmdbuf_mutex); + kfree(man); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 979da1c246a..82468d90291 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -908,8 +908,8 @@ int vmw_surface_reference_ioctl(struct drm_device *dev, void *data, rep->size_addr; if (user_sizes) - ret = copy_to_user(user_sizes, srf->sizes, - srf->num_sizes * sizeof(*srf->sizes)); + ret = copy_to_user(user_sizes, &srf->base_size, + sizeof(srf->base_size)); if (unlikely(ret != 0)) { DRM_ERROR("copy_to_user failed %p %u\n", user_sizes, srf->num_sizes); @@ -1111,7 +1111,7 @@ static int vmw_gb_surface_destroy(struct vmw_resource *res) return 0; mutex_lock(&dev_priv->binding_mutex); - vmw_context_binding_res_list_kill(&res->binding_head); + vmw_context_binding_res_list_scrub(&res->binding_head); cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); if (unlikely(cmd == NULL)) { |