From 7198e6b03155f6dadecadba004eb83b81a6ffe4c Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Fri, 19 Jul 2013 12:59:32 -0400
Subject: drm/msm: add a3xx gpu support

Add initial support for a3xx 3d core.

So far, with hardware that I've seen to date, we can have:
 + zero, one, or two z180 2d cores
 + a3xx or a2xx 3d core, which share a common CP (the firmware for the
   CP seems to implement some different PM4 packet types but the basics
   of cmdstream submission are the same)

Which means that the eventual complete "class" hierarchy, once support
for all past and present hw is in place, becomes:
 + msm_gpu
   + adreno_gpu
     + a3xx_gpu
     + a2xx_gpu
   + z180_gpu

This commit splits out the parts that will eventually be common between
a2xx/a3xx into adreno_gpu, and the parts that are even common to z180
into msm_gpu.

Note that there is no cmdstream validation required. All memory access
from the GPU is via IOMMU/MMU. So as long as you don't map silly things
to the GPU, there isn't much damage that the GPU can do.

Signed-off-by: Rob Clark
---
 drivers/gpu/drm/msm/msm_gpu.c | 411 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 411 insertions(+)
 create mode 100644 drivers/gpu/drm/msm/msm_gpu.c

(limited to 'drivers/gpu/drm/msm/msm_gpu.c')

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
new file mode 100644
index 00000000000..7c6541e4a7e
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msm_gpu.h"
+#include "msm_gem.h"
+
+
+/*
+ * Power Management:
+ */
+
+#ifdef CONFIG_MSM_BUS_SCALING
+#include <mach/board.h>
+#include <mach/kgsl.h>
+static void bs_init(struct msm_gpu *gpu, struct platform_device *pdev)
+{
+	struct drm_device *dev = gpu->dev;
+	struct kgsl_device_platform_data *pdata = pdev->dev.platform_data;
+
+	if (!pdev) {
+		dev_err(dev->dev, "could not find dtv pdata\n");
+		return;
+	}
+
+	if (pdata->bus_scale_table) {
+		gpu->bsc = msm_bus_scale_register_client(pdata->bus_scale_table);
+		DBG("bus scale client: %08x", gpu->bsc);
+	}
+}
+
+static void bs_fini(struct msm_gpu *gpu)
+{
+	if (gpu->bsc) {
+		msm_bus_scale_unregister_client(gpu->bsc);
+		gpu->bsc = 0;
+	}
+}
+
+static void bs_set(struct msm_gpu *gpu, int idx)
+{
+	if (gpu->bsc) {
+		DBG("set bus scaling: %d", idx);
+		msm_bus_scale_client_update_request(gpu->bsc, idx);
+	}
+}
+#else
+static void bs_init(struct msm_gpu *gpu, struct platform_device *pdev) {}
+static void bs_fini(struct msm_gpu *gpu) {}
+static void bs_set(struct msm_gpu *gpu, int idx) {}
+#endif
+
+static int enable_pwrrail(struct msm_gpu *gpu)
+{
+	struct drm_device *dev = gpu->dev;
+	int ret = 0;
+
+	if (gpu->gpu_reg) {
+		ret = regulator_enable(gpu->gpu_reg);
+		if (ret) {
+			dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
+			return ret;
+		}
+	}
+
+	if (gpu->gpu_cx) {
+		ret = regulator_enable(gpu->gpu_cx);
+		if (ret) {
+			dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int disable_pwrrail(struct msm_gpu *gpu)
+{
+	if (gpu->gpu_cx)
+		regulator_disable(gpu->gpu_cx);
+	if (gpu->gpu_reg)
+		regulator_disable(gpu->gpu_reg);
+	return 0;
+}
+
+static int enable_clk(struct msm_gpu *gpu)
+{
+	struct clk *rate_clk = NULL;
+	int i;
+
+	/* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
+	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
+		if (gpu->grp_clks[i]) {
+			clk_prepare(gpu->grp_clks[i]);
+			rate_clk = gpu->grp_clks[i];
+		}
+	}
+
+	if (rate_clk && gpu->fast_rate)
+		clk_set_rate(rate_clk, gpu->fast_rate);
+
+	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
+		if (gpu->grp_clks[i])
+			clk_enable(gpu->grp_clks[i]);
+
+	return 0;
+}
+
+static int disable_clk(struct msm_gpu *gpu)
+{
+	struct clk *rate_clk = NULL;
+	int i;
+
+	/* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
+	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
+		if (gpu->grp_clks[i]) {
+			clk_disable(gpu->grp_clks[i]);
+			rate_clk = gpu->grp_clks[i];
+		}
+	}
+
+	if (rate_clk && gpu->slow_rate)
+		clk_set_rate(rate_clk, gpu->slow_rate);
+
+	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
+		if (gpu->grp_clks[i])
+			clk_unprepare(gpu->grp_clks[i]);
+
+	return 0;
+}
+
+static int enable_axi(struct msm_gpu *gpu)
+{
+	if (gpu->ebi1_clk)
+		clk_prepare_enable(gpu->ebi1_clk);
+	if (gpu->bus_freq)
+		bs_set(gpu, gpu->bus_freq);
+	return 0;
+}
+
+static int disable_axi(struct msm_gpu *gpu)
+{
+	if (gpu->ebi1_clk)
+		clk_disable_unprepare(gpu->ebi1_clk);
+	if (gpu->bus_freq)
+		bs_set(gpu, 0);
+	return 0;
+}
+
+int msm_gpu_pm_resume(struct msm_gpu *gpu)
+{
+	int ret;
+
+	DBG("%s", gpu->name);
+
+	ret = enable_pwrrail(gpu);
+	if (ret)
+		return ret;
+
+	ret = enable_clk(gpu);
+	if (ret)
+		return ret;
+
+	ret = enable_axi(gpu);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int msm_gpu_pm_suspend(struct msm_gpu *gpu)
+{
+	int ret;
+
+	DBG("%s", gpu->name);
+
+	ret = disable_axi(gpu);
+	if (ret)
+		return ret;
+
+	ret = disable_clk(gpu);
+	if (ret)
+		return ret;
+
+	ret = disable_pwrrail(gpu);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Cmdstream submission/retirement:
+ */
+
+static void retire_worker(struct work_struct *work)
+{
+	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
+	struct drm_device *dev = gpu->dev;
+	uint32_t fence = gpu->funcs->last_fence(gpu);
+
+	mutex_lock(&dev->struct_mutex);
+
+	while (!list_empty(&gpu->active_list)) {
+		struct msm_gem_object *obj;
+
+		obj = list_first_entry(&gpu->active_list,
+				struct msm_gem_object, mm_list);
+
+		if (obj->fence <= fence) {
+			/* move to inactive: */
+			msm_gem_move_to_inactive(&obj->base);
+			msm_gem_put_iova(&obj->base, gpu->id);
+			drm_gem_object_unreference(&obj->base);
+		} else {
+			break;
+		}
+	}
+
+	msm_update_fence(gpu->dev, fence);
+
+	mutex_unlock(&dev->struct_mutex);
+}
+
+/* call from irq handler to schedule work to retire bo's */
+void msm_gpu_retire(struct msm_gpu *gpu)
+{
+	struct msm_drm_private *priv = gpu->dev->dev_private;
+	queue_work(priv->wq, &gpu->retire_work);
+}
+
+/* add bo's to gpu's ring, and kick gpu: */
+int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
+		struct msm_file_private *ctx)
+{
+	struct drm_device *dev = gpu->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+	int i, ret;
+
+	mutex_lock(&dev->struct_mutex);
+
+	submit->fence = ++priv->next_fence;
+
+	ret = gpu->funcs->submit(gpu, submit, ctx);
+	priv->lastctx = ctx;
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct msm_gem_object *msm_obj = submit->bos[i].obj;
+
+		/* can't happen yet.. but when we add 2d support we'll have
+		 * to deal w/ cross-ring synchronization:
+		 */
+		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));
+
+		if (!is_active(msm_obj)) {
+			uint32_t iova;
+
+			/* ring takes a reference to the bo and iova: */
+			drm_gem_object_reference(&msm_obj->base);
+			msm_gem_get_iova_locked(&msm_obj->base,
+					submit->gpu->id, &iova);
+		}
+
+		msm_gem_move_to_active(&msm_obj->base, gpu, submit->fence);
+	}
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+}
+
+/*
+ * Init/Cleanup:
+ */
+
+static irqreturn_t irq_handler(int irq, void *data)
+{
+	struct msm_gpu *gpu = data;
+	return gpu->funcs->irq(gpu);
+}
+
+static const char *clk_names[] = {
+	"src_clk", "core_clk", "iface_clk", "mem_clk", "mem_iface_clk",
+};
+
+int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
+		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
+		const char *name, const char *ioname, const char *irqname, int ringsz)
+{
+	int i, ret;
+
+	gpu->dev = drm;
+	gpu->funcs = funcs;
+	gpu->name = name;
+
+	INIT_LIST_HEAD(&gpu->active_list);
+	INIT_WORK(&gpu->retire_work, retire_worker);
+
+	BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));
+
+	/* Map registers: */
+	gpu->mmio = msm_ioremap(pdev, ioname, name);
+	if (IS_ERR(gpu->mmio)) {
+		ret = PTR_ERR(gpu->mmio);
+		goto fail;
+	}
+
+	/* Get Interrupt: */
+	gpu->irq = platform_get_irq_byname(pdev, irqname);
+	if (gpu->irq < 0) {
+		ret = gpu->irq;
+		dev_err(drm->dev, "failed to get irq: %d\n", ret);
+		goto fail;
+	}
+
+	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
+			IRQF_TRIGGER_HIGH, gpu->name, gpu);
+	if (ret) {
+		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
+		goto fail;
+	}
+
+	/* Acquire clocks: */
+	for (i = 0; i < ARRAY_SIZE(clk_names); i++) {
+		gpu->grp_clks[i] = devm_clk_get(&pdev->dev, clk_names[i]);
+		DBG("grp_clks[%s]: %p", clk_names[i], gpu->grp_clks[i]);
+		if (IS_ERR(gpu->grp_clks[i]))
+			gpu->grp_clks[i] = NULL;
+	}
+
+	gpu->ebi1_clk = devm_clk_get(&pdev->dev, "bus_clk");
+	DBG("ebi1_clk: %p", gpu->ebi1_clk);
+	if (IS_ERR(gpu->ebi1_clk))
+		gpu->ebi1_clk = NULL;
+
+	/* Acquire regulators: */
+	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
+	DBG("gpu_reg: %p", gpu->gpu_reg);
+	if (IS_ERR(gpu->gpu_reg))
+		gpu->gpu_reg = NULL;
+
+	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
+	DBG("gpu_cx: %p", gpu->gpu_cx);
+	if (IS_ERR(gpu->gpu_cx))
+		gpu->gpu_cx = NULL;
+
+	/* Setup IOMMU.. eventually we will (I think) do this once per context
+	 * and have separate page tables per context. For now, to keep things
+	 * simple and to get something working, just use a single address space:
+	 */
+	gpu->iommu = iommu_domain_alloc(&platform_bus_type);
+	if (!gpu->iommu) {
+		dev_err(drm->dev, "failed to allocate IOMMU\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+	gpu->id = msm_register_iommu(drm, gpu->iommu);
+
+	/* Create ringbuffer: */
+	gpu->rb = msm_ringbuffer_new(gpu, ringsz);
+	if (IS_ERR(gpu->rb)) {
+		ret = PTR_ERR(gpu->rb);
+		gpu->rb = NULL;
+		dev_err(drm->dev, "could not create ringbuffer: %d\n", ret);
+		goto fail;
+	}
+
+	ret = msm_gem_get_iova_locked(gpu->rb->bo, gpu->id, &gpu->rb_iova);
+	if (ret) {
+		gpu->rb_iova = 0;
+		dev_err(drm->dev, "could not map ringbuffer: %d\n", ret);
+		goto fail;
+	}
+
+	bs_init(gpu, pdev);
+
+	return 0;
+
+fail:
+	return ret;
+}
+
+void msm_gpu_cleanup(struct msm_gpu *gpu)
+{
+	DBG("%s", gpu->name);
+
+	WARN_ON(!list_empty(&gpu->active_list));
+
+	bs_fini(gpu);
+
+	if (gpu->rb) {
+		if (gpu->rb_iova)
+			msm_gem_put_iova(gpu->rb->bo, gpu->id);
+		msm_ringbuffer_destroy(gpu->rb);
+	}
+
+	if (gpu->iommu)
+		iommu_domain_free(gpu->iommu);
+}
--
cgit v1.2.3-70-g09d2

From bd6f82d8289422f618b98451a43887f452b3423e Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Sat, 24 Aug 2013 14:20:38 -0400
Subject: drm/msm: add basic hangcheck/recovery mechanism

A basic, no-frills recovery mechanism in case the gpu gets wedged. We
could try to be a bit more fancy and restart the next submit after the
one that got wedged, but for now keep it simple.

This is enough to recover things if, for example, the gpu hangs mid way
through a piglit run.

Signed-off-by: Rob Clark
---
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c   |  1 +
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 26 +++++++++++++++--
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  3 +-
 drivers/gpu/drm/msm/msm_gpu.c           | 52 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/msm/msm_gpu.h           | 10 +++++++
 5 files changed, 87 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/drm/msm/msm_gpu.c')

diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 13d61bbed30..035bd13dc8b 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -371,6 +371,7 @@ static const struct adreno_gpu_funcs funcs = {
 		.hw_init = a3xx_hw_init,
 		.pm_suspend = msm_gpu_pm_suspend,
 		.pm_resume = msm_gpu_pm_resume,
+		.recover = adreno_recover,
 		.last_fence = adreno_last_fence,
 		.submit = adreno_submit,
 		.flush = adreno_flush,
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 282163ee3fa..a60584763b6 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -111,6 +111,28 @@ uint32_t adreno_last_fence(struct msm_gpu *gpu)
 	return adreno_gpu->memptrs->fence;
 }
 
+void adreno_recover(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct drm_device *dev = gpu->dev;
+	int ret;
+
+	gpu->funcs->pm_suspend(gpu);
+
+	/* reset ringbuffer: */
+	gpu->rb->cur = gpu->rb->start;
+
+	/* reset completed fence seqno, just discard anything pending: */
+	adreno_gpu->memptrs->fence = gpu->submitted_fence;
+
+	gpu->funcs->pm_resume(gpu);
+	ret = gpu->funcs->hw_init(gpu);
+	if (ret) {
+		dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
+		/* hmm, oh well? */
+	}
+}
+
 int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 		struct msm_file_private *ctx)
 {
@@ -119,8 +141,6 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct msm_ringbuffer *ring = gpu->rb;
 	unsigned i, ibs = 0;
 
-	adreno_gpu->last_fence = submit->fence;
-
 	for (i = 0; i < submit->nr_cmds; i++) {
 		switch (submit->cmd[i].type) {
 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
@@ -225,7 +245,7 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
 			adreno_gpu->rev.patchid);
 
 	seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence,
-			adreno_gpu->last_fence);
+			gpu->submitted_fence);
 	seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr);
 	seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr);
 	seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 6b49c4f27fe..f73abfba7c2 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -54,8 +54,6 @@ struct adreno_gpu {
 	uint32_t revn;  /* numeric revision name */
 	const struct adreno_gpu_funcs *funcs;
 
-	uint32_t last_fence;
-
 	/* firmware: */
 	const struct firmware *pm4, *pfp;
 
@@ -99,6 +97,7 @@ static inline bool adreno_is_a330(struct adreno_gpu *gpu)
 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
 int adreno_hw_init(struct msm_gpu *gpu);
 uint32_t adreno_last_fence(struct msm_gpu *gpu);
+void adreno_recover(struct msm_gpu *gpu);
 int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 		struct msm_file_private *ctx);
 void adreno_flush(struct msm_gpu *gpu);
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 7c6541e4a7e..e1e1ec9321f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -202,6 +202,51 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu)
 	return 0;
 }
 
+/*
+ * Hangcheck detection for locked gpu:
+ */
+
+static void recover_worker(struct work_struct *work)
+{
+	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
+	struct drm_device *dev = gpu->dev;
+
+	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
+
+	mutex_lock(&dev->struct_mutex);
+	gpu->funcs->recover(gpu);
+	mutex_unlock(&dev->struct_mutex);
+
+	msm_gpu_retire(gpu);
+}
+
+static void hangcheck_timer_reset(struct msm_gpu *gpu)
+{
+	DBG("%s", gpu->name);
+	mod_timer(&gpu->hangcheck_timer,
+			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
+}
+
+static void hangcheck_handler(unsigned long data)
+{
+	struct msm_gpu *gpu = (struct msm_gpu *)data;
+	uint32_t fence = gpu->funcs->last_fence(gpu);
+
+	if (fence != gpu->hangcheck_fence) {
+		/* some progress has been made.. ya! */
+		gpu->hangcheck_fence = fence;
+	} else if (fence < gpu->submitted_fence) {
+		/* no progress and not done.. hung! */
+		struct msm_drm_private *priv = gpu->dev->dev_private;
+		gpu->hangcheck_fence = fence;
+		queue_work(priv->wq, &gpu->recover_work);
+	}
+
+	/* if still more pending work, reset the hangcheck timer: */
+	if (gpu->submitted_fence > gpu->hangcheck_fence)
+		hangcheck_timer_reset(gpu);
+}
+
 /*
  * Cmdstream submission/retirement:
  */
@@ -254,6 +299,8 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 
 	submit->fence = ++priv->next_fence;
 
+	gpu->submitted_fence = submit->fence;
+
 	ret = gpu->funcs->submit(gpu, submit, ctx);
 	priv->lastctx = ctx;
 
@@ -276,6 +323,7 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 		msm_gem_move_to_active(&msm_obj->base, gpu, submit->fence);
 	}
+	hangcheck_timer_reset(gpu);
 	mutex_unlock(&dev->struct_mutex);
 
 	return ret;
 }
@@ -307,6 +355,10 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 
 	INIT_LIST_HEAD(&gpu->active_list);
 	INIT_WORK(&gpu->retire_work, retire_worker);
+	INIT_WORK(&gpu->recover_work, recover_worker);
+
+	setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
+			(unsigned long)gpu);
 
 	BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 8d2cd6c2226..8cd829e520b 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -51,6 +51,7 @@ struct msm_gpu_funcs {
 	void (*idle)(struct msm_gpu *gpu);
 	irqreturn_t (*irq)(struct msm_gpu *irq);
 	uint32_t (*last_fence)(struct msm_gpu *gpu);
+	void (*recover)(struct msm_gpu *gpu);
 	void (*destroy)(struct msm_gpu *gpu);
 #ifdef CONFIG_DEBUG_FS
 	/* show GPU status in debugfs: */
@@ -69,6 +70,8 @@ struct msm_gpu {
 	/* list of GEM active objects: */
 	struct list_head active_list;
 
+	uint32_t submitted_fence;
+
 	/* worker for handling active-list retiring: */
 	struct work_struct retire_work;
 
@@ -83,6 +86,13 @@ struct msm_gpu {
 	struct clk *ebi1_clk, *grp_clks[5];
 	uint32_t fast_rate, slow_rate, bus_freq;
 	uint32_t bsc;
+
+	/* Hang Detction: */
+#define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
+#define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
+	struct timer_list hangcheck_timer;
+	uint32_t hangcheck_fence;
+	struct work_struct recover_work;
 };
 
 static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
--
cgit v1.2.3-70-g09d2