diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau/nv40_graph.c')
-rw-r--r-- | drivers/gpu/drm/nouveau/nv40_graph.c | 306 |
1 files changed, 76 insertions, 230 deletions
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c index d3e0a2a6acf..53e8afe1dcd 100644 --- a/drivers/gpu/drm/nouveau/nv40_graph.c +++ b/drivers/gpu/drm/nouveau/nv40_graph.c @@ -24,36 +24,10 @@ * */ -#include <linux/firmware.h> - #include "drmP.h" #include "drm.h" #include "nouveau_drv.h" - -MODULE_FIRMWARE("nouveau/nv40.ctxprog"); -MODULE_FIRMWARE("nouveau/nv40.ctxvals"); -MODULE_FIRMWARE("nouveau/nv41.ctxprog"); -MODULE_FIRMWARE("nouveau/nv41.ctxvals"); -MODULE_FIRMWARE("nouveau/nv42.ctxprog"); -MODULE_FIRMWARE("nouveau/nv42.ctxvals"); -MODULE_FIRMWARE("nouveau/nv43.ctxprog"); -MODULE_FIRMWARE("nouveau/nv43.ctxvals"); -MODULE_FIRMWARE("nouveau/nv44.ctxprog"); -MODULE_FIRMWARE("nouveau/nv44.ctxvals"); -MODULE_FIRMWARE("nouveau/nv46.ctxprog"); -MODULE_FIRMWARE("nouveau/nv46.ctxvals"); -MODULE_FIRMWARE("nouveau/nv47.ctxprog"); -MODULE_FIRMWARE("nouveau/nv47.ctxvals"); -MODULE_FIRMWARE("nouveau/nv49.ctxprog"); -MODULE_FIRMWARE("nouveau/nv49.ctxvals"); -MODULE_FIRMWARE("nouveau/nv4a.ctxprog"); -MODULE_FIRMWARE("nouveau/nv4a.ctxvals"); -MODULE_FIRMWARE("nouveau/nv4b.ctxprog"); -MODULE_FIRMWARE("nouveau/nv4b.ctxvals"); -MODULE_FIRMWARE("nouveau/nv4c.ctxprog"); -MODULE_FIRMWARE("nouveau/nv4c.ctxvals"); -MODULE_FIRMWARE("nouveau/nv4e.ctxprog"); -MODULE_FIRMWARE("nouveau/nv4e.ctxvals"); +#include "nouveau_grctx.h" struct nouveau_channel * nv40_graph_channel(struct drm_device *dev) @@ -83,27 +57,30 @@ nv40_graph_create_context(struct nouveau_channel *chan) { struct drm_device *dev = chan->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; - struct nouveau_gpuobj *ctx; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; int ret; - /* Allocate a 175KiB block of PRAMIN to store the context. This - * is massive overkill for a lot of chipsets, but it should be safe - * until we're able to implement this properly (will happen at more - * or less the same time we're able to write our own context programs. - */ - ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, 175*1024, 16, - NVOBJ_FLAG_ZERO_ALLOC, - &chan->ramin_grctx); + ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size, + 16, NVOBJ_FLAG_ZERO_ALLOC, + &chan->ramin_grctx); if (ret) return ret; - ctx = chan->ramin_grctx->gpuobj; /* Initialise default context values */ dev_priv->engine.instmem.prepare_access(dev, true); - nv40_grctx_vals_load(dev, ctx); - nv_wo32(dev, ctx, 0, ctx->im_pramin->start); - dev_priv->engine.instmem.finish_access(dev); + if (!pgraph->ctxprog) { + struct nouveau_grctx ctx = {}; + ctx.dev = chan->dev; + ctx.mode = NOUVEAU_GRCTX_VALS; + ctx.data = chan->ramin_grctx->gpuobj; + nv40_grctx_init(&ctx); + } else { + nouveau_grctx_vals_load(dev, chan->ramin_grctx->gpuobj); + } + nv_wo32(dev, chan->ramin_grctx->gpuobj, 0, + chan->ramin_grctx->gpuobj->im_pramin->start); + dev_priv->engine.instmem.finish_access(dev); return 0; } @@ -204,133 +181,46 @@ nv40_graph_unload_context(struct drm_device *dev) return ret; } -struct nouveau_ctxprog { - uint32_t signature; - uint8_t version; - uint16_t length; - uint32_t data[]; -} __attribute__ ((packed)); - -struct nouveau_ctxvals { - uint32_t signature; - uint8_t version; - uint32_t length; - struct { - uint32_t offset; - uint32_t value; - } data[]; -} __attribute__ ((packed)); - -int -nv40_grctx_init(struct drm_device *dev) -{ - struct drm_nouveau_private *dev_priv = dev->dev_private; - struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; - const int chipset = dev_priv->chipset; - const struct firmware *fw; - const struct nouveau_ctxprog *cp; - const struct nouveau_ctxvals *cv; - char name[32]; - int ret, i; - - pgraph->accel_blocked = true; - - if (!pgraph->ctxprog) { - sprintf(name, "nouveau/nv%02x.ctxprog", chipset); - ret = request_firmware(&fw, name, &dev->pdev->dev); - if (ret) { - NV_ERROR(dev, "No ctxprog for NV%02x\n", chipset); - return ret; - } - - pgraph->ctxprog = kmalloc(fw->size, GFP_KERNEL); - if (!pgraph->ctxprog) { - NV_ERROR(dev, "OOM copying ctxprog\n"); - release_firmware(fw); - return -ENOMEM; - } - memcpy(pgraph->ctxprog, fw->data, fw->size); - - cp = pgraph->ctxprog; - if (cp->signature != 0x5043564e || cp->version != 0 || - cp->length != ((fw->size - 7) / 4)) { - NV_ERROR(dev, "ctxprog invalid\n"); - release_firmware(fw); - nv40_grctx_fini(dev); - return -EINVAL; - } - release_firmware(fw); - } - - if (!pgraph->ctxvals) { - sprintf(name, "nouveau/nv%02x.ctxvals", chipset); - ret = request_firmware(&fw, name, &dev->pdev->dev); - if (ret) { - NV_ERROR(dev, "No ctxvals for NV%02x\n", chipset); - nv40_grctx_fini(dev); - return ret; - } - - pgraph->ctxvals = kmalloc(fw->size, GFP_KERNEL); - if (!pgraph->ctxprog) { - NV_ERROR(dev, "OOM copying ctxprog\n"); - release_firmware(fw); - nv40_grctx_fini(dev); - return -ENOMEM; - } - memcpy(pgraph->ctxvals, fw->data, fw->size); - - cv = (void *)pgraph->ctxvals; - if (cv->signature != 0x5643564e || cv->version != 0 || - cv->length != ((fw->size - 9) / 8)) { - NV_ERROR(dev, "ctxvals invalid\n"); - release_firmware(fw); - nv40_grctx_fini(dev); - return -EINVAL; - } - release_firmware(fw); - } - - cp = pgraph->ctxprog; - - nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0); - for (i = 0; i < cp->length; i++) - nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, cp->data[i]); - - pgraph->accel_blocked = false; - return 0; -} - void -nv40_grctx_fini(struct drm_device *dev) +nv40_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) { struct drm_nouveau_private *dev_priv = dev->dev_private; - struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; - - if (pgraph->ctxprog) { - kfree(pgraph->ctxprog); - pgraph->ctxprog = NULL; - } + uint32_t limit = max(1u, addr + size) - 1; - if (pgraph->ctxvals) { - kfree(pgraph->ctxprog); - pgraph->ctxvals = NULL; - } -} + if (pitch) + addr |= 1; -void -nv40_grctx_vals_load(struct drm_device *dev, struct nouveau_gpuobj *ctx) -{ - struct drm_nouveau_private *dev_priv = dev->dev_private; - struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; - struct nouveau_ctxvals *cv = pgraph->ctxvals; - int i; + switch (dev_priv->chipset) { + case 0x44: + case 0x4a: + case 0x4e: + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); + break; - if (!cv) - return; + case 0x46: + case 0x47: + case 0x49: + case 0x4b: + nv_wr32(dev, NV47_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV47_PGRAPH_TILE(i), addr); + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch); + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit); + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr); + break; - for (i = 0; i < cv->length; i++) - nv_wo32(dev, ctx, cv->data[i].offset, cv->data[i].value); + default: + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch); + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit); + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr); + break; + } } /* @@ -347,7 +237,8 @@ nv40_graph_init(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = (struct drm_nouveau_private *)dev->dev_private; - uint32_t vramsz, tmp; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + uint32_t vramsz; int i, j; nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) & @@ -355,7 +246,26 @@ nv40_graph_init(struct drm_device *dev) nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) | NV_PMC_ENABLE_PGRAPH); - nv40_grctx_init(dev); + if (nouveau_ctxfw) { + nouveau_grctx_prog_load(dev); + dev_priv->engine.graph.grctx_size = 175 * 1024; + } + + if (!dev_priv->engine.graph.ctxprog) { + struct nouveau_grctx ctx = {}; + uint32_t cp[256]; + + ctx.dev = dev; + ctx.mode = NOUVEAU_GRCTX_PROG; + ctx.data = cp; + ctx.ctxprog_max = 256; + nv40_grctx_init(&ctx); + dev_priv->engine.graph.grctx_size = ctx.ctxvals_pos * 4; + + nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0); + for (i = 0; i < ctx.ctxprog_len; i++) + nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, cp[i]); + } /* No context present currently */ nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0x00000000); @@ -425,74 +335,9 @@ nv40_graph_init(struct drm_device *dev) nv_wr32(dev, 0x400b38, 0x2ffff800); nv_wr32(dev, 0x400b3c, 0x00006000); - /* copy tile info from PFB */ - switch (dev_priv->chipset) { - case 0x40: /* vanilla NV40 */ - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { - tmp = nv_rd32(dev, NV10_PFB_TILE(i)); - nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp); - tmp = nv_rd32(dev, NV10_PFB_TLIMIT(i)); - nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp); - tmp = nv_rd32(dev, NV10_PFB_TSIZE(i)); - nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp); - tmp = nv_rd32(dev, NV10_PFB_TSTATUS(i)); - nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp); - } - break; - case 0x44: - case 0x4a: - case 0x4e: /* NV44-based cores don't have 0x406900? */ - for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) { - tmp = nv_rd32(dev, NV40_PFB_TILE(i)); - nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i)); - nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSIZE(i)); - nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i)); - nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp); - } - break; - case 0x46: - case 0x47: - case 0x49: - case 0x4b: /* G7X-based cores */ - for (i = 0; i < NV40_PFB_TILE__SIZE_1; i++) { - tmp = nv_rd32(dev, NV40_PFB_TILE(i)); - nv_wr32(dev, NV47_PGRAPH_TILE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i)); - nv_wr32(dev, NV47_PGRAPH_TLIMIT0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSIZE(i)); - nv_wr32(dev, NV47_PGRAPH_TSIZE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i)); - nv_wr32(dev, NV47_PGRAPH_TSTATUS0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp); - } - break; - default: /* everything else */ - for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) { - tmp = nv_rd32(dev, NV40_PFB_TILE(i)); - nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i)); - nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSIZE(i)); - nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i)); - nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp); - } - break; - } + /* Turn all the tiling regions off. */ + for (i = 0; i < pfb->num_tiles; i++) + nv40_graph_set_region_tiling(dev, i, 0, 0, 0); /* begin RAM config */ vramsz = drm_get_resource_len(dev, 0) - 1; @@ -535,6 +380,7 @@ nv40_graph_init(struct drm_device *dev) void nv40_graph_takedown(struct drm_device *dev) { + nouveau_grctx_fini(dev); } struct nouveau_pgraph_object_class nv40_graph_grclass[] = { |