diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-07-18 21:13:20 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-18 21:13:20 +0200 |
commit | f6dc8ccaab6d8f63cbae1e6c73fe972b26f5376c (patch) | |
tree | c5643fcdc884a8d0bfc3f1bc28039cab7394e5bc /drivers/gpu/drm/radeon | |
parent | 323ec001c6bb98eeabb5abbdbb8c8055d9496554 (diff) | |
parent | 5b664cb235e97afbf34db9c4d77f08ebd725335e (diff) |
Merge branch 'linus' into core/generic-dma-coherent
Conflicts:
kernel/Makefile
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r-- | drivers/gpu/drm/radeon/Makefile | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/r300_cmdbuf.c | 1071 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/r300_reg.h | 1772 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_cp.c | 1773 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_drv.c | 126 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_drv.h | 1406 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_ioc32.c | 424 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_irq.c | 320 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_mem.c | 302 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_microcode.h | 1844 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_state.c | 3203 |
11 files changed, 12251 insertions, 0 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile new file mode 100644 index 00000000000..feb521ebc39 --- /dev/null +++ b/drivers/gpu/drm/radeon/Makefile @@ -0,0 +1,10 @@ +# +# Makefile for the drm device driver. This driver provides support for the +# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. + +ccflags-y := -Iinclude/drm +radeon-y := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o radeon_irq.o r300_cmdbuf.o + +radeon-$(CONFIG_COMPAT) += radeon_ioc32.o + +obj-$(CONFIG_DRM_RADEON)+= radeon.o diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c new file mode 100644 index 00000000000..702df45320f --- /dev/null +++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c @@ -0,0 +1,1071 @@ +/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*- + * + * Copyright (C) The Weather Channel, Inc. 2002. + * Copyright (C) 2004 Nicolai Haehnle. + * All Rights Reserved. + * + * The Weather Channel (TM) funded Tungsten Graphics to develop the + * initial release of the Radeon 8500 driver under the XFree86 license. + * This notice must be preserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Nicolai Haehnle <prefect_@gmx.net> + */ + +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_drv.h" +#include "r300_reg.h" + +#define R300_SIMULTANEOUS_CLIPRECTS 4 + +/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects + */ +static const int r300_cliprect_cntl[4] = { + 0xAAAA, + 0xEEEE, + 0xFEFE, + 0xFFFE +}; + +/** + * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command + * buffer, starting with index n. + */ +static int r300_emit_cliprects(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf, int n) +{ + struct drm_clip_rect box; + int nr; + int i; + RING_LOCALS; + + nr = cmdbuf->nbox - n; + if (nr > R300_SIMULTANEOUS_CLIPRECTS) + nr = R300_SIMULTANEOUS_CLIPRECTS; + + DRM_DEBUG("%i cliprects\n", nr); + + if (nr) { + BEGIN_RING(6 + nr * 2); + OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1)); + + for (i = 0; i < nr; ++i) { + if (DRM_COPY_FROM_USER_UNCHECKED + (&box, &cmdbuf->boxes[n + i], sizeof(box))) { + DRM_ERROR("copy cliprect faulted\n"); + return -EFAULT; + } + + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) { + box.x1 = (box.x1) & + R300_CLIPRECT_MASK; + box.y1 = (box.y1) & + R300_CLIPRECT_MASK; + box.x2 = (box.x2) & + R300_CLIPRECT_MASK; + box.y2 = (box.y2) & + R300_CLIPRECT_MASK; + } else { + box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) & + R300_CLIPRECT_MASK; + box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) & + R300_CLIPRECT_MASK; + box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) & + R300_CLIPRECT_MASK; + box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) & + R300_CLIPRECT_MASK; + + } + OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) | + (box.y1 << R300_CLIPRECT_Y_SHIFT)); + OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) | + (box.y2 << R300_CLIPRECT_Y_SHIFT)); + + } + + OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]); + + /* TODO/SECURITY: Force scissors to a safe value, otherwise the + * client might be able to trample over memory. + * The impact should be very limited, but I'd rather be safe than + * sorry. + */ + OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1)); + OUT_RING(0); + OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK); + ADVANCE_RING(); + } else { + /* Why we allow zero cliprect rendering: + * There are some commands in a command buffer that must be submitted + * even when there are no cliprects, e.g. DMA buffer discard + * or state setting (though state setting could be avoided by + * simulating a loss of context). + * + * Now since the cmdbuf interface is so chaotic right now (and is + * bound to remain that way for a bit until things settle down), + * it is basically impossible to filter out the commands that are + * necessary and those that aren't. + * + * So I choose the safe way and don't do any filtering at all; + * instead, I simply set up the engine so that all rendering + * can't produce any fragments. + */ + BEGIN_RING(2); + OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0); + ADVANCE_RING(); + } + + return 0; +} + +static u8 r300_reg_flags[0x10000 >> 2]; + +void r300_init_reg_flags(struct drm_device *dev) +{ + int i; + drm_radeon_private_t *dev_priv = dev->dev_private; + + memset(r300_reg_flags, 0, 0x10000 >> 2); +#define ADD_RANGE_MARK(reg, count,mark) \ + for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\ + r300_reg_flags[i]|=(mark); + +#define MARK_SAFE 1 +#define MARK_CHECK_OFFSET 2 + +#define ADD_RANGE(reg, count) ADD_RANGE_MARK(reg, count, MARK_SAFE) + + /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */ + ADD_RANGE(R300_SE_VPORT_XSCALE, 6); + ADD_RANGE(R300_VAP_CNTL, 1); + ADD_RANGE(R300_SE_VTE_CNTL, 2); + ADD_RANGE(0x2134, 2); + ADD_RANGE(R300_VAP_CNTL_STATUS, 1); + ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2); + ADD_RANGE(0x21DC, 1); + ADD_RANGE(R300_VAP_UNKNOWN_221C, 1); + ADD_RANGE(R300_VAP_CLIP_X_0, 4); + ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1); + ADD_RANGE(R300_VAP_UNKNOWN_2288, 1); + ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2); + ADD_RANGE(R300_VAP_PVS_CNTL_1, 3); + ADD_RANGE(R300_GB_ENABLE, 1); + ADD_RANGE(R300_GB_MSPOS0, 5); + ADD_RANGE(R300_TX_CNTL, 1); + ADD_RANGE(R300_TX_ENABLE, 1); + ADD_RANGE(0x4200, 4); + ADD_RANGE(0x4214, 1); + ADD_RANGE(R300_RE_POINTSIZE, 1); + ADD_RANGE(0x4230, 3); + ADD_RANGE(R300_RE_LINE_CNT, 1); + ADD_RANGE(R300_RE_UNK4238, 1); + ADD_RANGE(0x4260, 3); + ADD_RANGE(R300_RE_SHADE, 4); + ADD_RANGE(R300_RE_POLYGON_MODE, 5); + ADD_RANGE(R300_RE_ZBIAS_CNTL, 1); + ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4); + ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1); + ADD_RANGE(R300_RE_CULL_CNTL, 1); + ADD_RANGE(0x42C0, 2); + ADD_RANGE(R300_RS_CNTL_0, 2); + + ADD_RANGE(R300_SC_HYPERZ, 2); + ADD_RANGE(0x43E8, 1); + + ADD_RANGE(0x46A4, 5); + + ADD_RANGE(R300_RE_FOG_STATE, 1); + ADD_RANGE(R300_FOG_COLOR_R, 3); + ADD_RANGE(R300_PP_ALPHA_TEST, 2); + ADD_RANGE(0x4BD8, 1); + ADD_RANGE(R300_PFS_PARAM_0_X, 64); + ADD_RANGE(0x4E00, 1); + ADD_RANGE(R300_RB3D_CBLEND, 2); + ADD_RANGE(R300_RB3D_COLORMASK, 1); + ADD_RANGE(R300_RB3D_BLEND_COLOR, 3); + ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET); /* check offset */ + ADD_RANGE(R300_RB3D_COLORPITCH0, 1); + ADD_RANGE(0x4E50, 9); + ADD_RANGE(0x4E88, 1); + ADD_RANGE(0x4EA0, 2); + ADD_RANGE(R300_ZB_CNTL, 3); + ADD_RANGE(R300_ZB_FORMAT, 4); + ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET); /* check offset */ + ADD_RANGE(R300_ZB_DEPTHPITCH, 1); + ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1); + ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13); + + ADD_RANGE(R300_TX_FILTER_0, 16); + ADD_RANGE(R300_TX_FILTER1_0, 16); + ADD_RANGE(R300_TX_SIZE_0, 16); + ADD_RANGE(R300_TX_FORMAT_0, 16); + ADD_RANGE(R300_TX_PITCH_0, 16); + /* Texture offset is dangerous and needs more checking */ + ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET); + ADD_RANGE(R300_TX_CHROMA_KEY_0, 16); + ADD_RANGE(R300_TX_BORDER_COLOR_0, 16); + + /* Sporadic registers used as primitives are emitted */ + ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1); + ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1); + ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8); + ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8); + + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) { + ADD_RANGE(R500_VAP_INDEX_OFFSET, 1); + ADD_RANGE(R500_US_CONFIG, 2); + ADD_RANGE(R500_US_CODE_ADDR, 3); + ADD_RANGE(R500_US_FC_CTRL, 1); + ADD_RANGE(R500_RS_IP_0, 16); + ADD_RANGE(R500_RS_INST_0, 16); + ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2); + ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2); + ADD_RANGE(R500_ZB_FIFO_SIZE, 2); + } else { + ADD_RANGE(R300_PFS_CNTL_0, 3); + ADD_RANGE(R300_PFS_NODE_0, 4); + ADD_RANGE(R300_PFS_TEXI_0, 64); + ADD_RANGE(R300_PFS_INSTR0_0, 64); + ADD_RANGE(R300_PFS_INSTR1_0, 64); + ADD_RANGE(R300_PFS_INSTR2_0, 64); + ADD_RANGE(R300_PFS_INSTR3_0, 64); + ADD_RANGE(R300_RS_INTERP_0, 8); + ADD_RANGE(R300_RS_ROUTE_0, 8); + + } +} + +static __inline__ int r300_check_range(unsigned reg, int count) +{ + int i; + if (reg & ~0xffff) + return -1; + for (i = (reg >> 2); i < (reg >> 2) + count; i++) + if (r300_reg_flags[i] != MARK_SAFE) + return 1; + return 0; +} + +static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t * + dev_priv, + drm_radeon_kcmd_buffer_t + * cmdbuf, + drm_r300_cmd_header_t + header) +{ + int reg; + int sz; + int i; + int values[64]; + RING_LOCALS; + + sz = header.packet0.count; + reg = (header.packet0.reghi << 8) | header.packet0.reglo; + + if ((sz > 64) || (sz < 0)) { + DRM_ERROR + ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n", + reg, sz); + return -EINVAL; + } + for (i = 0; i < sz; i++) { + values[i] = ((int *)cmdbuf->buf)[i]; + switch (r300_reg_flags[(reg >> 2) + i]) { + case MARK_SAFE: + break; + case MARK_CHECK_OFFSET: + if (!radeon_check_offset(dev_priv, (u32) values[i])) { + DRM_ERROR + ("Offset failed range check (reg=%04x sz=%d)\n", + reg, sz); + return -EINVAL; + } + break; + default: + DRM_ERROR("Register %04x failed check as flag=%02x\n", + reg + i * 4, r300_reg_flags[(reg >> 2) + i]); + return -EINVAL; + } + } + + BEGIN_RING(1 + sz); + OUT_RING(CP_PACKET0(reg, sz - 1)); + OUT_RING_TABLE(values, sz); + ADVANCE_RING(); + + cmdbuf->buf += sz * 4; + cmdbuf->bufsz -= sz * 4; + + return 0; +} + +/** + * Emits a packet0 setting arbitrary registers. + * Called by r300_do_cp_cmdbuf. + * + * Note that checks are performed on contents and addresses of the registers + */ +static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf, + drm_r300_cmd_header_t header) +{ + int reg; + int sz; + RING_LOCALS; + + sz = header.packet0.count; + reg = (header.packet0.reghi << 8) | header.packet0.reglo; + + if (!sz) + return 0; + + if (sz * 4 > cmdbuf->bufsz) + return -EINVAL; + + if (reg + sz * 4 >= 0x10000) { + DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg, + sz); + return -EINVAL; + } + + if (r300_check_range(reg, sz)) { + /* go and check everything */ + return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf, + header); + } + /* the rest of the data is safe to emit, whatever the values the user passed */ + + BEGIN_RING(1 + sz); + OUT_RING(CP_PACKET0(reg, sz - 1)); + OUT_RING_TABLE((int *)cmdbuf->buf, sz); + ADVANCE_RING(); + + cmdbuf->buf += sz * 4; + cmdbuf->bufsz -= sz * 4; + + return 0; +} + +/** + * Uploads user-supplied vertex program instructions or parameters onto + * the graphics card. + * Called by r300_do_cp_cmdbuf. + */ +static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf, + drm_r300_cmd_header_t header) +{ + int sz; + int addr; + RING_LOCALS; + + sz = header.vpu.count; + addr = (header.vpu.adrhi << 8) | header.vpu.adrlo; + + if (!sz) + return 0; + if (sz * 16 > cmdbuf->bufsz) + return -EINVAL; + + BEGIN_RING(5 + sz * 4); + /* Wait for VAP to come to senses.. */ + /* there is no need to emit it multiple times, (only once before VAP is programmed, + but this optimization is for later */ + OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0); + OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr); + OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1)); + OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4); + + ADVANCE_RING(); + + cmdbuf->buf += sz * 16; + cmdbuf->bufsz -= sz * 16; + + return 0; +} + +/** + * Emit a clear packet from userspace. + * Called by r300_emit_packet3. + */ +static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf) +{ + RING_LOCALS; + + if (8 * 4 > cmdbuf->bufsz) + return -EINVAL; + + BEGIN_RING(10); + OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); + OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | + (1 << R300_PRIM_NUM_VERTICES_SHIFT)); + OUT_RING_TABLE((int *)cmdbuf->buf, 8); + ADVANCE_RING(); + + cmdbuf->buf += 8 * 4; + cmdbuf->bufsz -= 8 * 4; + + return 0; +} + +static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf, + u32 header) +{ + int count, i, k; +#define MAX_ARRAY_PACKET 64 + u32 payload[MAX_ARRAY_PACKET]; + u32 narrays; + RING_LOCALS; + + count = (header >> 16) & 0x3fff; + + if ((count + 1) > MAX_ARRAY_PACKET) { + DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n", + count); + return -EINVAL; + } + memset(payload, 0, MAX_ARRAY_PACKET * 4); + memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4); + + /* carefully check packet contents */ + + narrays = payload[0]; + k = 0; + i = 1; + while ((k < narrays) && (i < (count + 1))) { + i++; /* skip attribute field */ + if (!radeon_check_offset(dev_priv, payload[i])) { + DRM_ERROR + ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", + k, i); + return -EINVAL; + } + k++; + i++; + if (k == narrays) + break; + /* have one more to process, they come in pairs */ + if (!radeon_check_offset(dev_priv, payload[i])) { + DRM_ERROR + ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", + k, i); + return -EINVAL; + } + k++; + i++; + } + /* do the counts match what we expect ? */ + if ((k != narrays) || (i != (count + 1))) { + DRM_ERROR + ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n", + k, i, narrays, count + 1); + return -EINVAL; + } + + /* all clear, output packet */ + + BEGIN_RING(count + 2); + OUT_RING(header); + OUT_RING_TABLE(payload, count + 1); + ADVANCE_RING(); + + cmdbuf->buf += (count + 2) * 4; + cmdbuf->bufsz -= (count + 2) * 4; + + return 0; +} + +static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf) +{ + u32 *cmd = (u32 *) cmdbuf->buf; + int count, ret; + RING_LOCALS; + + count=(cmd[0]>>16) & 0x3fff; + + if (cmd[0] & 0x8000) { + u32 offset; + + if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL + | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) { + offset = cmd[2] << 10; + ret = !radeon_check_offset(dev_priv, offset); + if (ret) { + DRM_ERROR("Invalid bitblt first offset is %08X\n", offset); + return -EINVAL; + } + } + + if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) && + (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) { + offset = cmd[3] << 10; + ret = !radeon_check_offset(dev_priv, offset); + if (ret) { + DRM_ERROR("Invalid bitblt second offset is %08X\n", offset); + return -EINVAL; + } + + } + } + + BEGIN_RING(count+2); + OUT_RING(cmd[0]); + OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1); + ADVANCE_RING(); + + cmdbuf->buf += (count+2)*4; + cmdbuf->bufsz -= (count+2)*4; + + return 0; +} + +static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf) +{ + u32 *cmd = (u32 *) cmdbuf->buf; + int count, ret; + RING_LOCALS; + + count=(cmd[0]>>16) & 0x3fff; + + if ((cmd[1] & 0x8000ffff) != 0x80000810) { + DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]); + return -EINVAL; + } + ret = !radeon_check_offset(dev_priv, cmd[2]); + if (ret) { + DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]); + return -EINVAL; + } + + BEGIN_RING(count+2); + OUT_RING(cmd[0]); + OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1); + ADVANCE_RING(); + + cmdbuf->buf += (count+2)*4; + cmdbuf->bufsz -= (count+2)*4; + + return 0; +} + +static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf) +{ + u32 header; + int count; + RING_LOCALS; + + if (4 > cmdbuf->bufsz) + return -EINVAL; + + /* Fixme !! This simply emits a packet without much checking. + We need to be smarter. */ + + /* obtain first word - actual packet3 header */ + header = *(u32 *) cmdbuf->buf; + + /* Is it packet 3 ? */ + if ((header >> 30) != 0x3) { + DRM_ERROR("Not a packet3 header (0x%08x)\n", header); + return -EINVAL; + } + + count = (header >> 16) & 0x3fff; + + /* Check again now that we know how much data to expect */ + if ((count + 2) * 4 > cmdbuf->bufsz) { + DRM_ERROR + ("Expected packet3 of length %d but have only %d bytes left\n", + (count + 2) * 4, cmdbuf->bufsz); + return -EINVAL; + } + + /* Is it a packet type we know about ? */ + switch (header & 0xff00) { + case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */ + return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header); + + case RADEON_CNTL_BITBLT_MULTI: + return r300_emit_bitblt_multi(dev_priv, cmdbuf); + + case RADEON_CP_INDX_BUFFER: /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */ + return r300_emit_indx_buffer(dev_priv, cmdbuf); + case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */ + case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */ + case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */ + case RADEON_WAIT_FOR_IDLE: + case RADEON_CP_NOP: + /* these packets are safe */ + break; + default: + DRM_ERROR("Unknown packet3 header (0x%08x)\n", header); + return -EINVAL; + } + + BEGIN_RING(count + 2); + OUT_RING(header); + OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1); + ADVANCE_RING(); + + cmdbuf->buf += (count + 2) * 4; + cmdbuf->bufsz -= (count + 2) * 4; + + return 0; +} + +/** + * Emit a rendering packet3 from userspace. + * Called by r300_do_cp_cmdbuf. + */ +static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf, + drm_r300_cmd_header_t header) +{ + int n; + int ret; + char *orig_buf = cmdbuf->buf; + int orig_bufsz = cmdbuf->bufsz; + + /* This is a do-while-loop so that we run the interior at least once, + * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale. + */ + n = 0; + do { + if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) { + ret = r300_emit_cliprects(dev_priv, cmdbuf, n); + if (ret) + return ret; + + cmdbuf->buf = orig_buf; + cmdbuf->bufsz = orig_bufsz; + } + + switch (header.packet3.packet) { + case R300_CMD_PACKET3_CLEAR: + DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n"); + ret = r300_emit_clear(dev_priv, cmdbuf); + if (ret) { + DRM_ERROR("r300_emit_clear failed\n"); + return ret; + } + break; + + case R300_CMD_PACKET3_RAW: + DRM_DEBUG("R300_CMD_PACKET3_RAW\n"); + ret = r300_emit_raw_packet3(dev_priv, cmdbuf); + if (ret) { + DRM_ERROR("r300_emit_raw_packet3 failed\n"); + return ret; + } + break; + + default: + DRM_ERROR("bad packet3 type %i at %p\n", + header.packet3.packet, + cmdbuf->buf - sizeof(header)); + return -EINVAL; + } + + n += R300_SIMULTANEOUS_CLIPRECTS; + } while (n < cmdbuf->nbox); + + return 0; +} + +/* Some of the R300 chips seem to be extremely touchy about the two registers + * that are configured in r300_pacify. + * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace + * sends a command buffer that contains only state setting commands and a + * vertex program/parameter upload sequence, this will eventually lead to a + * lockup, unless the sequence is bracketed by calls to r300_pacify. + * So we should take great care to *always* call r300_pacify before + * *anything* 3D related, and again afterwards. This is what the + * call bracket in r300_do_cp_cmdbuf is for. + */ + +/** + * Emit the sequence to pacify R300. + */ +static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv) +{ + RING_LOCALS; + + BEGIN_RING(6); + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A); + OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0)); + OUT_RING(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE| + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0)); + OUT_RING(0x0); + ADVANCE_RING(); +} + +/** + * Called by r300_do_cp_cmdbuf to update the internal buffer age and state. + * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must + * be careful about how this function is called. + */ +static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_buf_priv_t *buf_priv = buf->dev_private; + + buf_priv->age = ++dev_priv->sarea_priv->last_dispatch; + buf->pending = 1; + buf->used = 0; +} + +static void r300_cmd_wait(drm_radeon_private_t * dev_priv, + drm_r300_cmd_header_t header) +{ + u32 wait_until; + RING_LOCALS; + + if (!header.wait.flags) + return; + + wait_until = 0; + + switch(header.wait.flags) { + case R300_WAIT_2D: + wait_until = RADEON_WAIT_2D_IDLE; + break; + case R300_WAIT_3D: + wait_until = RADEON_WAIT_3D_IDLE; + break; + case R300_NEW_WAIT_2D_3D: + wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_3D_IDLE; + break; + case R300_NEW_WAIT_2D_2D_CLEAN: + wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN; + break; + case R300_NEW_WAIT_3D_3D_CLEAN: + wait_until = RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN; + break; + case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN: + wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN; + wait_until |= RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN; + break; + default: + return; + } + + BEGIN_RING(2); + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_RING(wait_until); + ADVANCE_RING(); +} + +static int r300_scratch(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf, + drm_r300_cmd_header_t header) +{ + u32 *ref_age_base; + u32 i, buf_idx, h_pending; + RING_LOCALS; + + if (cmdbuf->bufsz < + (sizeof(u64) + header.scratch.n_bufs * sizeof(buf_idx))) { + return -EINVAL; + } + + if (header.scratch.reg >= 5) { + return -EINVAL; + } + + dev_priv->scratch_ages[header.scratch.reg]++; + + ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf); + + cmdbuf->buf += sizeof(u64); + cmdbuf->bufsz -= sizeof(u64); + + for (i=0; i < header.scratch.n_bufs; i++) { + buf_idx = *(u32 *)cmdbuf->buf; + buf_idx *= 2; /* 8 bytes per buf */ + + if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) { + return -EINVAL; + } + + if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) { + return -EINVAL; + } + + if (h_pending == 0) { + return -EINVAL; + } + + h_pending--; + + if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) { + return -EINVAL; + } + + cmdbuf->buf += sizeof(buf_idx); + cmdbuf->bufsz -= sizeof(buf_idx); + } + + BEGIN_RING(2); + OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) ); + OUT_RING( dev_priv->scratch_ages[header.scratch.reg] ); + ADVANCE_RING(); + + return 0; +} + +/** + * Uploads user-supplied vertex program instructions or parameters onto + * the graphics card. + * Called by r300_do_cp_cmdbuf. + */ +static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf, + drm_r300_cmd_header_t header) +{ + int sz; + int addr; + int type; + int clamp; + int stride; + RING_LOCALS; + + sz = header.r500fp.count; + /* address is 9 bits 0 - 8, bit 1 of flags is part of address */ + addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo; + + type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE); + clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); + + addr |= (type << 16); + addr |= (clamp << 17); + + stride = type ? 4 : 6; + + DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type); + if (!sz) + return 0; + if (sz * stride * 4 > cmdbuf->bufsz) + return -EINVAL; + + BEGIN_RING(3 + sz * stride); + OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr); + OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1)); + OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride); + + ADVANCE_RING(); + + cmdbuf->buf += sz * stride * 4; + cmdbuf->bufsz -= sz * stride * 4; + + return 0; +} + + +/** + * Parses and validates a user-supplied command buffer and emits appropriate + * commands on the DMA ring buffer. + * Called by the ioctl handler function radeon_cp_cmdbuf. + */ +int r300_do_cp_cmdbuf(struct drm_device *dev, + struct drm_file *file_priv, + drm_radeon_kcmd_buffer_t *cmdbuf) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_device_dma *dma = dev->dma; + struct drm_buf *buf = NULL; + int emit_dispatch_age = 0; + int ret = 0; + + DRM_DEBUG("\n"); + + /* See the comment above r300_emit_begin3d for why this call must be here, + * and what the cleanup gotos are for. */ + r300_pacify(dev_priv); + + if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) { + ret = r300_emit_cliprects(dev_priv, cmdbuf, 0); + if (ret) + goto cleanup; + } + + while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) { + int idx; + drm_r300_cmd_header_t header; + + header.u = *(unsigned int *)cmdbuf->buf; + + cmdbuf->buf += sizeof(header); + cmdbuf->bufsz -= sizeof(header); + + switch (header.header.cmd_type) { + case R300_CMD_PACKET0: + DRM_DEBUG("R300_CMD_PACKET0\n"); + ret = r300_emit_packet0(dev_priv, cmdbuf, header); + if (ret) { + DRM_ERROR("r300_emit_packet0 failed\n"); + goto cleanup; + } + break; + + case R300_CMD_VPU: + DRM_DEBUG("R300_CMD_VPU\n"); + ret = r300_emit_vpu(dev_priv, cmdbuf, header); + if (ret) { + DRM_ERROR("r300_emit_vpu failed\n"); + goto cleanup; + } + break; + + case R300_CMD_PACKET3: + DRM_DEBUG("R300_CMD_PACKET3\n"); + ret = r300_emit_packet3(dev_priv, cmdbuf, header); + if (ret) { + DRM_ERROR("r300_emit_packet3 failed\n"); + goto cleanup; + } + break; + + case R300_CMD_END3D: + DRM_DEBUG("R300_CMD_END3D\n"); + /* TODO: + Ideally userspace driver should not need to issue this call, + i.e. the drm driver should issue it automatically and prevent + lockups. + + In practice, we do not understand why this call is needed and what + it does (except for some vague guesses that it has to do with cache + coherence) and so the user space driver does it. + + Once we are sure which uses prevent lockups the code could be moved + into the kernel and the userspace driver will not + need to use this command. + + Note that issuing this command does not hurt anything + except, possibly, performance */ + r300_pacify(dev_priv); + break; + + case R300_CMD_CP_DELAY: + /* simple enough, we can do it here */ + DRM_DEBUG("R300_CMD_CP_DELAY\n"); + { + int i; + RING_LOCALS; + + BEGIN_RING(header.delay.count); + for (i = 0; i < header.delay.count; i++) + OUT_RING(RADEON_CP_PACKET2); + ADVANCE_RING(); + } + break; + + case R300_CMD_DMA_DISCARD: + DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n"); + idx = header.dma.buf_idx; + if (idx < 0 || idx >= dma->buf_count) { + DRM_ERROR("buffer index %d (of %d max)\n", + idx, dma->buf_count - 1); + ret = -EINVAL; + goto cleanup; + } + + buf = dma->buflist[idx]; + if (buf->file_priv != file_priv || buf->pending) { + DRM_ERROR("bad buffer %p %p %d\n", + buf->file_priv, file_priv, + buf->pending); + ret = -EINVAL; + goto cleanup; + } + + emit_dispatch_age = 1; + r300_discard_buffer(dev, buf); + break; + + case R300_CMD_WAIT: + DRM_DEBUG("R300_CMD_WAIT\n"); + r300_cmd_wait(dev_priv, header); + break; + + case R300_CMD_SCRATCH: + DRM_DEBUG("R300_CMD_SCRATCH\n"); + ret = r300_scratch(dev_priv, cmdbuf, header); + if (ret) { + DRM_ERROR("r300_scratch failed\n"); + goto cleanup; + } + break; + + case R300_CMD_R500FP: + if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) { + DRM_ERROR("Calling r500 command on r300 card\n"); + ret = -EINVAL; + goto cleanup; + } + DRM_DEBUG("R300_CMD_R500FP\n"); + ret = r300_emit_r500fp(dev_priv, cmdbuf, header); + if (ret) { + DRM_ERROR("r300_emit_r500fp failed\n"); + goto cleanup; + } + break; + default: + DRM_ERROR("bad cmd_type %i at %p\n", + header.header.cmd_type, + cmdbuf->buf - sizeof(header)); + ret = -EINVAL; + goto cleanup; + } + } + + DRM_DEBUG("END\n"); + + cleanup: + r300_pacify(dev_priv); + + /* We emit the vertex buffer age here, outside the pacifier "brackets" + * for two reasons: + * (1) This may coalesce multiple age emissions into a single one and + * (2) more importantly, some chips lock up hard when scratch registers + * are written inside the pacifier bracket. + */ + if (emit_dispatch_age) { + RING_LOCALS; + + /* Emit the vertex buffer age */ + BEGIN_RING(2); + RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch); + ADVANCE_RING(); + } + + COMMIT_RING(); + + return ret; +} diff --git a/drivers/gpu/drm/radeon/r300_reg.h b/drivers/gpu/drm/radeon/r300_reg.h new file mode 100644 index 00000000000..a6802f26afc --- /dev/null +++ b/drivers/gpu/drm/radeon/r300_reg.h @@ -0,0 +1,1772 @@ +/************************************************************************** + +Copyright (C) 2004-2005 Nicolai Haehnle et al. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +#ifndef _R300_REG_H +#define _R300_REG_H + +#define R300_MC_INIT_MISC_LAT_TIMER 0x180 +# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28 + +#define R300_MC_INIT_GFX_LAT_TIMER 0x154 +# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28 + +/* + * This file contains registers and constants for the R300. They have been + * found mostly by examining command buffers captured using glxtest, as well + * as by extrapolating some known registers and constants from the R200. + * I am fairly certain that they are correct unless stated otherwise + * in comments. + */ + +#define R300_SE_VPORT_XSCALE 0x1D98 +#define R300_SE_VPORT_XOFFSET 0x1D9C +#define R300_SE_VPORT_YSCALE 0x1DA0 +#define R300_SE_VPORT_YOFFSET 0x1DA4 +#define R300_SE_VPORT_ZSCALE 0x1DA8 +#define R300_SE_VPORT_ZOFFSET 0x1DAC + + +/* + * Vertex Array Processing (VAP) Control + * Stolen from r200 code from Christoph Brill (It's a guess!) + */ +#define R300_VAP_CNTL 0x2080 + +/* This register is written directly and also starts data section + * in many 3d CP_PACKET3's + */ +#define R300_VAP_VF_CNTL 0x2084 +# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0 +# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0) +# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0) +# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0) +# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0) +# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0) +# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0) + +# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4 + /* State based - direct writes to registers trigger vertex + generation */ +# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4) + + /* I don't think I saw these three used.. */ +# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6 +# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9 +# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10 + + /* index size - when not set the indices are assumed to be 16 bit */ +# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11) + /* number of vertices */ +# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 + +/* BEGIN: Wild guesses */ +#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090 +# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT (1<<1) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) /* GUESS */ + +#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094 + /* each of the following is 3 bits wide, specifies number + of components */ +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 +/* END: Wild guesses */ + +#define R300_SE_VTE_CNTL 0x20b0 +# define R300_VPORT_X_SCALE_ENA 0x00000001 +# define R300_VPORT_X_OFFSET_ENA 0x00000002 +# define R300_VPORT_Y_SCALE_ENA 0x00000004 +# define R300_VPORT_Y_OFFSET_ENA 0x00000008 +# define R300_VPORT_Z_SCALE_ENA 0x00000010 +# define R300_VPORT_Z_OFFSET_ENA 0x00000020 +# define R300_VTX_XY_FMT 0x00000100 +# define R300_VTX_Z_FMT 0x00000200 +# define R300_VTX_W0_FMT 0x00000400 +# define R300_VTX_W0_NORMALIZE 0x00000800 +# define R300_VTX_ST_DENORMALIZED 0x00001000 + +/* BEGIN: Vertex data assembly - lots of uncertainties */ + +/* gap */ + +#define R300_VAP_CNTL_STATUS 0x2140 +# define R300_VC_NO_SWAP (0 << 0) +# define R300_VC_16BIT_SWAP (1 << 0) +# define R300_VC_32BIT_SWAP (2 << 0) +# define R300_VAP_TCL_BYPASS (1 << 8) + +/* gap */ + +/* Where do we get our vertex data? + * + * Vertex data either comes either from immediate mode registers or from + * vertex arrays. + * There appears to be no mixed mode (though we can force the pitch of + * vertex arrays to 0, effectively reusing the same element over and over + * again). + * + * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure + * if these registers influence vertex array processing. + * + * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3. + * + * In both cases, vertex attributes are then passed through INPUT_ROUTE. + * + * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data + * into the vertex processor's input registers. + * The first word routes the first input, the second word the second, etc. + * The corresponding input is routed into the register with the given index. + * The list is ended by a word with INPUT_ROUTE_END set. + * + * Always set COMPONENTS_4 in immediate mode. + */ + +#define R300_VAP_INPUT_ROUTE_0_0 0x2150 +# define R300_INPUT_ROUTE_COMPONENTS_1 (0 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_2 (1 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_3 (2 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_4 (3 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_RGBA (4 << 0) /* GUESS */ +# define R300_VAP_INPUT_ROUTE_IDX_SHIFT 8 +# define R300_VAP_INPUT_ROUTE_IDX_MASK (31 << 8) /* GUESS */ +# define R300_VAP_INPUT_ROUTE_END (1 << 13) +# define R300_INPUT_ROUTE_IMMEDIATE_MODE (0 << 14) /* GUESS */ +# define R300_INPUT_ROUTE_FLOAT (1 << 14) /* GUESS */ +# define R300_INPUT_ROUTE_UNSIGNED_BYTE (2 << 14) /* GUESS */ +# define R300_INPUT_ROUTE_FLOAT_COLOR (3 << 14) /* GUESS */ +#define R300_VAP_INPUT_ROUTE_0_1 0x2154 +#define R300_VAP_INPUT_ROUTE_0_2 0x2158 +#define R300_VAP_INPUT_ROUTE_0_3 0x215C +#define R300_VAP_INPUT_ROUTE_0_4 0x2160 +#define R300_VAP_INPUT_ROUTE_0_5 0x2164 +#define R300_VAP_INPUT_ROUTE_0_6 0x2168 +#define R300_VAP_INPUT_ROUTE_0_7 0x216C + +/* gap */ + +/* Notes: + * - always set up to produce at least two attributes: + * if vertex program uses only position, fglrx will set normal, too + * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal. + */ +#define R300_VAP_INPUT_CNTL_0 0x2180 +# define R300_INPUT_CNTL_0_COLOR 0x00000001 +#define R300_VAP_INPUT_CNTL_1 0x2184 +# define R300_INPUT_CNTL_POS 0x00000001 +# define R300_INPUT_CNTL_NORMAL 0x00000002 +# define R300_INPUT_CNTL_COLOR 0x00000004 +# define R300_INPUT_CNTL_TC0 0x00000400 +# define R300_INPUT_CNTL_TC1 0x00000800 +# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */ +# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */ +# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */ +# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */ +# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */ +# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ + +/* gap */ + +/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0 + * are set to a swizzling bit pattern, other words are 0. + * + * In immediate mode, the pattern is always set to xyzw. In vertex array + * mode, the swizzling pattern is e.g. used to set zw components in texture + * coordinates with only tweo components. + */ +#define R300_VAP_INPUT_ROUTE_1_0 0x21E0 +# define R300_INPUT_ROUTE_SELECT_X 0 +# define R300_INPUT_ROUTE_SELECT_Y 1 +# define R300_INPUT_ROUTE_SELECT_Z 2 +# define R300_INPUT_ROUTE_SELECT_W 3 +# define R300_INPUT_ROUTE_SELECT_ZERO 4 +# define R300_INPUT_ROUTE_SELECT_ONE 5 +# define R300_INPUT_ROUTE_SELECT_MASK 7 +# define R300_INPUT_ROUTE_X_SHIFT 0 +# define R300_INPUT_ROUTE_Y_SHIFT 3 +# define R300_INPUT_ROUTE_Z_SHIFT 6 +# define R300_INPUT_ROUTE_W_SHIFT 9 +# define R300_INPUT_ROUTE_ENABLE (15 << 12) +#define R300_VAP_INPUT_ROUTE_1_1 0x21E4 +#define R300_VAP_INPUT_ROUTE_1_2 0x21E8 +#define R300_VAP_INPUT_ROUTE_1_3 0x21EC +#define R300_VAP_INPUT_ROUTE_1_4 0x21F0 +#define R300_VAP_INPUT_ROUTE_1_5 0x21F4 +#define R300_VAP_INPUT_ROUTE_1_6 0x21F8 +#define R300_VAP_INPUT_ROUTE_1_7 0x21FC + +/* END: Vertex data assembly */ + +/* gap */ + +/* BEGIN: Upload vertex program and data */ + +/* + * The programmable vertex shader unit has a memory bank of unknown size + * that can be written to in 16 byte units by writing the address into + * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs). + * + * Pointers into the memory bank are always in multiples of 16 bytes. + * + * The memory bank is divided into areas with fixed meaning. + * + * Starting at address UPLOAD_PROGRAM: Vertex program instructions. + * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB), + * whereas the difference between known addresses suggests size 512. + * + * Starting at address UPLOAD_PARAMETERS: Vertex program parameters. + * Native reported limits and the VPI layout suggest size 256, whereas + * difference between known addresses suggests size 512. + * + * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the + * floating point pointsize. The exact purpose of this state is uncertain, + * as there is also the R300_RE_POINTSIZE register. + * + * Multiple vertex programs and parameter sets can be loaded at once, + * which could explain the size discrepancy. + */ +#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 +# define R300_PVS_UPLOAD_PROGRAM 0x00000000 +# define R300_PVS_UPLOAD_PARAMETERS 0x00000200 +# define R300_PVS_UPLOAD_POINTSIZE 0x00000406 + +/* gap */ + +#define R300_VAP_PVS_UPLOAD_DATA 0x2208 + +/* END: Upload vertex program and data */ + +/* gap */ + +/* I do not know the purpose of this register. However, I do know that + * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL + * for normal rendering. + */ +#define R300_VAP_UNKNOWN_221C 0x221C +# define R300_221C_NORMAL 0x00000000 +# define R300_221C_CLEAR 0x0001C000 + +/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first + * plane is per-pixel and the second plane is per-vertex. + * + * This was determined by experimentation alone but I believe it is correct. + * + * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest. + */ +#define R300_VAP_CLIP_X_0 0x2220 +#define R300_VAP_CLIP_X_1 0x2224 +#define R300_VAP_CLIP_Y_0 0x2228 +#define R300_VAP_CLIP_Y_1 0x2230 + +/* gap */ + +/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between + * rendering commands and overwriting vertex program parameters. + * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and + * avoids bugs caused by still running shaders reading bad data from memory. + */ +#define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */ + +/* Absolutely no clue what this register is about. */ +#define R300_VAP_UNKNOWN_2288 0x2288 +# define R300_2288_R300 0x00750000 /* -- nh */ +# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ + +/* gap */ + +/* Addresses are relative to the vertex program instruction area of the + * memory bank. PROGRAM_END points to the last instruction of the active + * program + * + * The meaning of the two UNKNOWN fields is obviously not known. However, + * experiments so far have shown that both *must* point to an instruction + * inside the vertex program, otherwise the GPU locks up. + * + * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and + * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to + * position takes place. + * + * Most likely this is used to ignore rest of the program in cases + * where group of verts arent visible. For some reason this "section" + * is sometimes accepted other instruction that have no relationship with + * position calculations. + */ +#define R300_VAP_PVS_CNTL_1 0x22D0 +# define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0 +# define R300_PVS_CNTL_1_POS_END_SHIFT 10 +# define R300_PVS_CNTL_1_PROGRAM_END_SHIFT 20 +/* Addresses are relative the the vertex program parameters area. */ +#define R300_VAP_PVS_CNTL_2 0x22D4 +# define R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 +# define R300_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 +#define R300_VAP_PVS_CNTL_3 0x22D8 +# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT 10 +# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT 0 + +/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for + * immediate vertices + */ +#define R300_VAP_VTX_COLOR_R 0x2464 +#define R300_VAP_VTX_COLOR_G 0x2468 +#define R300_VAP_VTX_COLOR_B 0x246C +#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */ +#define R300_VAP_VTX_POS_0_Y_1 0x2494 +#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */ +#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */ +#define R300_VAP_VTX_POS_0_Y_2 0x24A4 +#define R300_VAP_VTX_POS_0_Z_2 0x24A8 +/* write 0 to indicate end of packet? */ +#define R300_VAP_VTX_END_OF_PKT 0x24AC + +/* gap */ + +/* These are values from r300_reg/r300_reg.h - they are known to be correct + * and are here so we can use one register file instead of several + * - Vladimir + */ +#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000 +# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5) +# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16) + +#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004 + /* each of the following is 3 bits wide, specifies number + of components */ +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 + +/* UNK30 seems to enables point to quad transformation on textures + * (or something closely related to that). + * This bit is rather fatal at the time being due to lackings at pixel + * shader side + */ +#define R300_GB_ENABLE 0x4008 +# define R300_GB_POINT_STUFF_ENABLE (1<<0) +# define R300_GB_LINE_STUFF_ENABLE (1<<1) +# define R300_GB_TRIANGLE_STUFF_ENABLE (1<<2) +# define R300_GB_STENCIL_AUTO_ENABLE (1<<4) +# define R300_GB_UNK31 (1<<31) + /* each of the following is 2 bits wide */ +#define R300_GB_TEX_REPLICATE 0 +#define R300_GB_TEX_ST 1 +#define R300_GB_TEX_STR 2 +# define R300_GB_TEX0_SOURCE_SHIFT 16 +# define R300_GB_TEX1_SOURCE_SHIFT 18 +# define R300_GB_TEX2_SOURCE_SHIFT 20 +# define R300_GB_TEX3_SOURCE_SHIFT 22 +# define R300_GB_TEX4_SOURCE_SHIFT 24 +# define R300_GB_TEX5_SOURCE_SHIFT 26 +# define R300_GB_TEX6_SOURCE_SHIFT 28 +# define R300_GB_TEX7_SOURCE_SHIFT 30 + +/* MSPOS - positions for multisample antialiasing (?) */ +#define R300_GB_MSPOS0 0x4010 + /* shifts - each of the fields is 4 bits */ +# define R300_GB_MSPOS0__MS_X0_SHIFT 0 +# define R300_GB_MSPOS0__MS_Y0_SHIFT 4 +# define R300_GB_MSPOS0__MS_X1_SHIFT 8 +# define R300_GB_MSPOS0__MS_Y1_SHIFT 12 +# define R300_GB_MSPOS0__MS_X2_SHIFT 16 +# define R300_GB_MSPOS0__MS_Y2_SHIFT 20 +# define R300_GB_MSPOS0__MSBD0_Y 24 +# define R300_GB_MSPOS0__MSBD0_X 28 + +#define R300_GB_MSPOS1 0x4014 +# define R300_GB_MSPOS1__MS_X3_SHIFT 0 +# define R300_GB_MSPOS1__MS_Y3_SHIFT 4 +# define R300_GB_MSPOS1__MS_X4_SHIFT 8 +# define R300_GB_MSPOS1__MS_Y4_SHIFT 12 +# define R300_GB_MSPOS1__MS_X5_SHIFT 16 +# define R300_GB_MSPOS1__MS_Y5_SHIFT 20 +# define R300_GB_MSPOS1__MSBD1 24 + + +#define R300_GB_TILE_CONFIG 0x4018 +# define R300_GB_TILE_ENABLE (1<<0) +# define R300_GB_TILE_PIPE_COUNT_RV300 0 +# define R300_GB_TILE_PIPE_COUNT_R300 (3<<1) +# define R300_GB_TILE_PIPE_COUNT_R420 (7<<1) +# define R300_GB_TILE_PIPE_COUNT_RV410 (3<<1) +# define R300_GB_TILE_SIZE_8 0 +# define R300_GB_TILE_SIZE_16 (1<<4) +# define R300_GB_TILE_SIZE_32 (2<<4) +# define R300_GB_SUPER_SIZE_1 (0<<6) +# define R300_GB_SUPER_SIZE_2 (1<<6) +# define R300_GB_SUPER_SIZE_4 (2<<6) +# define R300_GB_SUPER_SIZE_8 (3<<6) +# define R300_GB_SUPER_SIZE_16 (4<<6) +# define R300_GB_SUPER_SIZE_32 (5<<6) +# define R300_GB_SUPER_SIZE_64 (6<<6) +# define R300_GB_SUPER_SIZE_128 (7<<6) +# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */ +# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */ +# define R300_GB_SUPER_TILE_A 0 +# define R300_GB_SUPER_TILE_B (1<<15) +# define R300_GB_SUBPIXEL_1_12 0 +# define R300_GB_SUBPIXEL_1_16 (1<<16) + +#define R300_GB_FIFO_SIZE 0x4024 + /* each of the following is 2 bits wide */ +#define R300_GB_FIFO_SIZE_32 0 +#define R300_GB_FIFO_SIZE_64 1 +#define R300_GB_FIFO_SIZE_128 2 +#define R300_GB_FIFO_SIZE_256 3 +# define R300_SC_IFIFO_SIZE_SHIFT 0 +# define R300_SC_TZFIFO_SIZE_SHIFT 2 +# define R300_SC_BFIFO_SIZE_SHIFT 4 + +# define R300_US_OFIFO_SIZE_SHIFT 12 +# define R300_US_WFIFO_SIZE_SHIFT 14 + /* the following use the same constants as above, but meaning is + is times 2 (i.e. instead of 32 words it means 64 */ +# define R300_RS_TFIFO_SIZE_SHIFT 6 +# define R300_RS_CFIFO_SIZE_SHIFT 8 +# define R300_US_RAM_SIZE_SHIFT 10 + /* watermarks, 3 bits wide */ +# define R300_RS_HIGHWATER_COL_SHIFT 16 +# define R300_RS_HIGHWATER_TEX_SHIFT 19 +# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ +# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 + +#define R300_GB_SELECT 0x401C +# define R300_GB_FOG_SELECT_C0A 0 +# define R300_GB_FOG_SELECT_C1A 1 +# define R300_GB_FOG_SELECT_C2A 2 +# define R300_GB_FOG_SELECT_C3A 3 +# define R300_GB_FOG_SELECT_1_1_W 4 +# define R300_GB_FOG_SELECT_Z 5 +# define R300_GB_DEPTH_SELECT_Z 0 +# define R300_GB_DEPTH_SELECT_1_1_W (1<<3) +# define R300_GB_W_SELECT_1_W 0 +# define R300_GB_W_SELECT_1 (1<<4) + +#define R300_GB_AA_CONFIG 0x4020 +# define R300_AA_DISABLE 0x00 +# define R300_AA_ENABLE 0x01 +# define R300_AA_SUBSAMPLES_2 0 +# define R300_AA_SUBSAMPLES_3 (1<<1) +# define R300_AA_SUBSAMPLES_4 (2<<1) +# define R300_AA_SUBSAMPLES_6 (3<<1) + +/* gap */ + +/* Zero to flush caches. */ +#define R300_TX_CNTL 0x4100 +#define R300_TX_FLUSH 0x0 + +/* The upper enable bits are guessed, based on fglrx reported limits. */ +#define R300_TX_ENABLE 0x4104 +# define R300_TX_ENABLE_0 (1 << 0) +# define R300_TX_ENABLE_1 (1 << 1) +# define R300_TX_ENABLE_2 (1 << 2) +# define R300_TX_ENABLE_3 (1 << 3) +# define R300_TX_ENABLE_4 (1 << 4) +# define R300_TX_ENABLE_5 (1 << 5) +# define R300_TX_ENABLE_6 (1 << 6) +# define R300_TX_ENABLE_7 (1 << 7) +# define R300_TX_ENABLE_8 (1 << 8) +# define R300_TX_ENABLE_9 (1 << 9) +# define R300_TX_ENABLE_10 (1 << 10) +# define R300_TX_ENABLE_11 (1 << 11) +# define R300_TX_ENABLE_12 (1 << 12) +# define R300_TX_ENABLE_13 (1 << 13) +# define R300_TX_ENABLE_14 (1 << 14) +# define R300_TX_ENABLE_15 (1 << 15) + +/* The pointsize is given in multiples of 6. The pointsize can be + * enormous: Clear() renders a single point that fills the entire + * framebuffer. + */ +#define R300_RE_POINTSIZE 0x421C +# define R300_POINTSIZE_Y_SHIFT 0 +# define R300_POINTSIZE_Y_MASK (0xFFFF << 0) /* GUESS */ +# define R300_POINTSIZE_X_SHIFT 16 +# define R300_POINTSIZE_X_MASK (0xFFFF << 16) /* GUESS */ +# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) + +/* The line width is given in multiples of 6. + * In default mode lines are classified as vertical lines. + * HO: horizontal + * VE: vertical or horizontal + * HO & VE: no classification + */ +#define R300_RE_LINE_CNT 0x4234 +# define R300_LINESIZE_SHIFT 0 +# define R300_LINESIZE_MASK (0xFFFF << 0) /* GUESS */ +# define R300_LINESIZE_MAX (R300_LINESIZE_MASK / 6) +# define R300_LINE_CNT_HO (1 << 16) +# define R300_LINE_CNT_VE (1 << 17) + +/* Some sort of scale or clamp value for texcoordless textures. */ +#define R300_RE_UNK4238 0x4238 + +/* Something shade related */ +#define R300_RE_SHADE 0x4274 + +#define R300_RE_SHADE_MODEL 0x4278 +# define R300_RE_SHADE_MODEL_SMOOTH 0x3aaaa +# define R300_RE_SHADE_MODEL_FLAT 0x39595 + +/* Dangerous */ +#define R300_RE_POLYGON_MODE 0x4288 +# define R300_PM_ENABLED (1 << 0) +# define R300_PM_FRONT_POINT (0 << 0) +# define R300_PM_BACK_POINT (0 << 0) +# define R300_PM_FRONT_LINE (1 << 4) +# define R300_PM_FRONT_FILL (1 << 5) +# define R300_PM_BACK_LINE (1 << 7) +# define R300_PM_BACK_FILL (1 << 8) + +/* Fog parameters */ +#define R300_RE_FOG_SCALE 0x4294 +#define R300_RE_FOG_START 0x4298 + +/* Not sure why there are duplicate of factor and constant values. + * My best guess so far is that there are separate zbiases for test and write. + * Ordering might be wrong. + * Some of the tests indicate that fgl has a fallback implementation of zbias + * via pixel shaders. + */ +#define R300_RE_ZBIAS_CNTL 0x42A0 /* GUESS */ +#define R300_RE_ZBIAS_T_FACTOR 0x42A4 +#define R300_RE_ZBIAS_T_CONSTANT 0x42A8 +#define R300_RE_ZBIAS_W_FACTOR 0x42AC +#define R300_RE_ZBIAS_W_CONSTANT 0x42B0 + +/* This register needs to be set to (1<<1) for RV350 to correctly + * perform depth test (see --vb-triangles in r300_demo) + * Don't know about other chips. - Vladimir + * This is set to 3 when GL_POLYGON_OFFSET_FILL is on. + * My guess is that there are two bits for each zbias primitive + * (FILL, LINE, POINT). + * One to enable depth test and one for depth write. + * Yet this doesnt explain why depth writes work ... + */ +#define R300_RE_OCCLUSION_CNTL 0x42B4 +# define R300_OCCLUSION_ON (1<<1) + +#define R300_RE_CULL_CNTL 0x42B8 +# define R300_CULL_FRONT (1 << 0) +# define R300_CULL_BACK (1 << 1) +# define R300_FRONT_FACE_CCW (0 << 2) +# define R300_FRONT_FACE_CW (1 << 2) + + +/* BEGIN: Rasterization / Interpolators - many guesses */ + +/* 0_UNKNOWN_18 has always been set except for clear operations. + * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends + * on the vertex program, *not* the fragment program) + */ +#define R300_RS_CNTL_0 0x4300 +# define R300_RS_CNTL_TC_CNT_SHIFT 2 +# define R300_RS_CNTL_TC_CNT_MASK (7 << 2) + /* number of color interpolators used */ +# define R300_RS_CNTL_CI_CNT_SHIFT 7 +# define R300_RS_CNTL_0_UNKNOWN_18 (1 << 18) + /* Guess: RS_CNTL_1 holds the index of the highest used RS_ROUTE_n + register. */ +#define R300_RS_CNTL_1 0x4304 + +/* gap */ + +/* Only used for texture coordinates. + * Use the source field to route texture coordinate input from the + * vertex program to the desired interpolator. Note that the source + * field is relative to the outputs the vertex program *actually* + * writes. If a vertex program only writes texcoord[1], this will + * be source index 0. + * Set INTERP_USED on all interpolators that produce data used by + * the fragment program. INTERP_USED looks like a swizzling mask, + * but I haven't seen it used that way. + * + * Note: The _UNKNOWN constants are always set in their respective + * register. I don't know if this is necessary. + */ +#define R300_RS_INTERP_0 0x4310 +#define R300_RS_INTERP_1 0x4314 +# define R300_RS_INTERP_1_UNKNOWN 0x40 +#define R300_RS_INTERP_2 0x4318 +# define R300_RS_INTERP_2_UNKNOWN 0x80 +#define R300_RS_INTERP_3 0x431C +# define R300_RS_INTERP_3_UNKNOWN 0xC0 +#define R300_RS_INTERP_4 0x4320 +#define R300_RS_INTERP_5 0x4324 +#define R300_RS_INTERP_6 0x4328 +#define R300_RS_INTERP_7 0x432C +# define R300_RS_INTERP_SRC_SHIFT 2 +# define R300_RS_INTERP_SRC_MASK (7 << 2) +# define R300_RS_INTERP_USED 0x00D10000 + +/* These DWORDs control how vertex data is routed into fragment program + * registers, after interpolators. + */ +#define R300_RS_ROUTE_0 0x4330 +#define R300_RS_ROUTE_1 0x4334 +#define R300_RS_ROUTE_2 0x4338 +#define R300_RS_ROUTE_3 0x433C /* GUESS */ +#define R300_RS_ROUTE_4 0x4340 /* GUESS */ +#define R300_RS_ROUTE_5 0x4344 /* GUESS */ +#define R300_RS_ROUTE_6 0x4348 /* GUESS */ +#define R300_RS_ROUTE_7 0x434C /* GUESS */ +# define R300_RS_ROUTE_SOURCE_INTERP_0 0 +# define R300_RS_ROUTE_SOURCE_INTERP_1 1 +# define R300_RS_ROUTE_SOURCE_INTERP_2 2 +# define R300_RS_ROUTE_SOURCE_INTERP_3 3 +# define R300_RS_ROUTE_SOURCE_INTERP_4 4 +# define R300_RS_ROUTE_SOURCE_INTERP_5 5 /* GUESS */ +# define R300_RS_ROUTE_SOURCE_INTERP_6 6 /* GUESS */ +# define R300_RS_ROUTE_SOURCE_INTERP_7 7 /* GUESS */ +# define R300_RS_ROUTE_ENABLE (1 << 3) /* GUESS */ +# define R300_RS_ROUTE_DEST_SHIFT 6 +# define R300_RS_ROUTE_DEST_MASK (31 << 6) /* GUESS */ + +/* Special handling for color: When the fragment program uses color, + * the ROUTE_0_COLOR bit is set and ROUTE_0_COLOR_DEST contains the + * color register index. + * + * Apperently you may set the R300_RS_ROUTE_0_COLOR bit, but not provide any + * R300_RS_ROUTE_0_COLOR_DEST value; this setup is used for clearing the state. + * See r300_ioctl.c:r300EmitClearState. I'm not sure if this setup is strictly + * correct or not. - Oliver. + */ +# define R300_RS_ROUTE_0_COLOR (1 << 14) +# define R300_RS_ROUTE_0_COLOR_DEST_SHIFT 17 +# define R300_RS_ROUTE_0_COLOR_DEST_MASK (31 << 17) /* GUESS */ +/* As above, but for secondary color */ +# define R300_RS_ROUTE_1_COLOR1 (1 << 14) +# define R300_RS_ROUTE_1_COLOR1_DEST_SHIFT 17 +# define R300_RS_ROUTE_1_COLOR1_DEST_MASK (31 << 17) +# define R300_RS_ROUTE_1_UNKNOWN11 (1 << 11) +/* END: Rasterization / Interpolators - many guesses */ + +/* Hierarchical Z Enable */ +#define R300_SC_HYPERZ 0x43a4 +# define R300_SC_HYPERZ_DISABLE (0 << 0) +# define R300_SC_HYPERZ_ENABLE (1 << 0) +# define R300_SC_HYPERZ_MIN (0 << 1) +# define R300_SC_HYPERZ_MAX (1 << 1) +# define R300_SC_HYPERZ_ADJ_256 (0 << 2) +# define R300_SC_HYPERZ_ADJ_128 (1 << 2) +# define R300_SC_HYPERZ_ADJ_64 (2 << 2) +# define R300_SC_HYPERZ_ADJ_32 (3 << 2) +# define R300_SC_HYPERZ_ADJ_16 (4 << 2) +# define R300_SC_HYPERZ_ADJ_8 (5 << 2) +# define R300_SC_HYPERZ_ADJ_4 (6 << 2) +# define R300_SC_HYPERZ_ADJ_2 (7 << 2) +# define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5) +# define R300_SC_HYPERZ_HZ_Z0MIN (1 << 5) +# define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6) +# define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6) + +#define R300_SC_EDGERULE 0x43a8 + +/* BEGIN: Scissors and cliprects */ + +/* There are four clipping rectangles. Their corner coordinates are inclusive. + * Every pixel is assigned a number from 0 and 15 by setting bits 0-3 depending + * on whether the pixel is inside cliprects 0-3, respectively. For example, + * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned + * the number 3 (binary 0011). + * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set, + * the pixel is rasterized. + * + * In addition to this, there is a scissors rectangle. Only pixels inside the + * scissors rectangle are drawn. (coordinates are inclusive) + * + * For some reason, the top-left corner of the framebuffer is at (1440, 1440) + * for the purpose of clipping and scissors. + */ +#define R300_RE_CLIPRECT_TL_0 0x43B0 +#define R300_RE_CLIPRECT_BR_0 0x43B4 +#define R300_RE_CLIPRECT_TL_1 0x43B8 +#define R300_RE_CLIPRECT_BR_1 0x43BC +#define R300_RE_CLIPRECT_TL_2 0x43C0 +#define R300_RE_CLIPRECT_BR_2 0x43C4 +#define R300_RE_CLIPRECT_TL_3 0x43C8 +#define R300_RE_CLIPRECT_BR_3 0x43CC +# define R300_CLIPRECT_OFFSET 1440 +# define R300_CLIPRECT_MASK 0x1FFF +# define R300_CLIPRECT_X_SHIFT 0 +# define R300_CLIPRECT_X_MASK (0x1FFF << 0) +# define R300_CLIPRECT_Y_SHIFT 13 +# define R300_CLIPRECT_Y_MASK (0x1FFF << 13) +#define R300_RE_CLIPRECT_CNTL 0x43D0 +# define R300_CLIP_OUT (1 << 0) +# define R300_CLIP_0 (1 << 1) +# define R300_CLIP_1 (1 << 2) +# define R300_CLIP_10 (1 << 3) +# define R300_CLIP_2 (1 << 4) +# define R300_CLIP_20 (1 << 5) +# define R300_CLIP_21 (1 << 6) +# define R300_CLIP_210 (1 << 7) +# define R300_CLIP_3 (1 << 8) +# define R300_CLIP_30 (1 << 9) +# define R300_CLIP_31 (1 << 10) +# define R300_CLIP_310 (1 << 11) +# define R300_CLIP_32 (1 << 12) +# define R300_CLIP_320 (1 << 13) +# define R300_CLIP_321 (1 << 14) +# define R300_CLIP_3210 (1 << 15) + +/* gap */ + +#define R300_RE_SCISSORS_TL 0x43E0 +#define R300_RE_SCISSORS_BR 0x43E4 +# define R300_SCISSORS_OFFSET 1440 +# define R300_SCISSORS_X_SHIFT 0 +# define R300_SCISSORS_X_MASK (0x1FFF << 0) +# define R300_SCISSORS_Y_SHIFT 13 +# define R300_SCISSORS_Y_MASK (0x1FFF << 13) +/* END: Scissors and cliprects */ + +/* BEGIN: Texture specification */ + +/* + * The texture specification dwords are grouped by meaning and not by texture + * unit. This means that e.g. the offset for texture image unit N is found in + * register TX_OFFSET_0 + (4*N) + */ +#define R300_TX_FILTER_0 0x4400 +# define R300_TX_REPEAT 0 +# define R300_TX_MIRRORED 1 +# define R300_TX_CLAMP 4 +# define R300_TX_CLAMP_TO_EDGE 2 +# define R300_TX_CLAMP_TO_BORDER 6 +# define R300_TX_WRAP_S_SHIFT 0 +# define R300_TX_WRAP_S_MASK (7 << 0) +# define R300_TX_WRAP_T_SHIFT 3 +# define R300_TX_WRAP_T_MASK (7 << 3) +# define R300_TX_WRAP_Q_SHIFT 6 +# define R300_TX_WRAP_Q_MASK (7 << 6) +# define R300_TX_MAG_FILTER_NEAREST (1 << 9) +# define R300_TX_MAG_FILTER_LINEAR (2 << 9) +# define R300_TX_MAG_FILTER_MASK (3 << 9) +# define R300_TX_MIN_FILTER_NEAREST (1 << 11) +# define R300_TX_MIN_FILTER_LINEAR (2 << 11) +# define R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST (5 << 11) +# define R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR (9 << 11) +# define R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST (6 << 11) +# define R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR (10 << 11) + +/* NOTE: NEAREST doesnt seem to exist. + * Im not seting MAG_FILTER_MASK and (3 << 11) on for all + * anisotropy modes because that would void selected mag filter + */ +# define R300_TX_MIN_FILTER_ANISO_NEAREST (0 << 13) +# define R300_TX_MIN_FILTER_ANISO_LINEAR (0 << 13) +# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (1 << 13) +# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (2 << 13) +# define R300_TX_MIN_FILTER_MASK ( (15 << 11) | (3 << 13) ) +# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) +# define R300_TX_MAX_ANISO_2_TO_1 (2 << 21) +# define R300_TX_MAX_ANISO_4_TO_1 (4 << 21) +# define R300_TX_MAX_ANISO_8_TO_1 (6 << 21) +# define R300_TX_MAX_ANISO_16_TO_1 (8 << 21) +# define R300_TX_MAX_ANISO_MASK (14 << 21) + +#define R300_TX_FILTER1_0 0x4440 +# define R300_CHROMA_KEY_MODE_DISABLE 0 +# define R300_CHROMA_KEY_FORCE 1 +# define R300_CHROMA_KEY_BLEND 2 +# define R300_MC_ROUND_NORMAL (0<<2) +# define R300_MC_ROUND_MPEG4 (1<<2) +# define R300_LOD_BIAS_MASK 0x1fff +# define R300_EDGE_ANISO_EDGE_DIAG (0<<13) +# define R300_EDGE_ANISO_EDGE_ONLY (1<<13) +# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14) +# define R300_MC_COORD_TRUNCATE_MPEG (1<<14) +# define R300_TX_TRI_PERF_0_8 (0<<15) +# define R300_TX_TRI_PERF_1_8 (1<<15) +# define R300_TX_TRI_PERF_1_4 (2<<15) +# define R300_TX_TRI_PERF_3_8 (3<<15) +# define R300_ANISO_THRESHOLD_MASK (7<<17) + +#define R300_TX_SIZE_0 0x4480 +# define R300_TX_WIDTHMASK_SHIFT 0 +# define R300_TX_WIDTHMASK_MASK (2047 << 0) +# define R300_TX_HEIGHTMASK_SHIFT 11 +# define R300_TX_HEIGHTMASK_MASK (2047 << 11) +# define R300_TX_UNK23 (1 << 23) +# define R300_TX_MAX_MIP_LEVEL_SHIFT 26 +# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) +# define R300_TX_SIZE_PROJECTED (1<<30) +# define R300_TX_SIZE_TXPITCH_EN (1<<31) +#define R300_TX_FORMAT_0 0x44C0 + /* The interpretation of the format word by Wladimir van der Laan */ + /* The X, Y, Z and W refer to the layout of the components. + They are given meanings as R, G, B and Alpha by the swizzle + specification */ +# define R300_TX_FORMAT_X8 0x0 +# define R300_TX_FORMAT_X16 0x1 +# define R300_TX_FORMAT_Y4X4 0x2 +# define R300_TX_FORMAT_Y8X8 0x3 +# define R300_TX_FORMAT_Y16X16 0x4 +# define R300_TX_FORMAT_Z3Y3X2 0x5 +# define R300_TX_FORMAT_Z5Y6X5 0x6 +# define R300_TX_FORMAT_Z6Y5X5 0x7 +# define R300_TX_FORMAT_Z11Y11X10 0x8 +# define R300_TX_FORMAT_Z10Y11X11 0x9 +# define R300_TX_FORMAT_W4Z4Y4X4 0xA +# define R300_TX_FORMAT_W1Z5Y5X5 0xB +# define R300_TX_FORMAT_W8Z8Y8X8 0xC +# define R300_TX_FORMAT_W2Z10Y10X10 0xD +# define R300_TX_FORMAT_W16Z16Y16X16 0xE +# define R300_TX_FORMAT_DXT1 0xF +# define R300_TX_FORMAT_DXT3 0x10 +# define R300_TX_FORMAT_DXT5 0x11 +# define R300_TX_FORMAT_D3DMFT_CxV8U8 0x12 /* no swizzle */ +# define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */ +# define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */ +# define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */ + /* 0x16 - some 16 bit green format.. ?? */ +# define R300_TX_FORMAT_UNK25 (1 << 25) /* no swizzle */ +# define R300_TX_FORMAT_CUBIC_MAP (1 << 26) + + /* gap */ + /* Floating point formats */ + /* Note - hardware supports both 16 and 32 bit floating point */ +# define R300_TX_FORMAT_FL_I16 0x18 +# define R300_TX_FORMAT_FL_I16A16 0x19 +# define R300_TX_FORMAT_FL_R16G16B16A16 0x1A +# define R300_TX_FORMAT_FL_I32 0x1B +# define R300_TX_FORMAT_FL_I32A32 0x1C +# define R300_TX_FORMAT_FL_R32G32B32A32 0x1D + /* alpha modes, convenience mostly */ + /* if you have alpha, pick constant appropriate to the + number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */ +# define R300_TX_FORMAT_ALPHA_1CH 0x000 +# define R300_TX_FORMAT_ALPHA_2CH 0x200 +# define R300_TX_FORMAT_ALPHA_4CH 0x600 +# define R300_TX_FORMAT_ALPHA_NONE 0xA00 + /* Swizzling */ + /* constants */ +# define R300_TX_FORMAT_X 0 +# define R300_TX_FORMAT_Y 1 +# define R300_TX_FORMAT_Z 2 +# define R300_TX_FORMAT_W 3 +# define R300_TX_FORMAT_ZERO 4 +# define R300_TX_FORMAT_ONE 5 + /* 2.0*Z, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_Z 6 + /* 2.0*W, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_W 7 + +# define R300_TX_FORMAT_B_SHIFT 18 +# define R300_TX_FORMAT_G_SHIFT 15 +# define R300_TX_FORMAT_R_SHIFT 12 +# define R300_TX_FORMAT_A_SHIFT 9 + /* Convenience macro to take care of layout and swizzling */ +# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \ + ((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \ + | ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \ + | ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \ + | ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \ + | (R300_TX_FORMAT_##FMT) \ + ) + /* These can be ORed with result of R300_EASY_TX_FORMAT() + We don't really know what they do. Take values from a + constant color ? */ +# define R300_TX_FORMAT_CONST_X (1<<5) +# define R300_TX_FORMAT_CONST_Y (2<<5) +# define R300_TX_FORMAT_CONST_Z (4<<5) +# define R300_TX_FORMAT_CONST_W (8<<5) + +# define R300_TX_FORMAT_YUV_MODE 0x00800000 + +#define R300_TX_PITCH_0 0x4500 /* obvious missing in gap */ +#define R300_TX_OFFSET_0 0x4540 + /* BEGIN: Guess from R200 */ +# define R300_TXO_ENDIAN_NO_SWAP (0 << 0) +# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) +# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) +# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) +# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MICRO_TILE (1 << 3) +# define R300_TXO_OFFSET_MASK 0xffffffe0 +# define R300_TXO_OFFSET_SHIFT 5 + /* END: Guess from R200 */ + +/* 32 bit chroma key */ +#define R300_TX_CHROMA_KEY_0 0x4580 +/* ff00ff00 == { 0, 1.0, 0, 1.0 } */ +#define R300_TX_BORDER_COLOR_0 0x45C0 + +/* END: Texture specification */ + +/* BEGIN: Fragment program instruction set */ + +/* Fragment programs are written directly into register space. + * There are separate instruction streams for texture instructions and ALU + * instructions. + * In order to synchronize these streams, the program is divided into up + * to 4 nodes. Each node begins with a number of TEX operations, followed + * by a number of ALU operations. + * The first node can have zero TEX ops, all subsequent nodes must have at + * least + * one TEX ops. + * All nodes must have at least one ALU op. + * + * The index of the last node is stored in PFS_CNTL_0: A value of 0 means + * 1 node, a value of 3 means 4 nodes. + * The total amount of instructions is defined in PFS_CNTL_2. The offsets are + * offsets into the respective instruction streams, while *_END points to the + * last instruction relative to this offset. + */ +#define R300_PFS_CNTL_0 0x4600 +# define R300_PFS_CNTL_LAST_NODES_SHIFT 0 +# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0) +# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3) +#define R300_PFS_CNTL_1 0x4604 +/* There is an unshifted value here which has so far always been equal to the + * index of the highest used temporary register. + */ +#define R300_PFS_CNTL_2 0x4608 +# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0 +# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0) +# define R300_PFS_CNTL_ALU_END_SHIFT 6 +# define R300_PFS_CNTL_ALU_END_MASK (63 << 6) +# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 12 +# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 12) /* GUESS */ +# define R300_PFS_CNTL_TEX_END_SHIFT 18 +# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* GUESS */ + +/* gap */ + +/* Nodes are stored backwards. The last active node is always stored in + * PFS_NODE_3. + * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The + * first node is stored in NODE_2, the second node is stored in NODE_3. + * + * Offsets are relative to the master offset from PFS_CNTL_2. + */ +#define R300_PFS_NODE_0 0x4610 +#define R300_PFS_NODE_1 0x4614 +#define R300_PFS_NODE_2 0x4618 +#define R300_PFS_NODE_3 0x461C +# define R300_PFS_NODE_ALU_OFFSET_SHIFT 0 +# define R300_PFS_NODE_ALU_OFFSET_MASK (63 << 0) +# define R300_PFS_NODE_ALU_END_SHIFT 6 +# define R300_PFS_NODE_ALU_END_MASK (63 << 6) +# define R300_PFS_NODE_TEX_OFFSET_SHIFT 12 +# define R300_PFS_NODE_TEX_OFFSET_MASK (31 << 12) +# define R300_PFS_NODE_TEX_END_SHIFT 17 +# define R300_PFS_NODE_TEX_END_MASK (31 << 17) +# define R300_PFS_NODE_OUTPUT_COLOR (1 << 22) +# define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23) + +/* TEX + * As far as I can tell, texture instructions cannot write into output + * registers directly. A subsequent ALU instruction is always necessary, + * even if it's just MAD o0, r0, 1, 0 + */ +#define R300_PFS_TEXI_0 0x4620 +# define R300_FPITX_SRC_SHIFT 0 +# define R300_FPITX_SRC_MASK (31 << 0) + /* GUESS */ +# define R300_FPITX_SRC_CONST (1 << 5) +# define R300_FPITX_DST_SHIFT 6 +# define R300_FPITX_DST_MASK (31 << 6) +# define R300_FPITX_IMAGE_SHIFT 11 + /* GUESS based on layout and native limits */ +# define R300_FPITX_IMAGE_MASK (15 << 11) +/* Unsure if these are opcodes, or some kind of bitfield, but this is how + * they were set when I checked + */ +# define R300_FPITX_OPCODE_SHIFT 15 +# define R300_FPITX_OP_TEX 1 +# define R300_FPITX_OP_KIL 2 +# define R300_FPITX_OP_TXP 3 +# define R300_FPITX_OP_TXB 4 +# define R300_FPITX_OPCODE_MASK (7 << 15) + +/* ALU + * The ALU instructions register blocks are enumerated according to the order + * in which fglrx. I assume there is space for 64 instructions, since + * each block has space for a maximum of 64 DWORDs, and this matches reported + * native limits. + * + * The basic functional block seems to be one MAD for each color and alpha, + * and an adder that adds all components after the MUL. + * - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands + * - DP4: Use OUTC_DP4, OUTA_DP4 + * - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands + * - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands + * - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1 + * - CMP: If ARG2 < 0, return ARG1, else return ARG0 + * - FLR: use FRC+MAD + * - XPD: use MAD+MAD + * - SGE, SLT: use MAD+CMP + * - RSQ: use ABS modifier for argument + * - Use OUTC_REPL_ALPHA to write results of an alpha-only operation + * (e.g. RCP) into color register + * - apparently, there's no quick DST operation + * - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2" + * - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0" + * - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1" + * + * Operand selection + * First stage selects three sources from the available registers and + * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha). + * fglrx sorts the three source fields: Registers before constants, + * lower indices before higher indices; I do not know whether this is + * necessary. + * + * fglrx fills unused sources with "read constant 0" + * According to specs, you cannot select more than two different constants. + * + * Second stage selects the operands from the sources. This is defined in + * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants + * zero and one. + * Swizzling and negation happens in this stage, as well. + * + * Important: Color and alpha seem to be mostly separate, i.e. their sources + * selection appears to be fully independent (the register storage is probably + * physically split into a color and an alpha section). + * However (because of the apparent physical split), there is some interaction + * WRT swizzling. If, for example, you want to load an R component into an + * Alpha operand, this R component is taken from a *color* source, not from + * an alpha source. The corresponding register doesn't even have to appear in + * the alpha sources list. (I hope this all makes sense to you) + * + * Destination selection + * The destination register index is in FPI1 (color) and FPI3 (alpha) + * together with enable bits. + * There are separate enable bits for writing into temporary registers + * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_* + * /DSTA_OUTPUT). You can write to both at once, or not write at all (the + * same index must be used for both). + * + * Note: There is a special form for LRP + * - Argument order is the same as in ARB_fragment_program. + * - Operation is MAD + * - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP + * - Set FPI0/FPI2_SPECIAL_LRP + * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD + */ +#define R300_PFS_INSTR1_0 0x46C0 +# define R300_FPI1_SRC0C_SHIFT 0 +# define R300_FPI1_SRC0C_MASK (31 << 0) +# define R300_FPI1_SRC0C_CONST (1 << 5) +# define R300_FPI1_SRC1C_SHIFT 6 +# define R300_FPI1_SRC1C_MASK (31 << 6) +# define R300_FPI1_SRC1C_CONST (1 << 11) +# define R300_FPI1_SRC2C_SHIFT 12 +# define R300_FPI1_SRC2C_MASK (31 << 12) +# define R300_FPI1_SRC2C_CONST (1 << 17) +# define R300_FPI1_SRC_MASK 0x0003ffff +# define R300_FPI1_DSTC_SHIFT 18 +# define R300_FPI1_DSTC_MASK (31 << 18) +# define R300_FPI1_DSTC_REG_MASK_SHIFT 23 +# define R300_FPI1_DSTC_REG_X (1 << 23) +# define R300_FPI1_DSTC_REG_Y (1 << 24) +# define R300_FPI1_DSTC_REG_Z (1 << 25) +# define R300_FPI1_DSTC_OUTPUT_MASK_SHIFT 26 +# define R300_FPI1_DSTC_OUTPUT_X (1 << 26) +# define R300_FPI1_DSTC_OUTPUT_Y (1 << 27) +# define R300_FPI1_DSTC_OUTPUT_Z (1 << 28) + +#define R300_PFS_INSTR3_0 0x47C0 +# define R300_FPI3_SRC0A_SHIFT 0 +# define R300_FPI3_SRC0A_MASK (31 << 0) +# define R300_FPI3_SRC0A_CONST (1 << 5) +# define R300_FPI3_SRC1A_SHIFT 6 +# define R300_FPI3_SRC1A_MASK (31 << 6) +# define R300_FPI3_SRC1A_CONST (1 << 11) +# define R300_FPI3_SRC2A_SHIFT 12 +# define R300_FPI3_SRC2A_MASK (31 << 12) +# define R300_FPI3_SRC2A_CONST (1 << 17) +# define R300_FPI3_SRC_MASK 0x0003ffff +# define R300_FPI3_DSTA_SHIFT 18 +# define R300_FPI3_DSTA_MASK (31 << 18) +# define R300_FPI3_DSTA_REG (1 << 23) +# define R300_FPI3_DSTA_OUTPUT (1 << 24) +# define R300_FPI3_DSTA_DEPTH (1 << 27) + +#define R300_PFS_INSTR0_0 0x48C0 +# define R300_FPI0_ARGC_SRC0C_XYZ 0 +# define R300_FPI0_ARGC_SRC0C_XXX 1 +# define R300_FPI0_ARGC_SRC0C_YYY 2 +# define R300_FPI0_ARGC_SRC0C_ZZZ 3 +# define R300_FPI0_ARGC_SRC1C_XYZ 4 +# define R300_FPI0_ARGC_SRC1C_XXX 5 +# define R300_FPI0_ARGC_SRC1C_YYY 6 +# define R300_FPI0_ARGC_SRC1C_ZZZ 7 +# define R300_FPI0_ARGC_SRC2C_XYZ 8 +# define R300_FPI0_ARGC_SRC2C_XXX 9 +# define R300_FPI0_ARGC_SRC2C_YYY 10 +# define R300_FPI0_ARGC_SRC2C_ZZZ 11 +# define R300_FPI0_ARGC_SRC0A 12 +# define R300_FPI0_ARGC_SRC1A 13 +# define R300_FPI0_ARGC_SRC2A 14 +# define R300_FPI0_ARGC_SRC1C_LRP 15 +# define R300_FPI0_ARGC_ZERO 20 +# define R300_FPI0_ARGC_ONE 21 + /* GUESS */ +# define R300_FPI0_ARGC_HALF 22 +# define R300_FPI0_ARGC_SRC0C_YZX 23 +# define R300_FPI0_ARGC_SRC1C_YZX 24 +# define R300_FPI0_ARGC_SRC2C_YZX 25 +# define R300_FPI0_ARGC_SRC0C_ZXY 26 +# define R300_FPI0_ARGC_SRC1C_ZXY 27 +# define R300_FPI0_ARGC_SRC2C_ZXY 28 +# define R300_FPI0_ARGC_SRC0CA_WZY 29 +# define R300_FPI0_ARGC_SRC1CA_WZY 30 +# define R300_FPI0_ARGC_SRC2CA_WZY 31 + +# define R300_FPI0_ARG0C_SHIFT 0 +# define R300_FPI0_ARG0C_MASK (31 << 0) +# define R300_FPI0_ARG0C_NEG (1 << 5) +# define R300_FPI0_ARG0C_ABS (1 << 6) +# define R300_FPI0_ARG1C_SHIFT 7 +# define R300_FPI0_ARG1C_MASK (31 << 7) +# define R300_FPI0_ARG1C_NEG (1 << 12) +# define R300_FPI0_ARG1C_ABS (1 << 13) +# define R300_FPI0_ARG2C_SHIFT 14 +# define R300_FPI0_ARG2C_MASK (31 << 14) +# define R300_FPI0_ARG2C_NEG (1 << 19) +# define R300_FPI0_ARG2C_ABS (1 << 20) +# define R300_FPI0_SPECIAL_LRP (1 << 21) +# define R300_FPI0_OUTC_MAD (0 << 23) +# define R300_FPI0_OUTC_DP3 (1 << 23) +# define R300_FPI0_OUTC_DP4 (2 << 23) +# define R300_FPI0_OUTC_MIN (4 << 23) +# define R300_FPI0_OUTC_MAX (5 << 23) +# define R300_FPI0_OUTC_CMPH (7 << 23) +# define R300_FPI0_OUTC_CMP (8 << 23) +# define R300_FPI0_OUTC_FRC (9 << 23) +# define R300_FPI0_OUTC_REPL_ALPHA (10 << 23) +# define R300_FPI0_OUTC_SAT (1 << 30) +# define R300_FPI0_INSERT_NOP (1 << 31) + +#define R300_PFS_INSTR2_0 0x49C0 +# define R300_FPI2_ARGA_SRC0C_X 0 +# define R300_FPI2_ARGA_SRC0C_Y 1 +# define R300_FPI2_ARGA_SRC0C_Z 2 +# define R300_FPI2_ARGA_SRC1C_X 3 +# define R300_FPI2_ARGA_SRC1C_Y 4 +# define R300_FPI2_ARGA_SRC1C_Z 5 +# define R300_FPI2_ARGA_SRC2C_X 6 +# define R300_FPI2_ARGA_SRC2C_Y 7 +# define R300_FPI2_ARGA_SRC2C_Z 8 +# define R300_FPI2_ARGA_SRC0A 9 +# define R300_FPI2_ARGA_SRC1A 10 +# define R300_FPI2_ARGA_SRC2A 11 +# define R300_FPI2_ARGA_SRC1A_LRP 15 +# define R300_FPI2_ARGA_ZERO 16 +# define R300_FPI2_ARGA_ONE 17 + /* GUESS */ +# define R300_FPI2_ARGA_HALF 18 +# define R300_FPI2_ARG0A_SHIFT 0 +# define R300_FPI2_ARG0A_MASK (31 << 0) +# define R300_FPI2_ARG0A_NEG (1 << 5) + /* GUESS */ +# define R300_FPI2_ARG0A_ABS (1 << 6) +# define R300_FPI2_ARG1A_SHIFT 7 +# define R300_FPI2_ARG1A_MASK (31 << 7) +# define R300_FPI2_ARG1A_NEG (1 << 12) + /* GUESS */ +# define R300_FPI2_ARG1A_ABS (1 << 13) +# define R300_FPI2_ARG2A_SHIFT 14 +# define R300_FPI2_ARG2A_MASK (31 << 14) +# define R300_FPI2_ARG2A_NEG (1 << 19) + /* GUESS */ +# define R300_FPI2_ARG2A_ABS (1 << 20) +# define R300_FPI2_SPECIAL_LRP (1 << 21) +# define R300_FPI2_OUTA_MAD (0 << 23) +# define R300_FPI2_OUTA_DP4 (1 << 23) +# define R300_FPI2_OUTA_MIN (2 << 23) +# define R300_FPI2_OUTA_MAX (3 << 23) +# define R300_FPI2_OUTA_CMP (6 << 23) +# define R300_FPI2_OUTA_FRC (7 << 23) +# define R300_FPI2_OUTA_EX2 (8 << 23) +# define R300_FPI2_OUTA_LG2 (9 << 23) +# define R300_FPI2_OUTA_RCP (10 << 23) +# define R300_FPI2_OUTA_RSQ (11 << 23) +# define R300_FPI2_OUTA_SAT (1 << 30) +# define R300_FPI2_UNKNOWN_31 (1 << 31) +/* END: Fragment program instruction set */ + +/* Fog state and color */ +#define R300_RE_FOG_STATE 0x4BC0 +# define R300_FOG_ENABLE (1 << 0) +# define R300_FOG_MODE_LINEAR (0 << 1) +# define R300_FOG_MODE_EXP (1 << 1) +# define R300_FOG_MODE_EXP2 (2 << 1) +# define R300_FOG_MODE_MASK (3 << 1) +#define R300_FOG_COLOR_R 0x4BC8 +#define R300_FOG_COLOR_G 0x4BCC +#define R300_FOG_COLOR_B 0x4BD0 + +#define R300_PP_ALPHA_TEST 0x4BD4 +# define R300_REF_ALPHA_MASK 0x000000ff +# define R300_ALPHA_TEST_FAIL (0 << 8) +# define R300_ALPHA_TEST_LESS (1 << 8) +# define R300_ALPHA_TEST_LEQUAL (3 << 8) +# define R300_ALPHA_TEST_EQUAL (2 << 8) +# define R300_ALPHA_TEST_GEQUAL (6 << 8) +# define R300_ALPHA_TEST_GREATER (4 << 8) +# define R300_ALPHA_TEST_NEQUAL (5 << 8) +# define R300_ALPHA_TEST_PASS (7 << 8) +# define R300_ALPHA_TEST_OP_MASK (7 << 8) +# define R300_ALPHA_TEST_ENABLE (1 << 11) + +/* gap */ + +/* Fragment program parameters in 7.16 floating point */ +#define R300_PFS_PARAM_0_X 0x4C00 +#define R300_PFS_PARAM_0_Y 0x4C04 +#define R300_PFS_PARAM_0_Z 0x4C08 +#define R300_PFS_PARAM_0_W 0x4C0C +/* GUESS: PARAM_31 is last, based on native limits reported by fglrx */ +#define R300_PFS_PARAM_31_X 0x4DF0 +#define R300_PFS_PARAM_31_Y 0x4DF4 +#define R300_PFS_PARAM_31_Z 0x4DF8 +#define R300_PFS_PARAM_31_W 0x4DFC + +/* Notes: + * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in + * the application + * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND + * are set to the same + * function (both registers are always set up completely in any case) + * - Most blend flags are simply copied from R200 and not tested yet + */ +#define R300_RB3D_CBLEND 0x4E04 +#define R300_RB3D_ABLEND 0x4E08 +/* the following only appear in CBLEND */ +# define R300_BLEND_ENABLE (1 << 0) +# define R300_BLEND_UNKNOWN (3 << 1) +# define R300_BLEND_NO_SEPARATE (1 << 3) +/* the following are shared between CBLEND and ABLEND */ +# define R300_FCN_MASK (3 << 12) +# define R300_COMB_FCN_ADD_CLAMP (0 << 12) +# define R300_COMB_FCN_ADD_NOCLAMP (1 << 12) +# define R300_COMB_FCN_SUB_CLAMP (2 << 12) +# define R300_COMB_FCN_SUB_NOCLAMP (3 << 12) +# define R300_COMB_FCN_MIN (4 << 12) +# define R300_COMB_FCN_MAX (5 << 12) +# define R300_COMB_FCN_RSUB_CLAMP (6 << 12) +# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12) +# define R300_BLEND_GL_ZERO (32) +# define R300_BLEND_GL_ONE (33) +# define R300_BLEND_GL_SRC_COLOR (34) +# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35) +# define R300_BLEND_GL_DST_COLOR (36) +# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37) +# define R300_BLEND_GL_SRC_ALPHA (38) +# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39) +# define R300_BLEND_GL_DST_ALPHA (40) +# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41) +# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42) +# define R300_BLEND_GL_CONST_COLOR (43) +# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44) +# define R300_BLEND_GL_CONST_ALPHA (45) +# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46) +# define R300_BLEND_MASK (63) +# define R300_SRC_BLEND_SHIFT (16) +# define R300_DST_BLEND_SHIFT (24) +#define R300_RB3D_BLEND_COLOR 0x4E10 +#define R300_RB3D_COLORMASK 0x4E0C +# define R300_COLORMASK0_B (1<<0) +# define R300_COLORMASK0_G (1<<1) +# define R300_COLORMASK0_R (1<<2) +# define R300_COLORMASK0_A (1<<3) + +/* gap */ + +#define R300_RB3D_COLOROFFSET0 0x4E28 +# define R300_COLOROFFSET_MASK 0xFFFFFFF0 /* GUESS */ +#define R300_RB3D_COLOROFFSET1 0x4E2C /* GUESS */ +#define R300_RB3D_COLOROFFSET2 0x4E30 /* GUESS */ +#define R300_RB3D_COLOROFFSET3 0x4E34 /* GUESS */ + +/* gap */ + +/* Bit 16: Larger tiles + * Bit 17: 4x2 tiles + * Bit 18: Extremely weird tile like, but some pixels duplicated? + */ +#define R300_RB3D_COLORPITCH0 0x4E38 +# define R300_COLORPITCH_MASK 0x00001FF8 /* GUESS */ +# define R300_COLOR_TILE_ENABLE (1 << 16) /* GUESS */ +# define R300_COLOR_MICROTILE_ENABLE (1 << 17) /* GUESS */ +# define R300_COLOR_ENDIAN_NO_SWAP (0 << 18) /* GUESS */ +# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 18) /* GUESS */ +# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 18) /* GUESS */ +# define R300_COLOR_FORMAT_RGB565 (2 << 22) +# define R300_COLOR_FORMAT_ARGB8888 (3 << 22) +#define R300_RB3D_COLORPITCH1 0x4E3C /* GUESS */ +#define R300_RB3D_COLORPITCH2 0x4E40 /* GUESS */ +#define R300_RB3D_COLORPITCH3 0x4E44 /* GUESS */ + +/* gap */ + +/* Guess by Vladimir. + * Set to 0A before 3D operations, set to 02 afterwards. + */ +/*#define R300_RB3D_DSTCACHE_CTLSTAT 0x4E4C*/ +# define R300_RB3D_DSTCACHE_UNKNOWN_02 0x00000002 +# define R300_RB3D_DSTCACHE_UNKNOWN_0A 0x0000000A + +/* gap */ +/* There seems to be no "write only" setting, so use Z-test = ALWAYS + * for this. + * Bit (1<<8) is the "test" bit. so plain write is 6 - vd + */ +#define R300_ZB_CNTL 0x4F00 +# define R300_STENCIL_ENABLE (1 << 0) +# define R300_Z_ENABLE (1 << 1) +# define R300_Z_WRITE_ENABLE (1 << 2) +# define R300_Z_SIGNED_COMPARE (1 << 3) +# define R300_STENCIL_FRONT_BACK (1 << 4) + +#define R300_ZB_ZSTENCILCNTL 0x4f04 + /* functions */ +# define R300_ZS_NEVER 0 +# define R300_ZS_LESS 1 +# define R300_ZS_LEQUAL 2 +# define R300_ZS_EQUAL 3 +# define R300_ZS_GEQUAL 4 +# define R300_ZS_GREATER 5 +# define R300_ZS_NOTEQUAL 6 +# define R300_ZS_ALWAYS 7 +# define R300_ZS_MASK 7 + /* operations */ +# define R300_ZS_KEEP 0 +# define R300_ZS_ZERO 1 +# define R300_ZS_REPLACE 2 +# define R300_ZS_INCR 3 +# define R300_ZS_DECR 4 +# define R300_ZS_INVERT 5 +# define R300_ZS_INCR_WRAP 6 +# define R300_ZS_DECR_WRAP 7 +# define R300_Z_FUNC_SHIFT 0 + /* front and back refer to operations done for front + and back faces, i.e. separate stencil function support */ +# define R300_S_FRONT_FUNC_SHIFT 3 +# define R300_S_FRONT_SFAIL_OP_SHIFT 6 +# define R300_S_FRONT_ZPASS_OP_SHIFT 9 +# define R300_S_FRONT_ZFAIL_OP_SHIFT 12 +# define R300_S_BACK_FUNC_SHIFT 15 +# define R300_S_BACK_SFAIL_OP_SHIFT 18 +# define R300_S_BACK_ZPASS_OP_SHIFT 21 +# define R300_S_BACK_ZFAIL_OP_SHIFT 24 + +#define R300_ZB_STENCILREFMASK 0x4f08 +# define R300_STENCILREF_SHIFT 0 +# define R300_STENCILREF_MASK 0x000000ff +# define R300_STENCILMASK_SHIFT 8 +# define R300_STENCILMASK_MASK 0x0000ff00 +# define R300_STENCILWRITEMASK_SHIFT 16 +# define R300_STENCILWRITEMASK_MASK 0x00ff0000 + +/* gap */ + +#define R300_ZB_FORMAT 0x4f10 +# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0) +# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0) +# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0) +/* reserved up to (15 << 0) */ +# define R300_INVERT_13E3_LEADING_ONES (0 << 4) +# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4) + +#define R300_ZB_ZTOP 0x4F14 +# define R300_ZTOP_DISABLE (0 << 0) +# define R300_ZTOP_ENABLE (1 << 0) + +/* gap */ + +#define R300_ZB_ZCACHE_CTLSTAT 0x4f18 +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31) + +#define R300_ZB_BW_CNTL 0x4f1c +# define R300_HIZ_DISABLE (0 << 0) +# define R300_HIZ_ENABLE (1 << 0) +# define R300_HIZ_MIN (0 << 1) +# define R300_HIZ_MAX (1 << 1) +# define R300_FAST_FILL_DISABLE (0 << 2) +# define R300_FAST_FILL_ENABLE (1 << 2) +# define R300_RD_COMP_DISABLE (0 << 3) +# define R300_RD_COMP_ENABLE (1 << 3) +# define R300_WR_COMP_DISABLE (0 << 4) +# define R300_WR_COMP_ENABLE (1 << 4) +# define R300_ZB_CB_CLEAR_RMW (0 << 5) +# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5) +# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6) +# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6) + +# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7) +# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7) +# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8) +# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8) + +# define R500_BMASK_ENABLE (0 << 10) +# define R500_BMASK_DISABLE (1 << 10) +# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11) +# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11) +# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12) +# define R500_HIZ_FP_EXP_BITS_1 (1 << 12) +# define R500_HIZ_FP_EXP_BITS_2 (2 << 12) +# define R500_HIZ_FP_EXP_BITS_3 (3 << 12) +# define R500_HIZ_FP_EXP_BITS_4 (4 << 12) +# define R500_HIZ_FP_EXP_BITS_5 (5 << 12) +# define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15) +# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15) +# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16) +# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16) +# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17) +# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17) +# define R500_PEQ_PACKING_DISABLE (0 << 18) +# define R500_PEQ_PACKING_ENABLE (1 << 18) +# define R500_COVERED_PTR_MASKING_DISABLE (0 << 18) +# define R500_COVERED_PTR_MASKING_ENABLE (1 << 18) + + +/* gap */ + +/* Z Buffer Address Offset. + * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles. + */ +#define R300_ZB_DEPTHOFFSET 0x4f20 + +/* Z Buffer Pitch and Endian Control */ +#define R300_ZB_DEPTHPITCH 0x4f24 +# define R300_DEPTHPITCH_MASK 0x00003FFC +# define R300_DEPTHMACROTILE_DISABLE (0 << 16) +# define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMICROTILE_LINEAR (0 << 17) +# define R300_DEPTHMICROTILE_TILED (1 << 17) +# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHENDIAN_NO_SWAP (0 << 18) +# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) +# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) +# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18) + +/* Z Buffer Clear Value */ +#define R300_ZB_DEPTHCLEARVALUE 0x4f28 + +#define R300_ZB_ZMASK_OFFSET 0x4f30 +#define R300_ZB_ZMASK_PITCH 0x4f34 +#define R300_ZB_ZMASK_WRINDEX 0x4f38 +#define R300_ZB_ZMASK_DWORD 0x4f3c +#define R300_ZB_ZMASK_RDINDEX 0x4f40 + +/* Hierarchical Z Memory Offset */ +#define R300_ZB_HIZ_OFFSET 0x4f44 + +/* Hierarchical Z Write Index */ +#define R300_ZB_HIZ_WRINDEX 0x4f48 + +/* Hierarchical Z Data */ +#define R300_ZB_HIZ_DWORD 0x4f4c + +/* Hierarchical Z Read Index */ +#define R300_ZB_HIZ_RDINDEX 0x4f50 + +/* Hierarchical Z Pitch */ +#define R300_ZB_HIZ_PITCH 0x4f54 + +/* Z Buffer Z Pass Counter Data */ +#define R300_ZB_ZPASS_DATA 0x4f58 + +/* Z Buffer Z Pass Counter Address */ +#define R300_ZB_ZPASS_ADDR 0x4f5c + +/* Depth buffer X and Y coordinate offset */ +#define R300_ZB_DEPTHXY_OFFSET 0x4f60 +# define R300_DEPTHX_OFFSET_SHIFT 1 +# define R300_DEPTHX_OFFSET_MASK 0x000007FE +# define R300_DEPTHY_OFFSET_SHIFT 17 +# define R300_DEPTHY_OFFSET_MASK 0x07FE0000 + +/* Sets the fifo sizes */ +#define R500_ZB_FIFO_SIZE 0x4fd0 +# define R500_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_OP_FIFO_SIZE_EIGTHS (4 << 0) + +/* Stencil Reference Value and Mask for backfacing quads */ +/* R300_ZB_STENCILREFMASK handles front face */ +#define R500_ZB_STENCILREFMASK_BF 0x4fd4 +# define R500_STENCILREF_SHIFT 0 +# define R500_STENCILREF_MASK 0x000000ff +# define R500_STENCILMASK_SHIFT 8 +# define R500_STENCILMASK_MASK 0x0000ff00 +# define R500_STENCILWRITEMASK_SHIFT 16 +# define R500_STENCILWRITEMASK_MASK 0x00ff0000 + +/* BEGIN: Vertex program instruction set */ + +/* Every instruction is four dwords long: + * DWORD 0: output and opcode + * DWORD 1: first argument + * DWORD 2: second argument + * DWORD 3: third argument + * + * Notes: + * - ABS r, a is implemented as MAX r, a, -a + * - MOV is implemented as ADD to zero + * - XPD is implemented as MUL + MAD + * - FLR is implemented as FRC + ADD + * - apparently, fglrx tries to schedule instructions so that there is at + * least one instruction between the write to a temporary and the first + * read from said temporary; however, violations of this scheduling are + * allowed + * - register indices seem to be unrelated with OpenGL aliasing to + * conventional state + * - only one attribute and one parameter can be loaded at a time; however, + * the same attribute/parameter can be used for more than one argument + * - the second software argument for POW is the third hardware argument + * (no idea why) + * - MAD with only temporaries as input seems to use VPI_OUT_SELECT_MAD_2 + * + * There is some magic surrounding LIT: + * The single argument is replicated across all three inputs, but swizzled: + * First argument: xyzy + * Second argument: xyzx + * Third argument: xyzw + * Whenever the result is used later in the fragment program, fglrx forces + * x and w to be 1.0 in the input selection; I don't know whether this is + * strictly necessary + */ +#define R300_VPI_OUT_OP_DOT (1 << 0) +#define R300_VPI_OUT_OP_MUL (2 << 0) +#define R300_VPI_OUT_OP_ADD (3 << 0) +#define R300_VPI_OUT_OP_MAD (4 << 0) +#define R300_VPI_OUT_OP_DST (5 << 0) +#define R300_VPI_OUT_OP_FRC (6 << 0) +#define R300_VPI_OUT_OP_MAX (7 << 0) +#define R300_VPI_OUT_OP_MIN (8 << 0) +#define R300_VPI_OUT_OP_SGE (9 << 0) +#define R300_VPI_OUT_OP_SLT (10 << 0) + /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, vector(scalar, vector) */ +#define R300_VPI_OUT_OP_UNK12 (12 << 0) +#define R300_VPI_OUT_OP_ARL (13 << 0) +#define R300_VPI_OUT_OP_EXP (65 << 0) +#define R300_VPI_OUT_OP_LOG (66 << 0) + /* Used in fog computations, scalar(scalar) */ +#define R300_VPI_OUT_OP_UNK67 (67 << 0) +#define R300_VPI_OUT_OP_LIT (68 << 0) +#define R300_VPI_OUT_OP_POW (69 << 0) +#define R300_VPI_OUT_OP_RCP (70 << 0) +#define R300_VPI_OUT_OP_RSQ (72 << 0) + /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, scalar(scalar) */ +#define R300_VPI_OUT_OP_UNK73 (73 << 0) +#define R300_VPI_OUT_OP_EX2 (75 << 0) +#define R300_VPI_OUT_OP_LG2 (76 << 0) +#define R300_VPI_OUT_OP_MAD_2 (128 << 0) + /* all temps, vector(scalar, vector, vector) */ +#define R300_VPI_OUT_OP_UNK129 (129 << 0) + +#define R300_VPI_OUT_REG_CLASS_TEMPORARY (0 << 8) +#define R300_VPI_OUT_REG_CLASS_ADDR (1 << 8) +#define R300_VPI_OUT_REG_CLASS_RESULT (2 << 8) +#define R300_VPI_OUT_REG_CLASS_MASK (31 << 8) + +#define R300_VPI_OUT_REG_INDEX_SHIFT 13 + /* GUESS based on fglrx native limits */ +#define R300_VPI_OUT_REG_INDEX_MASK (31 << 13) + +#define R300_VPI_OUT_WRITE_X (1 << 20) +#define R300_VPI_OUT_WRITE_Y (1 << 21) +#define R300_VPI_OUT_WRITE_Z (1 << 22) +#define R300_VPI_OUT_WRITE_W (1 << 23) + +#define R300_VPI_IN_REG_CLASS_TEMPORARY (0 << 0) +#define R300_VPI_IN_REG_CLASS_ATTRIBUTE (1 << 0) +#define R300_VPI_IN_REG_CLASS_PARAMETER (2 << 0) +#define R300_VPI_IN_REG_CLASS_NONE (9 << 0) +#define R300_VPI_IN_REG_CLASS_MASK (31 << 0) + +#define R300_VPI_IN_REG_INDEX_SHIFT 5 + /* GUESS based on fglrx native limits */ +#define R300_VPI_IN_REG_INDEX_MASK (255 << 5) + +/* The R300 can select components from the input register arbitrarily. + * Use the following constants, shifted by the component shift you + * want to select + */ +#define R300_VPI_IN_SELECT_X 0 +#define R300_VPI_IN_SELECT_Y 1 +#define R300_VPI_IN_SELECT_Z 2 +#define R300_VPI_IN_SELECT_W 3 +#define R300_VPI_IN_SELECT_ZERO 4 +#define R300_VPI_IN_SELECT_ONE 5 +#define R300_VPI_IN_SELECT_MASK 7 + +#define R300_VPI_IN_X_SHIFT 13 +#define R300_VPI_IN_Y_SHIFT 16 +#define R300_VPI_IN_Z_SHIFT 19 +#define R300_VPI_IN_W_SHIFT 22 + +#define R300_VPI_IN_NEG_X (1 << 25) +#define R300_VPI_IN_NEG_Y (1 << 26) +#define R300_VPI_IN_NEG_Z (1 << 27) +#define R300_VPI_IN_NEG_W (1 << 28) +/* END: Vertex program instruction set */ + +/* BEGIN: Packet 3 commands */ + +/* A primitive emission dword. */ +#define R300_PRIM_TYPE_NONE (0 << 0) +#define R300_PRIM_TYPE_POINT (1 << 0) +#define R300_PRIM_TYPE_LINE (2 << 0) +#define R300_PRIM_TYPE_LINE_STRIP (3 << 0) +#define R300_PRIM_TYPE_TRI_LIST (4 << 0) +#define R300_PRIM_TYPE_TRI_FAN (5 << 0) +#define R300_PRIM_TYPE_TRI_STRIP (6 << 0) +#define R300_PRIM_TYPE_TRI_TYPE2 (7 << 0) +#define R300_PRIM_TYPE_RECT_LIST (8 << 0) +#define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0) +#define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0) + /* GUESS (based on r200) */ +#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0) +#define R300_PRIM_TYPE_LINE_LOOP (12 << 0) +#define R300_PRIM_TYPE_QUADS (13 << 0) +#define R300_PRIM_TYPE_QUAD_STRIP (14 << 0) +#define R300_PRIM_TYPE_POLYGON (15 << 0) +#define R300_PRIM_TYPE_MASK 0xF +#define R300_PRIM_WALK_IND (1 << 4) +#define R300_PRIM_WALK_LIST (2 << 4) +#define R300_PRIM_WALK_RING (3 << 4) +#define R300_PRIM_WALK_MASK (3 << 4) + /* GUESS (based on r200) */ +#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6) +#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6) +#define R300_PRIM_NUM_VERTICES_SHIFT 16 +#define R300_PRIM_NUM_VERTICES_MASK 0xffff + +/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. + * Two parameter dwords: + * 0. The first parameter appears to be always 0 + * 1. The second parameter is a standard primitive emission dword. + */ +#define R300_PACKET3_3D_DRAW_VBUF 0x00002800 + +/* Specify the full set of vertex arrays as (address, stride). + * The first parameter is the number of vertex arrays specified. + * The rest of the command is a variable length list of blocks, where + * each block is three dwords long and specifies two arrays. + * The first dword of a block is split into two words, the lower significant + * word refers to the first array, the more significant word to the second + * array in the block. + * The low byte of each word contains the size of an array entry in dwords, + * the high byte contains the stride of the array. + * The second dword of a block contains the pointer to the first array, + * the third dword of a block contains the pointer to the second array. + * Note that if the total number of arrays is odd, the third dword of + * the last block is omitted. + */ +#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 + +#define R300_PACKET3_INDX_BUFFER 0x00003300 +# define R300_EB_UNK1_SHIFT 24 +# define R300_EB_UNK1 (0x80<<24) +# define R300_EB_UNK2 0x0810 +#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400 +#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600 + +/* END: Packet 3 commands */ + + +/* Color formats for 2d packets + */ +#define R300_CP_COLOR_FORMAT_CI8 2 +#define R300_CP_COLOR_FORMAT_ARGB1555 3 +#define R300_CP_COLOR_FORMAT_RGB565 4 +#define R300_CP_COLOR_FORMAT_ARGB8888 6 +#define R300_CP_COLOR_FORMAT_RGB332 7 +#define R300_CP_COLOR_FORMAT_RGB8 9 +#define R300_CP_COLOR_FORMAT_ARGB4444 15 + +/* + * CP type-3 packets + */ +#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00 + +#define R500_VAP_INDEX_OFFSET 0x208c + +#define R500_GA_US_VECTOR_INDEX 0x4250 +#define R500_GA_US_VECTOR_DATA 0x4254 + +#define R500_RS_IP_0 0x4074 +#define R500_RS_INST_0 0x4320 + +#define R500_US_CONFIG 0x4600 + +#define R500_US_FC_CTRL 0x4624 +#define R500_US_CODE_ADDR 0x4630 + +#define R500_RB3D_COLOR_CLEAR_VALUE_AR 0x46c0 +#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8 + +#endif /* _R300_REG_H */ diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c new file mode 100644 index 00000000000..f0de81a5689 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_cp.c @@ -0,0 +1,1773 @@ +/* radeon_cp.c -- CP support for Radeon -*- linux-c -*- */ +/* + * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Fremont, California. + * Copyright 2007 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + */ + +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_drv.h" +#include "r300_reg.h" + +#include "radeon_microcode.h" + +#define RADEON_FIFO_DEBUG 0 + +static int radeon_do_cleanup_cp(struct drm_device * dev); + +static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr) +{ + u32 ret; + RADEON_WRITE(R520_MC_IND_INDEX, 0x7f0000 | (addr & 0xff)); + ret = RADEON_READ(R520_MC_IND_DATA); + RADEON_WRITE(R520_MC_IND_INDEX, 0); + return ret; +} + +static u32 RS480_READ_MCIND(drm_radeon_private_t *dev_priv, int addr) +{ + u32 ret; + RADEON_WRITE(RS480_NB_MC_INDEX, addr & 0xff); + ret = RADEON_READ(RS480_NB_MC_DATA); + RADEON_WRITE(RS480_NB_MC_INDEX, 0xff); + return ret; +} + +static u32 RS690_READ_MCIND(drm_radeon_private_t *dev_priv, int addr) +{ + u32 ret; + RADEON_WRITE(RS690_MC_INDEX, (addr & RS690_MC_INDEX_MASK)); + ret = RADEON_READ(RS690_MC_DATA); + RADEON_WRITE(RS690_MC_INDEX, RS690_MC_INDEX_MASK); + return ret; +} + +static u32 IGP_READ_MCIND(drm_radeon_private_t *dev_priv, int addr) +{ + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) + return RS690_READ_MCIND(dev_priv, addr); + else + return RS480_READ_MCIND(dev_priv, addr); +} + +u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv) +{ + + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV515) + return R500_READ_MCIND(dev_priv, RV515_MC_FB_LOCATION); + else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) + return RS690_READ_MCIND(dev_priv, RS690_MC_FB_LOCATION); + else if ((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_RV515) + return R500_READ_MCIND(dev_priv, R520_MC_FB_LOCATION); + else + return RADEON_READ(RADEON_MC_FB_LOCATION); +} + +static void radeon_write_fb_location(drm_radeon_private_t *dev_priv, u32 fb_loc) +{ + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV515) + R500_WRITE_MCIND(RV515_MC_FB_LOCATION, fb_loc); + else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) + RS690_WRITE_MCIND(RS690_MC_FB_LOCATION, fb_loc); + else if ((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_RV515) + R500_WRITE_MCIND(R520_MC_FB_LOCATION, fb_loc); + else + RADEON_WRITE(RADEON_MC_FB_LOCATION, fb_loc); +} + +static void radeon_write_agp_location(drm_radeon_private_t *dev_priv, u32 agp_loc) +{ + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV515) + R500_WRITE_MCIND(RV515_MC_AGP_LOCATION, agp_loc); + else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) + RS690_WRITE_MCIND(RS690_MC_AGP_LOCATION, agp_loc); + else if ((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_RV515) + R500_WRITE_MCIND(R520_MC_AGP_LOCATION, agp_loc); + else + RADEON_WRITE(RADEON_MC_AGP_LOCATION, agp_loc); +} + +static void radeon_write_agp_base(drm_radeon_private_t *dev_priv, u64 agp_base) +{ + u32 agp_base_hi = upper_32_bits(agp_base); + u32 agp_base_lo = agp_base & 0xffffffff; + + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV515) { + R500_WRITE_MCIND(RV515_MC_AGP_BASE, agp_base_lo); + R500_WRITE_MCIND(RV515_MC_AGP_BASE_2, agp_base_hi); + } else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) { + RS690_WRITE_MCIND(RS690_MC_AGP_BASE, agp_base_lo); + RS690_WRITE_MCIND(RS690_MC_AGP_BASE_2, agp_base_hi); + } else if ((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_RV515) { + R500_WRITE_MCIND(R520_MC_AGP_BASE, agp_base_lo); + R500_WRITE_MCIND(R520_MC_AGP_BASE_2, agp_base_hi); + } else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS480) { + RADEON_WRITE(RADEON_AGP_BASE, agp_base_lo); + RADEON_WRITE(RS480_AGP_BASE_2, 0); + } else { + RADEON_WRITE(RADEON_AGP_BASE, agp_base_lo); + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R200) + RADEON_WRITE(RADEON_AGP_BASE_2, agp_base_hi); + } +} + +static int RADEON_READ_PLL(struct drm_device * dev, int addr) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + + RADEON_WRITE8(RADEON_CLOCK_CNTL_INDEX, addr & 0x1f); + return RADEON_READ(RADEON_CLOCK_CNTL_DATA); +} + +static u32 RADEON_READ_PCIE(drm_radeon_private_t *dev_priv, int addr) +{ + RADEON_WRITE8(RADEON_PCIE_INDEX, addr & 0xff); + return RADEON_READ(RADEON_PCIE_DATA); +} + +#if RADEON_FIFO_DEBUG +static void radeon_status(drm_radeon_private_t * dev_priv) +{ + printk("%s:\n", __func__); + printk("RBBM_STATUS = 0x%08x\n", + (unsigned int)RADEON_READ(RADEON_RBBM_STATUS)); + printk("CP_RB_RTPR = 0x%08x\n", + (unsigned int)RADEON_READ(RADEON_CP_RB_RPTR)); + printk("CP_RB_WTPR = 0x%08x\n", + (unsigned int)RADEON_READ(RADEON_CP_RB_WPTR)); + printk("AIC_CNTL = 0x%08x\n", + (unsigned int)RADEON_READ(RADEON_AIC_CNTL)); + printk("AIC_STAT = 0x%08x\n", + (unsigned int)RADEON_READ(RADEON_AIC_STAT)); + printk("AIC_PT_BASE = 0x%08x\n", + (unsigned int)RADEON_READ(RADEON_AIC_PT_BASE)); + printk("TLB_ADDR = 0x%08x\n", + (unsigned int)RADEON_READ(RADEON_AIC_TLB_ADDR)); + printk("TLB_DATA = 0x%08x\n", + (unsigned int)RADEON_READ(RADEON_AIC_TLB_DATA)); +} +#endif + +/* ================================================================ + * Engine, FIFO control + */ + +static int radeon_do_pixcache_flush(drm_radeon_private_t * dev_priv) +{ + u32 tmp; + int i; + + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + + if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { + tmp = RADEON_READ(RADEON_RB3D_DSTCACHE_CTLSTAT); + tmp |= RADEON_RB3D_DC_FLUSH_ALL; + RADEON_WRITE(RADEON_RB3D_DSTCACHE_CTLSTAT, tmp); + + for (i = 0; i < dev_priv->usec_timeout; i++) { + if (!(RADEON_READ(RADEON_RB3D_DSTCACHE_CTLSTAT) + & RADEON_RB3D_DC_BUSY)) { + return 0; + } + DRM_UDELAY(1); + } + } else { + /* 3D */ + tmp = RADEON_READ(R300_RB3D_DSTCACHE_CTLSTAT); + tmp |= RADEON_RB3D_DC_FLUSH_ALL; + RADEON_WRITE(R300_RB3D_DSTCACHE_CTLSTAT, tmp); + + /* 2D */ + tmp = RADEON_READ(R300_DSTCACHE_CTLSTAT); + tmp |= RADEON_RB3D_DC_FLUSH_ALL; + RADEON_WRITE(R300_DSTCACHE_CTLSTAT, tmp); + + for (i = 0; i < dev_priv->usec_timeout; i++) { + if (!(RADEON_READ(R300_DSTCACHE_CTLSTAT) + & RADEON_RB3D_DC_BUSY)) { + return 0; + } + DRM_UDELAY(1); + } + } + +#if RADEON_FIFO_DEBUG + DRM_ERROR("failed!\n"); + radeon_status(dev_priv); +#endif + return -EBUSY; +} + +static int radeon_do_wait_for_fifo(drm_radeon_private_t * dev_priv, int entries) +{ + int i; + + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + + for (i = 0; i < dev_priv->usec_timeout; i++) { + int slots = (RADEON_READ(RADEON_RBBM_STATUS) + & RADEON_RBBM_FIFOCNT_MASK); + if (slots >= entries) + return 0; + DRM_UDELAY(1); + } + +#if RADEON_FIFO_DEBUG + DRM_ERROR("failed!\n"); + radeon_status(dev_priv); +#endif + return -EBUSY; +} + +static int radeon_do_wait_for_idle(drm_radeon_private_t * dev_priv) +{ + int i, ret; + + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + + ret = radeon_do_wait_for_fifo(dev_priv, 64); + if (ret) + return ret; + + for (i = 0; i < dev_priv->usec_timeout; i++) { + if (!(RADEON_READ(RADEON_RBBM_STATUS) + & RADEON_RBBM_ACTIVE)) { + radeon_do_pixcache_flush(dev_priv); + return 0; + } + DRM_UDELAY(1); + } + +#if RADEON_FIFO_DEBUG + DRM_ERROR("failed!\n"); + radeon_status(dev_priv); +#endif + return -EBUSY; +} + +static void radeon_init_pipes(drm_radeon_private_t *dev_priv) +{ + uint32_t gb_tile_config, gb_pipe_sel = 0; + + /* RS4xx/RS6xx/R4xx/R5xx */ + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R420) { + gb_pipe_sel = RADEON_READ(R400_GB_PIPE_SELECT); + dev_priv->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1; + } else { + /* R3xx */ + if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R300) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R350)) { + dev_priv->num_gb_pipes = 2; + } else { + /* R3Vxx */ + dev_priv->num_gb_pipes = 1; + } + } + DRM_INFO("Num pipes: %d\n", dev_priv->num_gb_pipes); + + gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 /*| R300_SUBPIXEL_1_16*/); + + switch (dev_priv->num_gb_pipes) { + case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break; + case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break; + case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break; + default: + case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break; + } + + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) { + RADEON_WRITE_PLL(R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4)); + RADEON_WRITE(R500_SU_REG_DEST, ((1 << dev_priv->num_gb_pipes) - 1)); + } + RADEON_WRITE(R300_GB_TILE_CONFIG, gb_tile_config); + radeon_do_wait_for_idle(dev_priv); + RADEON_WRITE(R300_DST_PIPE_CONFIG, RADEON_READ(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG); + RADEON_WRITE(R300_RB2D_DSTCACHE_MODE, (RADEON_READ(R300_RB2D_DSTCACHE_MODE) | + R300_DC_AUTOFLUSH_ENABLE | + R300_DC_DC_DISABLE_IGNORE_PE)); + + +} + +/* ================================================================ + * CP control, initialization + */ + +/* Load the microcode for the CP */ +static void radeon_cp_load_microcode(drm_radeon_private_t * dev_priv) +{ + int i; + DRM_DEBUG("\n"); + + radeon_do_wait_for_idle(dev_priv); + + RADEON_WRITE(RADEON_CP_ME_RAM_ADDR, 0); + if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R100) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV100) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV200) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS100) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS200)) { + DRM_INFO("Loading R100 Microcode\n"); + for (i = 0; i < 256; i++) { + RADEON_WRITE(RADEON_CP_ME_RAM_DATAH, + R100_cp_microcode[i][1]); + RADEON_WRITE(RADEON_CP_ME_RAM_DATAL, + R100_cp_microcode[i][0]); + } + } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R200) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV250) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV280) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS300)) { + DRM_INFO("Loading R200 Microcode\n"); + for (i = 0; i < 256; i++) { + RADEON_WRITE(RADEON_CP_ME_RAM_DATAH, + R200_cp_microcode[i][1]); + RADEON_WRITE(RADEON_CP_ME_RAM_DATAL, + R200_cp_microcode[i][0]); + } + } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R300) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R350) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV350) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV380) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS480)) { + DRM_INFO("Loading R300 Microcode\n"); + for (i = 0; i < 256; i++) { + RADEON_WRITE(RADEON_CP_ME_RAM_DATAH, + R300_cp_microcode[i][1]); + RADEON_WRITE(RADEON_CP_ME_RAM_DATAL, + R300_cp_microcode[i][0]); + } + } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R420) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV410)) { + DRM_INFO("Loading R400 Microcode\n"); + for (i = 0; i < 256; i++) { + RADEON_WRITE(RADEON_CP_ME_RAM_DATAH, + R420_cp_microcode[i][1]); + RADEON_WRITE(RADEON_CP_ME_RAM_DATAL, + R420_cp_microcode[i][0]); + } + } else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) { + DRM_INFO("Loading RS690 Microcode\n"); + for (i = 0; i < 256; i++) { + RADEON_WRITE(RADEON_CP_ME_RAM_DATAH, + RS690_cp_microcode[i][1]); + RADEON_WRITE(RADEON_CP_ME_RAM_DATAL, + RS690_cp_microcode[i][0]); + } + } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV515) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R520) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R580) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV560) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV570)) { + DRM_INFO("Loading R500 Microcode\n"); + for (i = 0; i < 256; i++) { + RADEON_WRITE(RADEON_CP_ME_RAM_DATAH, + R520_cp_microcode[i][1]); + RADEON_WRITE(RADEON_CP_ME_RAM_DATAL, + R520_cp_microcode[i][0]); + } + } +} + +/* Flush any pending commands to the CP. This should only be used just + * prior to a wait for idle, as it informs the engine that the command + * stream is ending. + */ +static void radeon_do_cp_flush(drm_radeon_private_t * dev_priv) +{ + DRM_DEBUG("\n"); +#if 0 + u32 tmp; + + tmp = RADEON_READ(RADEON_CP_RB_WPTR) | (1 << 31); + RADEON_WRITE(RADEON_CP_RB_WPTR, tmp); +#endif +} + +/* Wait for the CP to go idle. + */ +int radeon_do_cp_idle(drm_radeon_private_t * dev_priv) +{ + RING_LOCALS; + DRM_DEBUG("\n"); + + BEGIN_RING(6); + + RADEON_PURGE_CACHE(); + RADEON_PURGE_ZCACHE(); + RADEON_WAIT_UNTIL_IDLE(); + + ADVANCE_RING(); + COMMIT_RING(); + + return radeon_do_wait_for_idle(dev_priv); +} + +/* Start the Command Processor. + */ +static void radeon_do_cp_start(drm_radeon_private_t * dev_priv) +{ + RING_LOCALS; + DRM_DEBUG("\n"); + + radeon_do_wait_for_idle(dev_priv); + + RADEON_WRITE(RADEON_CP_CSQ_CNTL, dev_priv->cp_mode); + + dev_priv->cp_running = 1; + + BEGIN_RING(6); + + RADEON_PURGE_CACHE(); + RADEON_PURGE_ZCACHE(); + RADEON_WAIT_UNTIL_IDLE(); + + ADVANCE_RING(); + COMMIT_RING(); +} + +/* Reset the Command Processor. This will not flush any pending + * commands, so you must wait for the CP command stream to complete + * before calling this routine. + */ +static void radeon_do_cp_reset(drm_radeon_private_t * dev_priv) +{ + u32 cur_read_ptr; + DRM_DEBUG("\n"); + + cur_read_ptr = RADEON_READ(RADEON_CP_RB_RPTR); + RADEON_WRITE(RADEON_CP_RB_WPTR, cur_read_ptr); + SET_RING_HEAD(dev_priv, cur_read_ptr); + dev_priv->ring.tail = cur_read_ptr; +} + +/* Stop the Command Processor. This will not flush any pending + * commands, so you must flush the command stream and wait for the CP + * to go idle before calling this routine. + */ +static void radeon_do_cp_stop(drm_radeon_private_t * dev_priv) +{ + DRM_DEBUG("\n"); + + RADEON_WRITE(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS); + + dev_priv->cp_running = 0; +} + +/* Reset the engine. This will stop the CP if it is running. + */ +static int radeon_do_engine_reset(struct drm_device * dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + u32 clock_cntl_index = 0, mclk_cntl = 0, rbbm_soft_reset; + DRM_DEBUG("\n"); + + radeon_do_pixcache_flush(dev_priv); + + if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV410) { + /* may need something similar for newer chips */ + clock_cntl_index = RADEON_READ(RADEON_CLOCK_CNTL_INDEX); + mclk_cntl = RADEON_READ_PLL(dev, RADEON_MCLK_CNTL); + + RADEON_WRITE_PLL(RADEON_MCLK_CNTL, (mclk_cntl | + RADEON_FORCEON_MCLKA | + RADEON_FORCEON_MCLKB | + RADEON_FORCEON_YCLKA | + RADEON_FORCEON_YCLKB | + RADEON_FORCEON_MC | + RADEON_FORCEON_AIC)); + } + + rbbm_soft_reset = RADEON_READ(RADEON_RBBM_SOFT_RESET); + + RADEON_WRITE(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset | + RADEON_SOFT_RESET_CP | + RADEON_SOFT_RESET_HI | + RADEON_SOFT_RESET_SE | + RADEON_SOFT_RESET_RE | + RADEON_SOFT_RESET_PP | + RADEON_SOFT_RESET_E2 | + RADEON_SOFT_RESET_RB)); + RADEON_READ(RADEON_RBBM_SOFT_RESET); + RADEON_WRITE(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & + ~(RADEON_SOFT_RESET_CP | + RADEON_SOFT_RESET_HI | + RADEON_SOFT_RESET_SE | + RADEON_SOFT_RESET_RE | + RADEON_SOFT_RESET_PP | + RADEON_SOFT_RESET_E2 | + RADEON_SOFT_RESET_RB))); + RADEON_READ(RADEON_RBBM_SOFT_RESET); + + if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV410) { + RADEON_WRITE_PLL(RADEON_MCLK_CNTL, mclk_cntl); + RADEON_WRITE(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index); + RADEON_WRITE(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset); + } + + /* setup the raster pipes */ + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R300) + radeon_init_pipes(dev_priv); + + /* Reset the CP ring */ + radeon_do_cp_reset(dev_priv); + + /* The CP is no longer running after an engine reset */ + dev_priv->cp_running = 0; + + /* Reset any pending vertex, indirect buffers */ + radeon_freelist_reset(dev); + + return 0; +} + +static void radeon_cp_init_ring_buffer(struct drm_device * dev, + drm_radeon_private_t * dev_priv) +{ + u32 ring_start, cur_read_ptr; + u32 tmp; + + /* Initialize the memory controller. With new memory map, the fb location + * is not changed, it should have been properly initialized already. Part + * of the problem is that the code below is bogus, assuming the GART is + * always appended to the fb which is not necessarily the case + */ + if (!dev_priv->new_memmap) + radeon_write_fb_location(dev_priv, + ((dev_priv->gart_vm_start - 1) & 0xffff0000) + | (dev_priv->fb_location >> 16)); + +#if __OS_HAS_AGP + if (dev_priv->flags & RADEON_IS_AGP) { + radeon_write_agp_base(dev_priv, dev->agp->base); + + radeon_write_agp_location(dev_priv, + (((dev_priv->gart_vm_start - 1 + + dev_priv->gart_size) & 0xffff0000) | + (dev_priv->gart_vm_start >> 16))); + + ring_start = (dev_priv->cp_ring->offset + - dev->agp->base + + dev_priv->gart_vm_start); + } else +#endif + ring_start = (dev_priv->cp_ring->offset + - (unsigned long)dev->sg->virtual + + dev_priv->gart_vm_start); + + RADEON_WRITE(RADEON_CP_RB_BASE, ring_start); + + /* Set the write pointer delay */ + RADEON_WRITE(RADEON_CP_RB_WPTR_DELAY, 0); + + /* Initialize the ring buffer's read and write pointers */ + cur_read_ptr = RADEON_READ(RADEON_CP_RB_RPTR); + RADEON_WRITE(RADEON_CP_RB_WPTR, cur_read_ptr); + SET_RING_HEAD(dev_priv, cur_read_ptr); + dev_priv->ring.tail = cur_read_ptr; + +#if __OS_HAS_AGP + if (dev_priv->flags & RADEON_IS_AGP) { + RADEON_WRITE(RADEON_CP_RB_RPTR_ADDR, + dev_priv->ring_rptr->offset + - dev->agp->base + dev_priv->gart_vm_start); + } else +#endif + { + struct drm_sg_mem *entry = dev->sg; + unsigned long tmp_ofs, page_ofs; + + tmp_ofs = dev_priv->ring_rptr->offset - + (unsigned long)dev->sg->virtual; + page_ofs = tmp_ofs >> PAGE_SHIFT; + + RADEON_WRITE(RADEON_CP_RB_RPTR_ADDR, entry->busaddr[page_ofs]); + DRM_DEBUG("ring rptr: offset=0x%08lx handle=0x%08lx\n", + (unsigned long)entry->busaddr[page_ofs], + entry->handle + tmp_ofs); + } + + /* Set ring buffer size */ +#ifdef __BIG_ENDIAN + RADEON_WRITE(RADEON_CP_RB_CNTL, + RADEON_BUF_SWAP_32BIT | + (dev_priv->ring.fetch_size_l2ow << 18) | + (dev_priv->ring.rptr_update_l2qw << 8) | + dev_priv->ring.size_l2qw); +#else + RADEON_WRITE(RADEON_CP_RB_CNTL, + (dev_priv->ring.fetch_size_l2ow << 18) | + (dev_priv->ring.rptr_update_l2qw << 8) | + dev_priv->ring.size_l2qw); +#endif + + /* Start with assuming that writeback doesn't work */ + dev_priv->writeback_works = 0; + + /* Initialize the scratch register pointer. This will cause + * the scratch register values to be written out to memory + * whenever they are updated. + * + * We simply put this behind the ring read pointer, this works + * with PCI GART as well as (whatever kind of) AGP GART + */ + RADEON_WRITE(RADEON_SCRATCH_ADDR, RADEON_READ(RADEON_CP_RB_RPTR_ADDR) + + RADEON_SCRATCH_REG_OFFSET); + + dev_priv->scratch = ((__volatile__ u32 *) + dev_priv->ring_rptr->handle + + (RADEON_SCRATCH_REG_OFFSET / sizeof(u32))); + + RADEON_WRITE(RADEON_SCRATCH_UMSK, 0x7); + + /* Turn on bus mastering */ + tmp = RADEON_READ(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; + RADEON_WRITE(RADEON_BUS_CNTL, tmp); + + dev_priv->sarea_priv->last_frame = dev_priv->scratch[0] = 0; + RADEON_WRITE(RADEON_LAST_FRAME_REG, dev_priv->sarea_priv->last_frame); + + dev_priv->sarea_priv->last_dispatch = dev_priv->scratch[1] = 0; + RADEON_WRITE(RADEON_LAST_DISPATCH_REG, + dev_priv->sarea_priv->last_dispatch); + + dev_priv->sarea_priv->last_clear = dev_priv->scratch[2] = 0; + RADEON_WRITE(RADEON_LAST_CLEAR_REG, dev_priv->sarea_priv->last_clear); + + radeon_do_wait_for_idle(dev_priv); + + /* Sync everything up */ + RADEON_WRITE(RADEON_ISYNC_CNTL, + (RADEON_ISYNC_ANY2D_IDLE3D | + RADEON_ISYNC_ANY3D_IDLE2D | + RADEON_ISYNC_WAIT_IDLEGUI | + RADEON_ISYNC_CPSCRATCH_IDLEGUI)); + +} + +static void radeon_test_writeback(drm_radeon_private_t * dev_priv) +{ + u32 tmp; + + /* Writeback doesn't seem to work everywhere, test it here and possibly + * enable it if it appears to work + */ + DRM_WRITE32(dev_priv->ring_rptr, RADEON_SCRATCHOFF(1), 0); + RADEON_WRITE(RADEON_SCRATCH_REG1, 0xdeadbeef); + + for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) { + if (DRM_READ32(dev_priv->ring_rptr, RADEON_SCRATCHOFF(1)) == + 0xdeadbeef) + break; + DRM_UDELAY(1); + } + + if (tmp < dev_priv->usec_timeout) { + dev_priv->writeback_works = 1; + DRM_INFO("writeback test succeeded in %d usecs\n", tmp); + } else { + dev_priv->writeback_works = 0; + DRM_INFO("writeback test failed\n"); + } + if (radeon_no_wb == 1) { + dev_priv->writeback_works = 0; + DRM_INFO("writeback forced off\n"); + } + + if (!dev_priv->writeback_works) { + /* Disable writeback to avoid unnecessary bus master transfer */ + RADEON_WRITE(RADEON_CP_RB_CNTL, RADEON_READ(RADEON_CP_RB_CNTL) | + RADEON_RB_NO_UPDATE); + RADEON_WRITE(RADEON_SCRATCH_UMSK, 0); + } +} + +/* Enable or disable IGP GART on the chip */ +static void radeon_set_igpgart(drm_radeon_private_t * dev_priv, int on) +{ + u32 temp; + + if (on) { + DRM_DEBUG("programming igp gart %08X %08lX %08X\n", + dev_priv->gart_vm_start, + (long)dev_priv->gart_info.bus_addr, + dev_priv->gart_size); + + temp = IGP_READ_MCIND(dev_priv, RS480_MC_MISC_CNTL); + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) + IGP_WRITE_MCIND(RS480_MC_MISC_CNTL, (RS480_GART_INDEX_REG_EN | + RS690_BLOCK_GFX_D3_EN)); + else + IGP_WRITE_MCIND(RS480_MC_MISC_CNTL, RS480_GART_INDEX_REG_EN); + + IGP_WRITE_MCIND(RS480_AGP_ADDRESS_SPACE_SIZE, (RS480_GART_EN | + RS480_VA_SIZE_32MB)); + + temp = IGP_READ_MCIND(dev_priv, RS480_GART_FEATURE_ID); + IGP_WRITE_MCIND(RS480_GART_FEATURE_ID, (RS480_HANG_EN | + RS480_TLB_ENABLE | + RS480_GTW_LAC_EN | + RS480_1LEVEL_GART)); + + temp = dev_priv->gart_info.bus_addr & 0xfffff000; + temp |= (upper_32_bits(dev_priv->gart_info.bus_addr) & 0xff) << 4; + IGP_WRITE_MCIND(RS480_GART_BASE, temp); + + temp = IGP_READ_MCIND(dev_priv, RS480_AGP_MODE_CNTL); + IGP_WRITE_MCIND(RS480_AGP_MODE_CNTL, ((1 << RS480_REQ_TYPE_SNOOP_SHIFT) | + RS480_REQ_TYPE_SNOOP_DIS)); + + radeon_write_agp_base(dev_priv, dev_priv->gart_vm_start); + + dev_priv->gart_size = 32*1024*1024; + temp = (((dev_priv->gart_vm_start - 1 + dev_priv->gart_size) & + 0xffff0000) | (dev_priv->gart_vm_start >> 16)); + + radeon_write_agp_location(dev_priv, temp); + + temp = IGP_READ_MCIND(dev_priv, RS480_AGP_ADDRESS_SPACE_SIZE); + IGP_WRITE_MCIND(RS480_AGP_ADDRESS_SPACE_SIZE, (RS480_GART_EN | + RS480_VA_SIZE_32MB)); + + do { + temp = IGP_READ_MCIND(dev_priv, RS480_GART_CACHE_CNTRL); + if ((temp & RS480_GART_CACHE_INVALIDATE) == 0) + break; + DRM_UDELAY(1); + } while (1); + + IGP_WRITE_MCIND(RS480_GART_CACHE_CNTRL, + RS480_GART_CACHE_INVALIDATE); + + do { + temp = IGP_READ_MCIND(dev_priv, RS480_GART_CACHE_CNTRL); + if ((temp & RS480_GART_CACHE_INVALIDATE) == 0) + break; + DRM_UDELAY(1); + } while (1); + + IGP_WRITE_MCIND(RS480_GART_CACHE_CNTRL, 0); + } else { + IGP_WRITE_MCIND(RS480_AGP_ADDRESS_SPACE_SIZE, 0); + } +} + +static void radeon_set_pciegart(drm_radeon_private_t * dev_priv, int on) +{ + u32 tmp = RADEON_READ_PCIE(dev_priv, RADEON_PCIE_TX_GART_CNTL); + if (on) { + + DRM_DEBUG("programming pcie %08X %08lX %08X\n", + dev_priv->gart_vm_start, + (long)dev_priv->gart_info.bus_addr, + dev_priv->gart_size); + RADEON_WRITE_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, + dev_priv->gart_vm_start); + RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_BASE, + dev_priv->gart_info.bus_addr); + RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_START_LO, + dev_priv->gart_vm_start); + RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_END_LO, + dev_priv->gart_vm_start + + dev_priv->gart_size - 1); + + radeon_write_agp_location(dev_priv, 0xffffffc0); /* ?? */ + + RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_CNTL, + RADEON_PCIE_TX_GART_EN); + } else { + RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_CNTL, + tmp & ~RADEON_PCIE_TX_GART_EN); + } +} + +/* Enable or disable PCI GART on the chip */ +static void radeon_set_pcigart(drm_radeon_private_t * dev_priv, int on) +{ + u32 tmp; + + if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) || + (dev_priv->flags & RADEON_IS_IGPGART)) { + radeon_set_igpgart(dev_priv, on); + return; + } + + if (dev_priv->flags & RADEON_IS_PCIE) { + radeon_set_pciegart(dev_priv, on); + return; + } + + tmp = RADEON_READ(RADEON_AIC_CNTL); + + if (on) { + RADEON_WRITE(RADEON_AIC_CNTL, + tmp | RADEON_PCIGART_TRANSLATE_EN); + + /* set PCI GART page-table base address + */ + RADEON_WRITE(RADEON_AIC_PT_BASE, dev_priv->gart_info.bus_addr); + + /* set address range for PCI address translate + */ + RADEON_WRITE(RADEON_AIC_LO_ADDR, dev_priv->gart_vm_start); + RADEON_WRITE(RADEON_AIC_HI_ADDR, dev_priv->gart_vm_start + + dev_priv->gart_size - 1); + + /* Turn off AGP aperture -- is this required for PCI GART? + */ + radeon_write_agp_location(dev_priv, 0xffffffc0); + RADEON_WRITE(RADEON_AGP_COMMAND, 0); /* clear AGP_COMMAND */ + } else { + RADEON_WRITE(RADEON_AIC_CNTL, + tmp & ~RADEON_PCIGART_TRANSLATE_EN); + } +} + +static int radeon_do_init_cp(struct drm_device * dev, drm_radeon_init_t * init) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + + DRM_DEBUG("\n"); + + /* if we require new memory map but we don't have it fail */ + if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) { + DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n"); + radeon_do_cleanup_cp(dev); + return -EINVAL; + } + + if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) { + DRM_DEBUG("Forcing AGP card to PCI mode\n"); + dev_priv->flags &= ~RADEON_IS_AGP; + } else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE)) + && !init->is_pci) { + DRM_DEBUG("Restoring AGP flag\n"); + dev_priv->flags |= RADEON_IS_AGP; + } + + if ((!(dev_priv->flags & RADEON_IS_AGP)) && !dev->sg) { + DRM_ERROR("PCI GART memory not allocated!\n"); + radeon_do_cleanup_cp(dev); + return -EINVAL; + } + + dev_priv->usec_timeout = init->usec_timeout; + if (dev_priv->usec_timeout < 1 || + dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) { + DRM_DEBUG("TIMEOUT problem!\n"); + radeon_do_cleanup_cp(dev); + return -EINVAL; + } + + /* Enable vblank on CRTC1 for older X servers + */ + dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1; + + switch(init->func) { + case RADEON_INIT_R200_CP: + dev_priv->microcode_version = UCODE_R200; + break; + case RADEON_INIT_R300_CP: + dev_priv->microcode_version = UCODE_R300; + break; + default: + dev_priv->microcode_version = UCODE_R100; + } + + dev_priv->do_boxes = 0; + dev_priv->cp_mode = init->cp_mode; + + /* We don't support anything other than bus-mastering ring mode, + * but the ring can be in either AGP or PCI space for the ring + * read pointer. + */ + if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) && + (init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) { + DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode); + radeon_do_cleanup_cp(dev); + return -EINVAL; + } + + switch (init->fb_bpp) { + case 16: + dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565; + break; + case 32: + default: + dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888; + break; + } + dev_priv->front_offset = init->front_offset; + dev_priv->front_pitch = init->front_pitch; + dev_priv->back_offset = init->back_offset; + dev_priv->back_pitch = init->back_pitch; + + switch (init->depth_bpp) { + case 16: + dev_priv->depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; + break; + case 32: + default: + dev_priv->depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; + break; + } + dev_priv->depth_offset = init->depth_offset; + dev_priv->depth_pitch = init->depth_pitch; + + /* Hardware state for depth clears. Remove this if/when we no + * longer clear the depth buffer with a 3D rectangle. Hard-code + * all values to prevent unwanted 3D state from slipping through + * and screwing with the clear operation. + */ + dev_priv->depth_clear.rb3d_cntl = (RADEON_PLANE_MASK_ENABLE | + (dev_priv->color_fmt << 10) | + (dev_priv->microcode_version == + UCODE_R100 ? RADEON_ZBLOCK16 : 0)); + + dev_priv->depth_clear.rb3d_zstencilcntl = + (dev_priv->depth_fmt | + RADEON_Z_TEST_ALWAYS | + RADEON_STENCIL_TEST_ALWAYS | + RADEON_STENCIL_S_FAIL_REPLACE | + RADEON_STENCIL_ZPASS_REPLACE | + RADEON_STENCIL_ZFAIL_REPLACE | RADEON_Z_WRITE_ENABLE); + + dev_priv->depth_clear.se_cntl = (RADEON_FFACE_CULL_CW | + RADEON_BFACE_SOLID | + RADEON_FFACE_SOLID | + RADEON_FLAT_SHADE_VTX_LAST | + RADEON_DIFFUSE_SHADE_FLAT | + RADEON_ALPHA_SHADE_FLAT | + RADEON_SPECULAR_SHADE_FLAT | + RADEON_FOG_SHADE_FLAT | + RADEON_VTX_PIX_CENTER_OGL | + RADEON_ROUND_MODE_TRUNC | + RADEON_ROUND_PREC_8TH_PIX); + + + dev_priv->ring_offset = init->ring_offset; + dev_priv->ring_rptr_offset = init->ring_rptr_offset; + dev_priv->buffers_offset = init->buffers_offset; + dev_priv->gart_textures_offset = init->gart_textures_offset; + + dev_priv->sarea = drm_getsarea(dev); + if (!dev_priv->sarea) { + DRM_ERROR("could not find sarea!\n"); + radeon_do_cleanup_cp(dev); + return -EINVAL; + } + + dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset); + if (!dev_priv->cp_ring) { + DRM_ERROR("could not find cp ring region!\n"); + radeon_do_cleanup_cp(dev); + return -EINVAL; + } + dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset); + if (!dev_priv->ring_rptr) { + DRM_ERROR("could not find ring read pointer!\n"); + radeon_do_cleanup_cp(dev); + return -EINVAL; + } + dev->agp_buffer_token = init->buffers_offset; + dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); + if (!dev->agp_buffer_map) { + DRM_ERROR("could not find dma buffer region!\n"); + radeon_do_cleanup_cp(dev); + return -EINVAL; + } + + if (init->gart_textures_offset) { + dev_priv->gart_textures = + drm_core_findmap(dev, init->gart_textures_offset); + if (!dev_priv->gart_textures) { + DRM_ERROR("could not find GART texture region!\n"); + radeon_do_cleanup_cp(dev); + return -EINVAL; + } + } + + dev_priv->sarea_priv = + (drm_radeon_sarea_t *) ((u8 *) dev_priv->sarea->handle + + init->sarea_priv_offset); + +#if __OS_HAS_AGP + if (dev_priv->flags & RADEON_IS_AGP) { + drm_core_ioremap(dev_priv->cp_ring, dev); + drm_core_ioremap(dev_priv->ring_rptr, dev); + drm_core_ioremap(dev->agp_buffer_map, dev); + if (!dev_priv->cp_ring->handle || + !dev_priv->ring_rptr->handle || + !dev->agp_buffer_map->handle) { + DRM_ERROR("could not find ioremap agp regions!\n"); + radeon_do_cleanup_cp(dev); + return -EINVAL; + } + } else +#endif + { + dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset; + dev_priv->ring_rptr->handle = + (void *)dev_priv->ring_rptr->offset; + dev->agp_buffer_map->handle = + (void *)dev->agp_buffer_map->offset; + + DRM_DEBUG("dev_priv->cp_ring->handle %p\n", + dev_priv->cp_ring->handle); + DRM_DEBUG("dev_priv->ring_rptr->handle %p\n", + dev_priv->ring_rptr->handle); + DRM_DEBUG("dev->agp_buffer_map->handle %p\n", + dev->agp_buffer_map->handle); + } + + dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 16; + dev_priv->fb_size = + ((radeon_read_fb_location(dev_priv) & 0xffff0000u) + 0x10000) + - dev_priv->fb_location; + + dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) | + ((dev_priv->front_offset + + dev_priv->fb_location) >> 10)); + + dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) | + ((dev_priv->back_offset + + dev_priv->fb_location) >> 10)); + + dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) | + ((dev_priv->depth_offset + + dev_priv->fb_location) >> 10)); + + dev_priv->gart_size = init->gart_size; + + /* New let's set the memory map ... */ + if (dev_priv->new_memmap) { + u32 base = 0; + + DRM_INFO("Setting GART location based on new memory map\n"); + + /* If using AGP, try to locate the AGP aperture at the same + * location in the card and on the bus, though we have to + * align it down. + */ +#if __OS_HAS_AGP + if (dev_priv->flags & RADEON_IS_AGP) { + base = dev->agp->base; + /* Check if valid */ + if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location && + base < (dev_priv->fb_location + dev_priv->fb_size - 1)) { + DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n", + dev->agp->base); + base = 0; + } + } +#endif + /* If not or if AGP is at 0 (Macs), try to put it elsewhere */ + if (base == 0) { + base = dev_priv->fb_location + dev_priv->fb_size; + if (base < dev_priv->fb_location || + ((base + dev_priv->gart_size) & 0xfffffffful) < base) + base = dev_priv->fb_location + - dev_priv->gart_size; + } + dev_priv->gart_vm_start = base & 0xffc00000u; + if (dev_priv->gart_vm_start != base) + DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n", + base, dev_priv->gart_vm_start); + } else { + DRM_INFO("Setting GART location based on old memory map\n"); + dev_priv->gart_vm_start = dev_priv->fb_location + + RADEON_READ(RADEON_CONFIG_APER_SIZE); + } + +#if __OS_HAS_AGP + if (dev_priv->flags & RADEON_IS_AGP) + dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset + - dev->agp->base + + dev_priv->gart_vm_start); + else +#endif + dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset + - (unsigned long)dev->sg->virtual + + dev_priv->gart_vm_start); + + DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size); + DRM_DEBUG("dev_priv->gart_vm_start 0x%x\n", dev_priv->gart_vm_start); + DRM_DEBUG("dev_priv->gart_buffers_offset 0x%lx\n", + dev_priv->gart_buffers_offset); + + dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle; + dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle + + init->ring_size / sizeof(u32)); + dev_priv->ring.size = init->ring_size; + dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8); + + dev_priv->ring.rptr_update = /* init->rptr_update */ 4096; + dev_priv->ring.rptr_update_l2qw = drm_order( /* init->rptr_update */ 4096 / 8); + + dev_priv->ring.fetch_size = /* init->fetch_size */ 32; + dev_priv->ring.fetch_size_l2ow = drm_order( /* init->fetch_size */ 32 / 16); + dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1; + + dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK; + +#if __OS_HAS_AGP + if (dev_priv->flags & RADEON_IS_AGP) { + /* Turn off PCI GART */ + radeon_set_pcigart(dev_priv, 0); + } else +#endif + { + dev_priv->gart_info.table_mask = DMA_BIT_MASK(32); + /* if we have an offset set from userspace */ + if (dev_priv->pcigart_offset_set) { + dev_priv->gart_info.bus_addr = + dev_priv->pcigart_offset + dev_priv->fb_location; + dev_priv->gart_info.mapping.offset = + dev_priv->pcigart_offset + dev_priv->fb_aper_offset; + dev_priv->gart_info.mapping.size = + dev_priv->gart_info.table_size; + + drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev); + dev_priv->gart_info.addr = + dev_priv->gart_info.mapping.handle; + + if (dev_priv->flags & RADEON_IS_PCIE) + dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_PCIE; + else + dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_PCI; + dev_priv->gart_info.gart_table_location = + DRM_ATI_GART_FB; + + DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n", + dev_priv->gart_info.addr, + dev_priv->pcigart_offset); + } else { + if (dev_priv->flags & RADEON_IS_IGPGART) + dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_IGP; + else + dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_PCI; + dev_priv->gart_info.gart_table_location = + DRM_ATI_GART_MAIN; + dev_priv->gart_info.addr = NULL; + dev_priv->gart_info.bus_addr = 0; + if (dev_priv->flags & RADEON_IS_PCIE) { + DRM_ERROR + ("Cannot use PCI Express without GART in FB memory\n"); + radeon_do_cleanup_cp(dev); + return -EINVAL; + } + } + + if (!drm_ati_pcigart_init(dev, &dev_priv->gart_info)) { + DRM_ERROR("failed to init PCI GART!\n"); + radeon_do_cleanup_cp(dev); + return -ENOMEM; + } + + /* Turn on PCI GART */ + radeon_set_pcigart(dev_priv, 1); + } + + radeon_cp_load_microcode(dev_priv); + radeon_cp_init_ring_buffer(dev, dev_priv); + + dev_priv->last_buf = 0; + + radeon_do_engine_reset(dev); + radeon_test_writeback(dev_priv); + + return 0; +} + +static int radeon_do_cleanup_cp(struct drm_device * dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + DRM_DEBUG("\n"); + + /* Make sure interrupts are disabled here because the uninstall ioctl + * may not have been called from userspace and after dev_private + * is freed, it's too late. + */ + if (dev->irq_enabled) + drm_irq_uninstall(dev); + +#if __OS_HAS_AGP + if (dev_priv->flags & RADEON_IS_AGP) { + if (dev_priv->cp_ring != NULL) { + drm_core_ioremapfree(dev_priv->cp_ring, dev); + dev_priv->cp_ring = NULL; + } + if (dev_priv->ring_rptr != NULL) { + drm_core_ioremapfree(dev_priv->ring_rptr, dev); + dev_priv->ring_rptr = NULL; + } + if (dev->agp_buffer_map != NULL) { + drm_core_ioremapfree(dev->agp_buffer_map, dev); + dev->agp_buffer_map = NULL; + } + } else +#endif + { + + if (dev_priv->gart_info.bus_addr) { + /* Turn off PCI GART */ + radeon_set_pcigart(dev_priv, 0); + if (!drm_ati_pcigart_cleanup(dev, &dev_priv->gart_info)) + DRM_ERROR("failed to cleanup PCI GART!\n"); + } + + if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) + { + drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev); + dev_priv->gart_info.addr = 0; + } + } + /* only clear to the start of flags */ + memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags)); + + return 0; +} + +/* This code will reinit the Radeon CP hardware after a resume from disc. + * AFAIK, it would be very difficult to pickle the state at suspend time, so + * here we make sure that all Radeon hardware initialisation is re-done without + * affecting running applications. + * + * Charl P. Botha <http://cpbotha.net> + */ +static int radeon_do_resume_cp(struct drm_device * dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + + if (!dev_priv) { + DRM_ERROR("Called with no initialization\n"); + return -EINVAL; + } + + DRM_DEBUG("Starting radeon_do_resume_cp()\n"); + +#if __OS_HAS_AGP + if (dev_priv->flags & RADEON_IS_AGP) { + /* Turn off PCI GART */ + radeon_set_pcigart(dev_priv, 0); + } else +#endif + { + /* Turn on PCI GART */ + radeon_set_pcigart(dev_priv, 1); + } + + radeon_cp_load_microcode(dev_priv); + radeon_cp_init_ring_buffer(dev, dev_priv); + + radeon_do_engine_reset(dev); + radeon_enable_interrupt(dev); + + DRM_DEBUG("radeon_do_resume_cp() complete\n"); + + return 0; +} + +int radeon_cp_init(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_init_t *init = data; + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + if (init->func == RADEON_INIT_R300_CP) + r300_init_reg_flags(dev); + + switch (init->func) { + case RADEON_INIT_CP: + case RADEON_INIT_R200_CP: + case RADEON_INIT_R300_CP: + return radeon_do_init_cp(dev, init); + case RADEON_CLEANUP_CP: + return radeon_do_cleanup_cp(dev); + } + + return -EINVAL; +} + +int radeon_cp_start(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + DRM_DEBUG("\n"); + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + if (dev_priv->cp_running) { + DRM_DEBUG("while CP running\n"); + return 0; + } + if (dev_priv->cp_mode == RADEON_CSQ_PRIDIS_INDDIS) { + DRM_DEBUG("called with bogus CP mode (%d)\n", + dev_priv->cp_mode); + return 0; + } + + radeon_do_cp_start(dev_priv); + + return 0; +} + +/* Stop the CP. The engine must have been idled before calling this + * routine. + */ +int radeon_cp_stop(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_cp_stop_t *stop = data; + int ret; + DRM_DEBUG("\n"); + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + if (!dev_priv->cp_running) + return 0; + + /* Flush any pending CP commands. This ensures any outstanding + * commands are exectuted by the engine before we turn it off. + */ + if (stop->flush) { + radeon_do_cp_flush(dev_priv); + } + + /* If we fail to make the engine go idle, we return an error + * code so that the DRM ioctl wrapper can try again. + */ + if (stop->idle) { + ret = radeon_do_cp_idle(dev_priv); + if (ret) + return ret; + } + + /* Finally, we can turn off the CP. If the engine isn't idle, + * we will get some dropped triangles as they won't be fully + * rendered before the CP is shut down. + */ + radeon_do_cp_stop(dev_priv); + + /* Reset the engine */ + radeon_do_engine_reset(dev); + + return 0; +} + +void radeon_do_release(struct drm_device * dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + int i, ret; + + if (dev_priv) { + if (dev_priv->cp_running) { + /* Stop the cp */ + while ((ret = radeon_do_cp_idle(dev_priv)) != 0) { + DRM_DEBUG("radeon_do_cp_idle %d\n", ret); +#ifdef __linux__ + schedule(); +#else + tsleep(&ret, PZERO, "rdnrel", 1); +#endif + } + radeon_do_cp_stop(dev_priv); + radeon_do_engine_reset(dev); + } + + /* Disable *all* interrupts */ + if (dev_priv->mmio) /* remove this after permanent addmaps */ + RADEON_WRITE(RADEON_GEN_INT_CNTL, 0); + + if (dev_priv->mmio) { /* remove all surfaces */ + for (i = 0; i < RADEON_MAX_SURFACES; i++) { + RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * i, 0); + RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + + 16 * i, 0); + RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + + 16 * i, 0); + } + } + + /* Free memory heap structures */ + radeon_mem_takedown(&(dev_priv->gart_heap)); + radeon_mem_takedown(&(dev_priv->fb_heap)); + + /* deallocate kernel resources */ + radeon_do_cleanup_cp(dev); + } +} + +/* Just reset the CP ring. Called as part of an X Server engine reset. + */ +int radeon_cp_reset(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + DRM_DEBUG("\n"); + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + if (!dev_priv) { + DRM_DEBUG("called before init done\n"); + return -EINVAL; + } + + radeon_do_cp_reset(dev_priv); + + /* The CP is no longer running after an engine reset */ + dev_priv->cp_running = 0; + + return 0; +} + +int radeon_cp_idle(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + DRM_DEBUG("\n"); + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + return radeon_do_cp_idle(dev_priv); +} + +/* Added by Charl P. Botha to call radeon_do_resume_cp(). + */ +int radeon_cp_resume(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + + return radeon_do_resume_cp(dev); +} + +int radeon_engine_reset(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + DRM_DEBUG("\n"); + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + return radeon_do_engine_reset(dev); +} + +/* ================================================================ + * Fullscreen mode + */ + +/* KW: Deprecated to say the least: + */ +int radeon_fullscreen(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + return 0; +} + +/* ================================================================ + * Freelist management + */ + +/* Original comment: FIXME: ROTATE_BUFS is a hack to cycle through + * bufs until freelist code is used. Note this hides a problem with + * the scratch register * (used to keep track of last buffer + * completed) being written to before * the last buffer has actually + * completed rendering. + * + * KW: It's also a good way to find free buffers quickly. + * + * KW: Ideally this loop wouldn't exist, and freelist_get wouldn't + * sleep. However, bugs in older versions of radeon_accel.c mean that + * we essentially have to do this, else old clients will break. + * + * However, it does leave open a potential deadlock where all the + * buffers are held by other clients, which can't release them because + * they can't get the lock. + */ + +struct drm_buf *radeon_freelist_get(struct drm_device * dev) +{ + struct drm_device_dma *dma = dev->dma; + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_buf_priv_t *buf_priv; + struct drm_buf *buf; + int i, t; + int start; + + if (++dev_priv->last_buf >= dma->buf_count) + dev_priv->last_buf = 0; + + start = dev_priv->last_buf; + + for (t = 0; t < dev_priv->usec_timeout; t++) { + u32 done_age = GET_SCRATCH(1); + DRM_DEBUG("done_age = %d\n", done_age); + for (i = start; i < dma->buf_count; i++) { + buf = dma->buflist[i]; + buf_priv = buf->dev_private; + if (buf->file_priv == NULL || (buf->pending && + buf_priv->age <= + done_age)) { + dev_priv->stats.requested_bufs++; + buf->pending = 0; + return buf; + } + start = 0; + } + + if (t) { + DRM_UDELAY(1); + dev_priv->stats.freelist_loops++; + } + } + + DRM_DEBUG("returning NULL!\n"); + return NULL; +} + +#if 0 +struct drm_buf *radeon_freelist_get(struct drm_device * dev) +{ + struct drm_device_dma *dma = dev->dma; + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_buf_priv_t *buf_priv; + struct drm_buf *buf; + int i, t; + int start; + u32 done_age = DRM_READ32(dev_priv->ring_rptr, RADEON_SCRATCHOFF(1)); + + if (++dev_priv->last_buf >= dma->buf_count) + dev_priv->last_buf = 0; + + start = dev_priv->last_buf; + dev_priv->stats.freelist_loops++; + + for (t = 0; t < 2; t++) { + for (i = start; i < dma->buf_count; i++) { + buf = dma->buflist[i]; + buf_priv = buf->dev_private; + if (buf->file_priv == 0 || (buf->pending && + buf_priv->age <= + done_age)) { + dev_priv->stats.requested_bufs++; + buf->pending = 0; + return buf; + } + } + start = 0; + } + + return NULL; +} +#endif + +void radeon_freelist_reset(struct drm_device * dev) +{ + struct drm_device_dma *dma = dev->dma; + drm_radeon_private_t *dev_priv = dev->dev_private; + int i; + + dev_priv->last_buf = 0; + for (i = 0; i < dma->buf_count; i++) { + struct drm_buf *buf = dma->buflist[i]; + drm_radeon_buf_priv_t *buf_priv = buf->dev_private; + buf_priv->age = 0; + } +} + +/* ================================================================ + * CP command submission + */ + +int radeon_wait_ring(drm_radeon_private_t * dev_priv, int n) +{ + drm_radeon_ring_buffer_t *ring = &dev_priv->ring; + int i; + u32 last_head = GET_RING_HEAD(dev_priv); + + for (i = 0; i < dev_priv->usec_timeout; i++) { + u32 head = GET_RING_HEAD(dev_priv); + + ring->space = (head - ring->tail) * sizeof(u32); + if (ring->space <= 0) + ring->space += ring->size; + if (ring->space > n) + return 0; + + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + + if (head != last_head) + i = 0; + last_head = head; + + DRM_UDELAY(1); + } + + /* FIXME: This return value is ignored in the BEGIN_RING macro! */ +#if RADEON_FIFO_DEBUG + radeon_status(dev_priv); + DRM_ERROR("failed!\n"); +#endif + return -EBUSY; +} + +static int radeon_cp_get_buffers(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_dma * d) +{ + int i; + struct drm_buf *buf; + + for (i = d->granted_count; i < d->request_count; i++) { + buf = radeon_freelist_get(dev); + if (!buf) + return -EBUSY; /* NOTE: broken client */ + + buf->file_priv = file_priv; + + if (DRM_COPY_TO_USER(&d->request_indices[i], &buf->idx, + sizeof(buf->idx))) + return -EFAULT; + if (DRM_COPY_TO_USER(&d->request_sizes[i], &buf->total, + sizeof(buf->total))) + return -EFAULT; + + d->granted_count++; + } + return 0; +} + +int radeon_cp_buffers(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct drm_device_dma *dma = dev->dma; + int ret = 0; + struct drm_dma *d = data; + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + /* Please don't send us buffers. + */ + if (d->send_count != 0) { + DRM_ERROR("Process %d trying to send %d buffers via drmDMA\n", + DRM_CURRENTPID, d->send_count); + return -EINVAL; + } + + /* We'll send you buffers. + */ + if (d->request_count < 0 || d->request_count > dma->buf_count) { + DRM_ERROR("Process %d trying to get %d buffers (of %d max)\n", + DRM_CURRENTPID, d->request_count, dma->buf_count); + return -EINVAL; + } + + d->granted_count = 0; + + if (d->request_count) { + ret = radeon_cp_get_buffers(dev, file_priv, d); + } + + return ret; +} + +int radeon_driver_load(struct drm_device *dev, unsigned long flags) +{ + drm_radeon_private_t *dev_priv; + int ret = 0; + + dev_priv = drm_alloc(sizeof(drm_radeon_private_t), DRM_MEM_DRIVER); + if (dev_priv == NULL) + return -ENOMEM; + + memset(dev_priv, 0, sizeof(drm_radeon_private_t)); + dev->dev_private = (void *)dev_priv; + dev_priv->flags = flags; + + switch (flags & RADEON_FAMILY_MASK) { + case CHIP_R100: + case CHIP_RV200: + case CHIP_R200: + case CHIP_R300: + case CHIP_R350: + case CHIP_R420: + case CHIP_RV410: + case CHIP_RV515: + case CHIP_R520: + case CHIP_RV570: + case CHIP_R580: + dev_priv->flags |= RADEON_HAS_HIERZ; + break; + default: + /* all other chips have no hierarchical z buffer */ + break; + } + + if (drm_device_is_agp(dev)) + dev_priv->flags |= RADEON_IS_AGP; + else if (drm_device_is_pcie(dev)) + dev_priv->flags |= RADEON_IS_PCIE; + else + dev_priv->flags |= RADEON_IS_PCI; + + DRM_DEBUG("%s card detected\n", + ((dev_priv->flags & RADEON_IS_AGP) ? "AGP" : (((dev_priv->flags & RADEON_IS_PCIE) ? "PCIE" : "PCI")))); + return ret; +} + +/* Create mappings for registers and framebuffer so userland doesn't necessarily + * have to find them. + */ +int radeon_driver_firstopen(struct drm_device *dev) +{ + int ret; + drm_local_map_t *map; + drm_radeon_private_t *dev_priv = dev->dev_private; + + dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE; + + ret = drm_addmap(dev, drm_get_resource_start(dev, 2), + drm_get_resource_len(dev, 2), _DRM_REGISTERS, + _DRM_READ_ONLY, &dev_priv->mmio); + if (ret != 0) + return ret; + + dev_priv->fb_aper_offset = drm_get_resource_start(dev, 0); + ret = drm_addmap(dev, dev_priv->fb_aper_offset, + drm_get_resource_len(dev, 0), _DRM_FRAME_BUFFER, + _DRM_WRITE_COMBINING, &map); + if (ret != 0) + return ret; + + return 0; +} + +int radeon_driver_unload(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + + DRM_DEBUG("\n"); + drm_free(dev_priv, sizeof(*dev_priv), DRM_MEM_DRIVER); + + dev->dev_private = NULL; + return 0; +} diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c new file mode 100644 index 00000000000..349ac3d3b84 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -0,0 +1,126 @@ +/** + * \file radeon_drv.c + * ATI Radeon driver + * + * \author Gareth Hughes <gareth@valinux.com> + */ + +/* + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_drv.h" + +#include "drm_pciids.h" + +int radeon_no_wb; + +MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers\n"); +module_param_named(no_wb, radeon_no_wb, int, 0444); + +static int dri_library_name(struct drm_device *dev, char *buf) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + int family = dev_priv->flags & RADEON_FAMILY_MASK; + + return snprintf(buf, PAGE_SIZE, "%s\n", + (family < CHIP_R200) ? "radeon" : + ((family < CHIP_R300) ? "r200" : + "r300")); +} + +static struct pci_device_id pciidlist[] = { + radeon_PCI_IDS +}; + +static struct drm_driver driver = { + .driver_features = + DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG | + DRIVER_HAVE_IRQ | DRIVER_HAVE_DMA | DRIVER_IRQ_SHARED | + DRIVER_IRQ_VBL | DRIVER_IRQ_VBL2, + .dev_priv_size = sizeof(drm_radeon_buf_priv_t), + .load = radeon_driver_load, + .firstopen = radeon_driver_firstopen, + .open = radeon_driver_open, + .preclose = radeon_driver_preclose, + .postclose = radeon_driver_postclose, + .lastclose = radeon_driver_lastclose, + .unload = radeon_driver_unload, + .vblank_wait = radeon_driver_vblank_wait, + .vblank_wait2 = radeon_driver_vblank_wait2, + .dri_library_name = dri_library_name, + .irq_preinstall = radeon_driver_irq_preinstall, + .irq_postinstall = radeon_driver_irq_postinstall, + .irq_uninstall = radeon_driver_irq_uninstall, + .irq_handler = radeon_driver_irq_handler, + .reclaim_buffers = drm_core_reclaim_buffers, + .get_map_ofs = drm_core_get_map_ofs, + .get_reg_ofs = drm_core_get_reg_ofs, + .ioctls = radeon_ioctls, + .dma_ioctl = radeon_cp_buffers, + .fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .ioctl = drm_ioctl, + .mmap = drm_mmap, + .poll = drm_poll, + .fasync = drm_fasync, +#ifdef CONFIG_COMPAT + .compat_ioctl = radeon_compat_ioctl, +#endif + }, + + .pci_driver = { + .name = DRIVER_NAME, + .id_table = pciidlist, + }, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +static int __init radeon_init(void) +{ + driver.num_ioctls = radeon_max_ioctl; + return drm_init(&driver); +} + +static void __exit radeon_exit(void) +{ + drm_exit(&driver); +} + +module_init(radeon_init); +module_exit(radeon_exit); + +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h new file mode 100644 index 00000000000..3f0eca957aa --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -0,0 +1,1406 @@ +/* radeon_drv.h -- Private header for radeon driver -*- linux-c -*- + * + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Fremont, California. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + */ + +#ifndef __RADEON_DRV_H__ +#define __RADEON_DRV_H__ + +/* General customization: + */ + +#define DRIVER_AUTHOR "Gareth Hughes, Keith Whitwell, others." + +#define DRIVER_NAME "radeon" +#define DRIVER_DESC "ATI Radeon" +#define DRIVER_DATE "20080528" + +/* Interface history: + * + * 1.1 - ?? + * 1.2 - Add vertex2 ioctl (keith) + * - Add stencil capability to clear ioctl (gareth, keith) + * - Increase MAX_TEXTURE_LEVELS (brian) + * 1.3 - Add cmdbuf ioctl (keith) + * - Add support for new radeon packets (keith) + * - Add getparam ioctl (keith) + * - Add flip-buffers ioctl, deprecate fullscreen foo (keith). + * 1.4 - Add scratch registers to get_param ioctl. + * 1.5 - Add r200 packets to cmdbuf ioctl + * - Add r200 function to init ioctl + * - Add 'scalar2' instruction to cmdbuf + * 1.6 - Add static GART memory manager + * Add irq handler (won't be turned on unless X server knows to) + * Add irq ioctls and irq_active getparam. + * Add wait command for cmdbuf ioctl + * Add GART offset query for getparam + * 1.7 - Add support for cube map registers: R200_PP_CUBIC_FACES_[0..5] + * and R200_PP_CUBIC_OFFSET_F1_[0..5]. + * Added packets R200_EMIT_PP_CUBIC_FACES_[0..5] and + * R200_EMIT_PP_CUBIC_OFFSETS_[0..5]. (brian) + * 1.8 - Remove need to call cleanup ioctls on last client exit (keith) + * Add 'GET' queries for starting additional clients on different VT's. + * 1.9 - Add DRM_IOCTL_RADEON_CP_RESUME ioctl. + * Add texture rectangle support for r100. + * 1.10- Add SETPARAM ioctl; first parameter to set is FB_LOCATION, which + * clients use to tell the DRM where they think the framebuffer is + * located in the card's address space + * 1.11- Add packet R200_EMIT_RB3D_BLENDCOLOR to support GL_EXT_blend_color + * and GL_EXT_blend_[func|equation]_separate on r200 + * 1.12- Add R300 CP microcode support - this just loads the CP on r300 + * (No 3D support yet - just microcode loading). + * 1.13- Add packet R200_EMIT_TCL_POINT_SPRITE_CNTL for ARB_point_parameters + * - Add hyperz support, add hyperz flags to clear ioctl. + * 1.14- Add support for color tiling + * - Add R100/R200 surface allocation/free support + * 1.15- Add support for texture micro tiling + * - Add support for r100 cube maps + * 1.16- Add R200_EMIT_PP_TRI_PERF_CNTL packet to support brilinear + * texture filtering on r200 + * 1.17- Add initial support for R300 (3D). + * 1.18- Add support for GL_ATI_fragment_shader, new packets + * R200_EMIT_PP_AFS_0/1, R200_EMIT_PP_TXCTLALL_0-5 (replaces + * R200_EMIT_PP_TXFILTER_0-5, 2 more regs) and R200_EMIT_ATF_TFACTOR + * (replaces R200_EMIT_TFACTOR_0 (8 consts instead of 6) + * 1.19- Add support for gart table in FB memory and PCIE r300 + * 1.20- Add support for r300 texrect + * 1.21- Add support for card type getparam + * 1.22- Add support for texture cache flushes (R300_TX_CNTL) + * 1.23- Add new radeon memory map work from benh + * 1.24- Add general-purpose packet for manipulating scratch registers (r300) + * 1.25- Add support for r200 vertex programs (R200_EMIT_VAP_PVS_CNTL, + * new packet type) + * 1.26- Add support for variable size PCI(E) gart aperture + * 1.27- Add support for IGP GART + * 1.28- Add support for VBL on CRTC2 + * 1.29- R500 3D cmd buffer support + */ +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 29 +#define DRIVER_PATCHLEVEL 0 + +/* + * Radeon chip families + */ +enum radeon_family { + CHIP_R100, + CHIP_RV100, + CHIP_RS100, + CHIP_RV200, + CHIP_RS200, + CHIP_R200, + CHIP_RV250, + CHIP_RS300, + CHIP_RV280, + CHIP_R300, + CHIP_R350, + CHIP_RV350, + CHIP_RV380, + CHIP_R420, + CHIP_RV410, + CHIP_RS480, + CHIP_RS690, + CHIP_RV515, + CHIP_R520, + CHIP_RV530, + CHIP_RV560, + CHIP_RV570, + CHIP_R580, + CHIP_LAST, +}; + +enum radeon_cp_microcode_version { + UCODE_R100, + UCODE_R200, + UCODE_R300, +}; + +/* + * Chip flags + */ +enum radeon_chip_flags { + RADEON_FAMILY_MASK = 0x0000ffffUL, + RADEON_FLAGS_MASK = 0xffff0000UL, + RADEON_IS_MOBILITY = 0x00010000UL, + RADEON_IS_IGP = 0x00020000UL, + RADEON_SINGLE_CRTC = 0x00040000UL, + RADEON_IS_AGP = 0x00080000UL, + RADEON_HAS_HIERZ = 0x00100000UL, + RADEON_IS_PCIE = 0x00200000UL, + RADEON_NEW_MEMMAP = 0x00400000UL, + RADEON_IS_PCI = 0x00800000UL, + RADEON_IS_IGPGART = 0x01000000UL, +}; + +#define GET_RING_HEAD(dev_priv) (dev_priv->writeback_works ? \ + DRM_READ32( (dev_priv)->ring_rptr, 0 ) : RADEON_READ(RADEON_CP_RB_RPTR)) +#define SET_RING_HEAD(dev_priv,val) DRM_WRITE32( (dev_priv)->ring_rptr, 0, (val) ) + +typedef struct drm_radeon_freelist { + unsigned int age; + struct drm_buf *buf; + struct drm_radeon_freelist *next; + struct drm_radeon_freelist *prev; +} drm_radeon_freelist_t; + +typedef struct drm_radeon_ring_buffer { + u32 *start; + u32 *end; + int size; + int size_l2qw; + + int rptr_update; /* Double Words */ + int rptr_update_l2qw; /* log2 Quad Words */ + + int fetch_size; /* Double Words */ + int fetch_size_l2ow; /* log2 Oct Words */ + + u32 tail; + u32 tail_mask; + int space; + + int high_mark; +} drm_radeon_ring_buffer_t; + +typedef struct drm_radeon_depth_clear_t { + u32 rb3d_cntl; + u32 rb3d_zstencilcntl; + u32 se_cntl; +} drm_radeon_depth_clear_t; + +struct drm_radeon_driver_file_fields { + int64_t radeon_fb_delta; +}; + +struct mem_block { + struct mem_block *next; + struct mem_block *prev; + int start; + int size; + struct drm_file *file_priv; /* NULL: free, -1: heap, other: real files */ +}; + +struct radeon_surface { + int refcount; + u32 lower; + u32 upper; + u32 flags; +}; + +struct radeon_virt_surface { + int surface_index; + u32 lower; + u32 upper; + u32 flags; + struct drm_file *file_priv; +}; + +typedef struct drm_radeon_private { + drm_radeon_ring_buffer_t ring; + drm_radeon_sarea_t *sarea_priv; + + u32 fb_location; + u32 fb_size; + int new_memmap; + + int gart_size; + u32 gart_vm_start; + unsigned long gart_buffers_offset; + + int cp_mode; + int cp_running; + + drm_radeon_freelist_t *head; + drm_radeon_freelist_t *tail; + int last_buf; + volatile u32 *scratch; + int writeback_works; + + int usec_timeout; + + int microcode_version; + + struct { + u32 boxes; + int freelist_timeouts; + int freelist_loops; + int requested_bufs; + int last_frame_reads; + int last_clear_reads; + int clears; + int texture_uploads; + } stats; + + int do_boxes; + int page_flipping; + + u32 color_fmt; + unsigned int front_offset; + unsigned int front_pitch; + unsigned int back_offset; + unsigned int back_pitch; + + u32 depth_fmt; + unsigned int depth_offset; + unsigned int depth_pitch; + + u32 front_pitch_offset; + u32 back_pitch_offset; + u32 depth_pitch_offset; + + drm_radeon_depth_clear_t depth_clear; + + unsigned long ring_offset; + unsigned long ring_rptr_offset; + unsigned long buffers_offset; + unsigned long gart_textures_offset; + + drm_local_map_t *sarea; + drm_local_map_t *mmio; + drm_local_map_t *cp_ring; + drm_local_map_t *ring_rptr; + drm_local_map_t *gart_textures; + + struct mem_block *gart_heap; + struct mem_block *fb_heap; + + /* SW interrupt */ + wait_queue_head_t swi_queue; + atomic_t swi_emitted; + int vblank_crtc; + uint32_t irq_enable_reg; + int irq_enabled; + uint32_t r500_disp_irq_reg; + + struct radeon_surface surfaces[RADEON_MAX_SURFACES]; + struct radeon_virt_surface virt_surfaces[2 * RADEON_MAX_SURFACES]; + + unsigned long pcigart_offset; + unsigned int pcigart_offset_set; + struct drm_ati_pcigart_info gart_info; + + u32 scratch_ages[5]; + + /* starting from here on, data is preserved accross an open */ + uint32_t flags; /* see radeon_chip_flags */ + unsigned long fb_aper_offset; + + int num_gb_pipes; +} drm_radeon_private_t; + +typedef struct drm_radeon_buf_priv { + u32 age; +} drm_radeon_buf_priv_t; + +typedef struct drm_radeon_kcmd_buffer { + int bufsz; + char *buf; + int nbox; + struct drm_clip_rect __user *boxes; +} drm_radeon_kcmd_buffer_t; + +extern int radeon_no_wb; +extern struct drm_ioctl_desc radeon_ioctls[]; +extern int radeon_max_ioctl; + +/* Check whether the given hardware address is inside the framebuffer or the + * GART area. + */ +static __inline__ int radeon_check_offset(drm_radeon_private_t *dev_priv, + u64 off) +{ + u32 fb_start = dev_priv->fb_location; + u32 fb_end = fb_start + dev_priv->fb_size - 1; + u32 gart_start = dev_priv->gart_vm_start; + u32 gart_end = gart_start + dev_priv->gart_size - 1; + + return ((off >= fb_start && off <= fb_end) || + (off >= gart_start && off <= gart_end)); +} + + /* radeon_cp.c */ +extern int radeon_cp_init(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int radeon_cp_start(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int radeon_cp_stop(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int radeon_cp_reset(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int radeon_cp_idle(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int radeon_cp_resume(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int radeon_engine_reset(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int radeon_fullscreen(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int radeon_cp_buffers(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv); + +extern void radeon_freelist_reset(struct drm_device * dev); +extern struct drm_buf *radeon_freelist_get(struct drm_device * dev); + +extern int radeon_wait_ring(drm_radeon_private_t * dev_priv, int n); + +extern int radeon_do_cp_idle(drm_radeon_private_t * dev_priv); + +extern int radeon_driver_preinit(struct drm_device *dev, unsigned long flags); +extern int radeon_presetup(struct drm_device *dev); +extern int radeon_driver_postcleanup(struct drm_device *dev); + +extern int radeon_mem_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int radeon_mem_free(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int radeon_mem_init_heap(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern void radeon_mem_takedown(struct mem_block **heap); +extern void radeon_mem_release(struct drm_file *file_priv, + struct mem_block *heap); + + /* radeon_irq.c */ +extern int radeon_irq_emit(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int radeon_irq_wait(struct drm_device *dev, void *data, struct drm_file *file_priv); + +extern void radeon_do_release(struct drm_device * dev); +extern int radeon_driver_vblank_wait(struct drm_device * dev, + unsigned int *sequence); +extern int radeon_driver_vblank_wait2(struct drm_device * dev, + unsigned int *sequence); +extern irqreturn_t radeon_driver_irq_handler(DRM_IRQ_ARGS); +extern void radeon_driver_irq_preinstall(struct drm_device * dev); +extern void radeon_driver_irq_postinstall(struct drm_device * dev); +extern void radeon_driver_irq_uninstall(struct drm_device * dev); +extern void radeon_enable_interrupt(struct drm_device *dev); +extern int radeon_vblank_crtc_get(struct drm_device *dev); +extern int radeon_vblank_crtc_set(struct drm_device *dev, int64_t value); + +extern int radeon_driver_load(struct drm_device *dev, unsigned long flags); +extern int radeon_driver_unload(struct drm_device *dev); +extern int radeon_driver_firstopen(struct drm_device *dev); +extern void radeon_driver_preclose(struct drm_device * dev, struct drm_file *file_priv); +extern void radeon_driver_postclose(struct drm_device * dev, struct drm_file * filp); +extern void radeon_driver_lastclose(struct drm_device * dev); +extern int radeon_driver_open(struct drm_device * dev, struct drm_file * filp_priv); +extern long radeon_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg); + +/* r300_cmdbuf.c */ +extern void r300_init_reg_flags(struct drm_device *dev); + +extern int r300_do_cp_cmdbuf(struct drm_device * dev, + struct drm_file *file_priv, + drm_radeon_kcmd_buffer_t * cmdbuf); + +/* Flags for stats.boxes + */ +#define RADEON_BOX_DMA_IDLE 0x1 +#define RADEON_BOX_RING_FULL 0x2 +#define RADEON_BOX_FLIP 0x4 +#define RADEON_BOX_WAIT_IDLE 0x8 +#define RADEON_BOX_TEXTURE_LOAD 0x10 + +/* Register definitions, register access macros and drmAddMap constants + * for Radeon kernel driver. + */ + +#define RADEON_AGP_COMMAND 0x0f60 +#define RADEON_AGP_COMMAND_PCI_CONFIG 0x0060 /* offset in PCI config */ +# define RADEON_AGP_ENABLE (1<<8) +#define RADEON_AUX_SCISSOR_CNTL 0x26f0 +# define RADEON_EXCLUSIVE_SCISSOR_0 (1 << 24) +# define RADEON_EXCLUSIVE_SCISSOR_1 (1 << 25) +# define RADEON_EXCLUSIVE_SCISSOR_2 (1 << 26) +# define RADEON_SCISSOR_0_ENABLE (1 << 28) +# define RADEON_SCISSOR_1_ENABLE (1 << 29) +# define RADEON_SCISSOR_2_ENABLE (1 << 30) + +#define RADEON_BUS_CNTL 0x0030 +# define RADEON_BUS_MASTER_DIS (1 << 6) + +#define RADEON_CLOCK_CNTL_DATA 0x000c +# define RADEON_PLL_WR_EN (1 << 7) +#define RADEON_CLOCK_CNTL_INDEX 0x0008 +#define RADEON_CONFIG_APER_SIZE 0x0108 +#define RADEON_CONFIG_MEMSIZE 0x00f8 +#define RADEON_CRTC_OFFSET 0x0224 +#define RADEON_CRTC_OFFSET_CNTL 0x0228 +# define RADEON_CRTC_TILE_EN (1 << 15) +# define RADEON_CRTC_OFFSET_FLIP_CNTL (1 << 16) +#define RADEON_CRTC2_OFFSET 0x0324 +#define RADEON_CRTC2_OFFSET_CNTL 0x0328 + +#define RADEON_PCIE_INDEX 0x0030 +#define RADEON_PCIE_DATA 0x0034 +#define RADEON_PCIE_TX_GART_CNTL 0x10 +# define RADEON_PCIE_TX_GART_EN (1 << 0) +# define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_PASS_THRU (0 << 1) +# define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_CLAMP_LO (1 << 1) +# define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD (3 << 1) +# define RADEON_PCIE_TX_GART_MODE_32_128_CACHE (0 << 3) +# define RADEON_PCIE_TX_GART_MODE_8_4_128_CACHE (1 << 3) +# define RADEON_PCIE_TX_GART_CHK_RW_VALID_EN (1 << 5) +# define RADEON_PCIE_TX_GART_INVALIDATE_TLB (1 << 8) +#define RADEON_PCIE_TX_DISCARD_RD_ADDR_LO 0x11 +#define RADEON_PCIE_TX_DISCARD_RD_ADDR_HI 0x12 +#define RADEON_PCIE_TX_GART_BASE 0x13 +#define RADEON_PCIE_TX_GART_START_LO 0x14 +#define RADEON_PCIE_TX_GART_START_HI 0x15 +#define RADEON_PCIE_TX_GART_END_LO 0x16 +#define RADEON_PCIE_TX_GART_END_HI 0x17 + +#define RS480_NB_MC_INDEX 0x168 +# define RS480_NB_MC_IND_WR_EN (1 << 8) +#define RS480_NB_MC_DATA 0x16c + +#define RS690_MC_INDEX 0x78 +# define RS690_MC_INDEX_MASK 0x1ff +# define RS690_MC_INDEX_WR_EN (1 << 9) +# define RS690_MC_INDEX_WR_ACK 0x7f +#define RS690_MC_DATA 0x7c + +/* MC indirect registers */ +#define RS480_MC_MISC_CNTL 0x18 +# define RS480_DISABLE_GTW (1 << 1) +/* switch between MCIND GART and MM GART registers. 0 = mmgart, 1 = mcind gart */ +# define RS480_GART_INDEX_REG_EN (1 << 12) +# define RS690_BLOCK_GFX_D3_EN (1 << 14) +#define RS480_K8_FB_LOCATION 0x1e +#define RS480_GART_FEATURE_ID 0x2b +# define RS480_HANG_EN (1 << 11) +# define RS480_TLB_ENABLE (1 << 18) +# define RS480_P2P_ENABLE (1 << 19) +# define RS480_GTW_LAC_EN (1 << 25) +# define RS480_2LEVEL_GART (0 << 30) +# define RS480_1LEVEL_GART (1 << 30) +# define RS480_PDC_EN (1 << 31) +#define RS480_GART_BASE 0x2c +#define RS480_GART_CACHE_CNTRL 0x2e +# define RS480_GART_CACHE_INVALIDATE (1 << 0) /* wait for it to clear */ +#define RS480_AGP_ADDRESS_SPACE_SIZE 0x38 +# define RS480_GART_EN (1 << 0) +# define RS480_VA_SIZE_32MB (0 << 1) +# define RS480_VA_SIZE_64MB (1 << 1) +# define RS480_VA_SIZE_128MB (2 << 1) +# define RS480_VA_SIZE_256MB (3 << 1) +# define RS480_VA_SIZE_512MB (4 << 1) +# define RS480_VA_SIZE_1GB (5 << 1) +# define RS480_VA_SIZE_2GB (6 << 1) +#define RS480_AGP_MODE_CNTL 0x39 +# define RS480_POST_GART_Q_SIZE (1 << 18) +# define RS480_NONGART_SNOOP (1 << 19) +# define RS480_AGP_RD_BUF_SIZE (1 << 20) +# define RS480_REQ_TYPE_SNOOP_SHIFT 22 +# define RS480_REQ_TYPE_SNOOP_MASK 0x3 +# define RS480_REQ_TYPE_SNOOP_DIS (1 << 24) +#define RS480_MC_MISC_UMA_CNTL 0x5f +#define RS480_MC_MCLK_CNTL 0x7a +#define RS480_MC_UMA_DUALCH_CNTL 0x86 + +#define RS690_MC_FB_LOCATION 0x100 +#define RS690_MC_AGP_LOCATION 0x101 +#define RS690_MC_AGP_BASE 0x102 +#define RS690_MC_AGP_BASE_2 0x103 + +#define R520_MC_IND_INDEX 0x70 +#define R520_MC_IND_WR_EN (1 << 24) +#define R520_MC_IND_DATA 0x74 + +#define RV515_MC_FB_LOCATION 0x01 +#define RV515_MC_AGP_LOCATION 0x02 +#define RV515_MC_AGP_BASE 0x03 +#define RV515_MC_AGP_BASE_2 0x04 + +#define R520_MC_FB_LOCATION 0x04 +#define R520_MC_AGP_LOCATION 0x05 +#define R520_MC_AGP_BASE 0x06 +#define R520_MC_AGP_BASE_2 0x07 + +#define RADEON_MPP_TB_CONFIG 0x01c0 +#define RADEON_MEM_CNTL 0x0140 +#define RADEON_MEM_SDRAM_MODE_REG 0x0158 +#define RADEON_AGP_BASE_2 0x015c /* r200+ only */ +#define RS480_AGP_BASE_2 0x0164 +#define RADEON_AGP_BASE 0x0170 + +/* pipe config regs */ +#define R400_GB_PIPE_SELECT 0x402c +#define R500_DYN_SCLK_PWMEM_PIPE 0x000d /* PLL */ +#define R500_SU_REG_DEST 0x42c8 +#define R300_GB_TILE_CONFIG 0x4018 +# define R300_ENABLE_TILING (1 << 0) +# define R300_PIPE_COUNT_RV350 (0 << 1) +# define R300_PIPE_COUNT_R300 (3 << 1) +# define R300_PIPE_COUNT_R420_3P (6 << 1) +# define R300_PIPE_COUNT_R420 (7 << 1) +# define R300_TILE_SIZE_8 (0 << 4) +# define R300_TILE_SIZE_16 (1 << 4) +# define R300_TILE_SIZE_32 (2 << 4) +# define R300_SUBPIXEL_1_12 (0 << 16) +# define R300_SUBPIXEL_1_16 (1 << 16) +#define R300_DST_PIPE_CONFIG 0x170c +# define R300_PIPE_AUTO_CONFIG (1 << 31) +#define R300_RB2D_DSTCACHE_MODE 0x3428 +# define R300_DC_AUTOFLUSH_ENABLE (1 << 8) +# define R300_DC_DC_DISABLE_IGNORE_PE (1 << 17) + +#define RADEON_RB3D_COLOROFFSET 0x1c40 +#define RADEON_RB3D_COLORPITCH 0x1c48 + +#define RADEON_SRC_X_Y 0x1590 + +#define RADEON_DP_GUI_MASTER_CNTL 0x146c +# define RADEON_GMC_SRC_PITCH_OFFSET_CNTL (1 << 0) +# define RADEON_GMC_DST_PITCH_OFFSET_CNTL (1 << 1) +# define RADEON_GMC_BRUSH_SOLID_COLOR (13 << 4) +# define RADEON_GMC_BRUSH_NONE (15 << 4) +# define RADEON_GMC_DST_16BPP (4 << 8) +# define RADEON_GMC_DST_24BPP (5 << 8) +# define RADEON_GMC_DST_32BPP (6 << 8) +# define RADEON_GMC_DST_DATATYPE_SHIFT 8 +# define RADEON_GMC_SRC_DATATYPE_COLOR (3 << 12) +# define RADEON_DP_SRC_SOURCE_MEMORY (2 << 24) +# define RADEON_DP_SRC_SOURCE_HOST_DATA (3 << 24) +# define RADEON_GMC_CLR_CMP_CNTL_DIS (1 << 28) +# define RADEON_GMC_WR_MSK_DIS (1 << 30) +# define RADEON_ROP3_S 0x00cc0000 +# define RADEON_ROP3_P 0x00f00000 +#define RADEON_DP_WRITE_MASK 0x16cc +#define RADEON_SRC_PITCH_OFFSET 0x1428 +#define RADEON_DST_PITCH_OFFSET 0x142c +#define RADEON_DST_PITCH_OFFSET_C 0x1c80 +# define RADEON_DST_TILE_LINEAR (0 << 30) +# define RADEON_DST_TILE_MACRO (1 << 30) +# define RADEON_DST_TILE_MICRO (2 << 30) +# define RADEON_DST_TILE_BOTH (3 << 30) + +#define RADEON_SCRATCH_REG0 0x15e0 +#define RADEON_SCRATCH_REG1 0x15e4 +#define RADEON_SCRATCH_REG2 0x15e8 +#define RADEON_SCRATCH_REG3 0x15ec +#define RADEON_SCRATCH_REG4 0x15f0 +#define RADEON_SCRATCH_REG5 0x15f4 +#define RADEON_SCRATCH_UMSK 0x0770 +#define RADEON_SCRATCH_ADDR 0x0774 + +#define RADEON_SCRATCHOFF( x ) (RADEON_SCRATCH_REG_OFFSET + 4*(x)) + +#define GET_SCRATCH( x ) (dev_priv->writeback_works \ + ? DRM_READ32( dev_priv->ring_rptr, RADEON_SCRATCHOFF(x) ) \ + : RADEON_READ( RADEON_SCRATCH_REG0 + 4*(x) ) ) + +#define RADEON_GEN_INT_CNTL 0x0040 +# define RADEON_CRTC_VBLANK_MASK (1 << 0) +# define RADEON_CRTC2_VBLANK_MASK (1 << 9) +# define RADEON_GUI_IDLE_INT_ENABLE (1 << 19) +# define RADEON_SW_INT_ENABLE (1 << 25) + +#define RADEON_GEN_INT_STATUS 0x0044 +# define RADEON_CRTC_VBLANK_STAT (1 << 0) +# define RADEON_CRTC_VBLANK_STAT_ACK (1 << 0) +# define RADEON_CRTC2_VBLANK_STAT (1 << 9) +# define RADEON_CRTC2_VBLANK_STAT_ACK (1 << 9) +# define RADEON_GUI_IDLE_INT_TEST_ACK (1 << 19) +# define RADEON_SW_INT_TEST (1 << 25) +# define RADEON_SW_INT_TEST_ACK (1 << 25) +# define RADEON_SW_INT_FIRE (1 << 26) + +#define RADEON_HOST_PATH_CNTL 0x0130 +# define RADEON_HDP_SOFT_RESET (1 << 26) +# define RADEON_HDP_WC_TIMEOUT_MASK (7 << 28) +# define RADEON_HDP_WC_TIMEOUT_28BCLK (7 << 28) + +#define RADEON_ISYNC_CNTL 0x1724 +# define RADEON_ISYNC_ANY2D_IDLE3D (1 << 0) +# define RADEON_ISYNC_ANY3D_IDLE2D (1 << 1) +# define RADEON_ISYNC_TRIG2D_IDLE3D (1 << 2) +# define RADEON_ISYNC_TRIG3D_IDLE2D (1 << 3) +# define RADEON_ISYNC_WAIT_IDLEGUI (1 << 4) +# define RADEON_ISYNC_CPSCRATCH_IDLEGUI (1 << 5) + +#define RADEON_RBBM_GUICNTL 0x172c +# define RADEON_HOST_DATA_SWAP_NONE (0 << 0) +# define RADEON_HOST_DATA_SWAP_16BIT (1 << 0) +# define RADEON_HOST_DATA_SWAP_32BIT (2 << 0) +# define RADEON_HOST_DATA_SWAP_HDW (3 << 0) + +#define RADEON_MC_AGP_LOCATION 0x014c +#define RADEON_MC_FB_LOCATION 0x0148 +#define RADEON_MCLK_CNTL 0x0012 +# define RADEON_FORCEON_MCLKA (1 << 16) +# define RADEON_FORCEON_MCLKB (1 << 17) +# define RADEON_FORCEON_YCLKA (1 << 18) +# define RADEON_FORCEON_YCLKB (1 << 19) +# define RADEON_FORCEON_MC (1 << 20) +# define RADEON_FORCEON_AIC (1 << 21) + +#define RADEON_PP_BORDER_COLOR_0 0x1d40 +#define RADEON_PP_BORDER_COLOR_1 0x1d44 +#define RADEON_PP_BORDER_COLOR_2 0x1d48 +#define RADEON_PP_CNTL 0x1c38 +# define RADEON_SCISSOR_ENABLE (1 << 1) +#define RADEON_PP_LUM_MATRIX 0x1d00 +#define RADEON_PP_MISC 0x1c14 +#define RADEON_PP_ROT_MATRIX_0 0x1d58 +#define RADEON_PP_TXFILTER_0 0x1c54 +#define RADEON_PP_TXOFFSET_0 0x1c5c +#define RADEON_PP_TXFILTER_1 0x1c6c +#define RADEON_PP_TXFILTER_2 0x1c84 + +#define R300_RB2D_DSTCACHE_CTLSTAT 0x342c /* use R300_DSTCACHE_CTLSTAT */ +#define R300_DSTCACHE_CTLSTAT 0x1714 +# define R300_RB2D_DC_FLUSH (3 << 0) +# define R300_RB2D_DC_FREE (3 << 2) +# define R300_RB2D_DC_FLUSH_ALL 0xf +# define R300_RB2D_DC_BUSY (1 << 31) +#define RADEON_RB3D_CNTL 0x1c3c +# define RADEON_ALPHA_BLEND_ENABLE (1 << 0) +# define RADEON_PLANE_MASK_ENABLE (1 << 1) +# define RADEON_DITHER_ENABLE (1 << 2) +# define RADEON_ROUND_ENABLE (1 << 3) +# define RADEON_SCALE_DITHER_ENABLE (1 << 4) +# define RADEON_DITHER_INIT (1 << 5) +# define RADEON_ROP_ENABLE (1 << 6) +# define RADEON_STENCIL_ENABLE (1 << 7) +# define RADEON_Z_ENABLE (1 << 8) +# define RADEON_ZBLOCK16 (1 << 15) +#define RADEON_RB3D_DEPTHOFFSET 0x1c24 +#define RADEON_RB3D_DEPTHCLEARVALUE 0x3230 +#define RADEON_RB3D_DEPTHPITCH 0x1c28 +#define RADEON_RB3D_PLANEMASK 0x1d84 +#define RADEON_RB3D_STENCILREFMASK 0x1d7c +#define RADEON_RB3D_ZCACHE_MODE 0x3250 +#define RADEON_RB3D_ZCACHE_CTLSTAT 0x3254 +# define RADEON_RB3D_ZC_FLUSH (1 << 0) +# define RADEON_RB3D_ZC_FREE (1 << 2) +# define RADEON_RB3D_ZC_FLUSH_ALL 0x5 +# define RADEON_RB3D_ZC_BUSY (1 << 31) +#define R300_ZB_ZCACHE_CTLSTAT 0x4f18 +# define R300_ZC_FLUSH (1 << 0) +# define R300_ZC_FREE (1 << 1) +# define R300_ZC_FLUSH_ALL 0x3 +# define R300_ZC_BUSY (1 << 31) +#define RADEON_RB3D_DSTCACHE_CTLSTAT 0x325c +# define RADEON_RB3D_DC_FLUSH (3 << 0) +# define RADEON_RB3D_DC_FREE (3 << 2) +# define RADEON_RB3D_DC_FLUSH_ALL 0xf +# define RADEON_RB3D_DC_BUSY (1 << 31) +#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c +# define R300_RB3D_DC_FINISH (1 << 4) +#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c +# define RADEON_Z_TEST_MASK (7 << 4) +# define RADEON_Z_TEST_ALWAYS (7 << 4) +# define RADEON_Z_HIERARCHY_ENABLE (1 << 8) +# define RADEON_STENCIL_TEST_ALWAYS (7 << 12) +# define RADEON_STENCIL_S_FAIL_REPLACE (2 << 16) +# define RADEON_STENCIL_ZPASS_REPLACE (2 << 20) +# define RADEON_STENCIL_ZFAIL_REPLACE (2 << 24) +# define RADEON_Z_COMPRESSION_ENABLE (1 << 28) +# define RADEON_FORCE_Z_DIRTY (1 << 29) +# define RADEON_Z_WRITE_ENABLE (1 << 30) +# define RADEON_Z_DECOMPRESSION_ENABLE (1 << 31) +#define RADEON_RBBM_SOFT_RESET 0x00f0 +# define RADEON_SOFT_RESET_CP (1 << 0) +# define RADEON_SOFT_RESET_HI (1 << 1) +# define RADEON_SOFT_RESET_SE (1 << 2) +# define RADEON_SOFT_RESET_RE (1 << 3) +# define RADEON_SOFT_RESET_PP (1 << 4) +# define RADEON_SOFT_RESET_E2 (1 << 5) +# define RADEON_SOFT_RESET_RB (1 << 6) +# define RADEON_SOFT_RESET_HDP (1 << 7) +/* + * 6:0 Available slots in the FIFO + * 8 Host Interface active + * 9 CP request active + * 10 FIFO request active + * 11 Host Interface retry active + * 12 CP retry active + * 13 FIFO retry active + * 14 FIFO pipeline busy + * 15 Event engine busy + * 16 CP command stream busy + * 17 2D engine busy + * 18 2D portion of render backend busy + * 20 3D setup engine busy + * 26 GA engine busy + * 27 CBA 2D engine busy + * 31 2D engine busy or 3D engine busy or FIFO not empty or CP busy or + * command stream queue not empty or Ring Buffer not empty + */ +#define RADEON_RBBM_STATUS 0x0e40 +/* Same as the previous RADEON_RBBM_STATUS; this is a mirror of that register. */ +/* #define RADEON_RBBM_STATUS 0x1740 */ +/* bits 6:0 are dword slots available in the cmd fifo */ +# define RADEON_RBBM_FIFOCNT_MASK 0x007f +# define RADEON_HIRQ_ON_RBB (1 << 8) +# define RADEON_CPRQ_ON_RBB (1 << 9) +# define RADEON_CFRQ_ON_RBB (1 << 10) +# define RADEON_HIRQ_IN_RTBUF (1 << 11) +# define RADEON_CPRQ_IN_RTBUF (1 << 12) +# define RADEON_CFRQ_IN_RTBUF (1 << 13) +# define RADEON_PIPE_BUSY (1 << 14) +# define RADEON_ENG_EV_BUSY (1 << 15) +# define RADEON_CP_CMDSTRM_BUSY (1 << 16) +# define RADEON_E2_BUSY (1 << 17) +# define RADEON_RB2D_BUSY (1 << 18) +# define RADEON_RB3D_BUSY (1 << 19) /* not used on r300 */ +# define RADEON_VAP_BUSY (1 << 20) +# define RADEON_RE_BUSY (1 << 21) /* not used on r300 */ +# define RADEON_TAM_BUSY (1 << 22) /* not used on r300 */ +# define RADEON_TDM_BUSY (1 << 23) /* not used on r300 */ +# define RADEON_PB_BUSY (1 << 24) /* not used on r300 */ +# define RADEON_TIM_BUSY (1 << 25) /* not used on r300 */ +# define RADEON_GA_BUSY (1 << 26) +# define RADEON_CBA2D_BUSY (1 << 27) +# define RADEON_RBBM_ACTIVE (1 << 31) +#define RADEON_RE_LINE_PATTERN 0x1cd0 +#define RADEON_RE_MISC 0x26c4 +#define RADEON_RE_TOP_LEFT 0x26c0 +#define RADEON_RE_WIDTH_HEIGHT 0x1c44 +#define RADEON_RE_STIPPLE_ADDR 0x1cc8 +#define RADEON_RE_STIPPLE_DATA 0x1ccc + +#define RADEON_SCISSOR_TL_0 0x1cd8 +#define RADEON_SCISSOR_BR_0 0x1cdc +#define RADEON_SCISSOR_TL_1 0x1ce0 +#define RADEON_SCISSOR_BR_1 0x1ce4 +#define RADEON_SCISSOR_TL_2 0x1ce8 +#define RADEON_SCISSOR_BR_2 0x1cec +#define RADEON_SE_COORD_FMT 0x1c50 +#define RADEON_SE_CNTL 0x1c4c +# define RADEON_FFACE_CULL_CW (0 << 0) +# define RADEON_BFACE_SOLID (3 << 1) +# define RADEON_FFACE_SOLID (3 << 3) +# define RADEON_FLAT_SHADE_VTX_LAST (3 << 6) +# define RADEON_DIFFUSE_SHADE_FLAT (1 << 8) +# define RADEON_DIFFUSE_SHADE_GOURAUD (2 << 8) +# define RADEON_ALPHA_SHADE_FLAT (1 << 10) +# define RADEON_ALPHA_SHADE_GOURAUD (2 << 10) +# define RADEON_SPECULAR_SHADE_FLAT (1 << 12) +# define RADEON_SPECULAR_SHADE_GOURAUD (2 << 12) +# define RADEON_FOG_SHADE_FLAT (1 << 14) +# define RADEON_FOG_SHADE_GOURAUD (2 << 14) +# define RADEON_VPORT_XY_XFORM_ENABLE (1 << 24) +# define RADEON_VPORT_Z_XFORM_ENABLE (1 << 25) +# define RADEON_VTX_PIX_CENTER_OGL (1 << 27) +# define RADEON_ROUND_MODE_TRUNC (0 << 28) +# define RADEON_ROUND_PREC_8TH_PIX (1 << 30) +#define RADEON_SE_CNTL_STATUS 0x2140 +#define RADEON_SE_LINE_WIDTH 0x1db8 +#define RADEON_SE_VPORT_XSCALE 0x1d98 +#define RADEON_SE_ZBIAS_FACTOR 0x1db0 +#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED 0x2210 +#define RADEON_SE_TCL_OUTPUT_VTX_FMT 0x2254 +#define RADEON_SE_TCL_VECTOR_INDX_REG 0x2200 +# define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT 16 +# define RADEON_VEC_INDX_DWORD_COUNT_SHIFT 28 +#define RADEON_SE_TCL_VECTOR_DATA_REG 0x2204 +#define RADEON_SE_TCL_SCALAR_INDX_REG 0x2208 +# define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT 16 +#define RADEON_SE_TCL_SCALAR_DATA_REG 0x220C +#define RADEON_SURFACE_ACCESS_FLAGS 0x0bf8 +#define RADEON_SURFACE_ACCESS_CLR 0x0bfc +#define RADEON_SURFACE_CNTL 0x0b00 +# define RADEON_SURF_TRANSLATION_DIS (1 << 8) +# define RADEON_NONSURF_AP0_SWP_MASK (3 << 20) +# define RADEON_NONSURF_AP0_SWP_LITTLE (0 << 20) +# define RADEON_NONSURF_AP0_SWP_BIG16 (1 << 20) +# define RADEON_NONSURF_AP0_SWP_BIG32 (2 << 20) +# define RADEON_NONSURF_AP1_SWP_MASK (3 << 22) +# define RADEON_NONSURF_AP1_SWP_LITTLE (0 << 22) +# define RADEON_NONSURF_AP1_SWP_BIG16 (1 << 22) +# define RADEON_NONSURF_AP1_SWP_BIG32 (2 << 22) +#define RADEON_SURFACE0_INFO 0x0b0c +# define RADEON_SURF_PITCHSEL_MASK (0x1ff << 0) +# define RADEON_SURF_TILE_MODE_MASK (3 << 16) +# define RADEON_SURF_TILE_MODE_MACRO (0 << 16) +# define RADEON_SURF_TILE_MODE_MICRO (1 << 16) +# define RADEON_SURF_TILE_MODE_32BIT_Z (2 << 16) +# define RADEON_SURF_TILE_MODE_16BIT_Z (3 << 16) +#define RADEON_SURFACE0_LOWER_BOUND 0x0b04 +#define RADEON_SURFACE0_UPPER_BOUND 0x0b08 +# define RADEON_SURF_ADDRESS_FIXED_MASK (0x3ff << 0) +#define RADEON_SURFACE1_INFO 0x0b1c +#define RADEON_SURFACE1_LOWER_BOUND 0x0b14 +#define RADEON_SURFACE1_UPPER_BOUND 0x0b18 +#define RADEON_SURFACE2_INFO 0x0b2c +#define RADEON_SURFACE2_LOWER_BOUND 0x0b24 +#define RADEON_SURFACE2_UPPER_BOUND 0x0b28 +#define RADEON_SURFACE3_INFO 0x0b3c +#define RADEON_SURFACE3_LOWER_BOUND 0x0b34 +#define RADEON_SURFACE3_UPPER_BOUND 0x0b38 +#define RADEON_SURFACE4_INFO 0x0b4c +#define RADEON_SURFACE4_LOWER_BOUND 0x0b44 +#define RADEON_SURFACE4_UPPER_BOUND 0x0b48 +#define RADEON_SURFACE5_INFO 0x0b5c +#define RADEON_SURFACE5_LOWER_BOUND 0x0b54 +#define RADEON_SURFACE5_UPPER_BOUND 0x0b58 +#define RADEON_SURFACE6_INFO 0x0b6c +#define RADEON_SURFACE6_LOWER_BOUND 0x0b64 +#define RADEON_SURFACE6_UPPER_BOUND 0x0b68 +#define RADEON_SURFACE7_INFO 0x0b7c +#define RADEON_SURFACE7_LOWER_BOUND 0x0b74 +#define RADEON_SURFACE7_UPPER_BOUND 0x0b78 +#define RADEON_SW_SEMAPHORE 0x013c + +#define RADEON_WAIT_UNTIL 0x1720 +# define RADEON_WAIT_CRTC_PFLIP (1 << 0) +# define RADEON_WAIT_2D_IDLE (1 << 14) +# define RADEON_WAIT_3D_IDLE (1 << 15) +# define RADEON_WAIT_2D_IDLECLEAN (1 << 16) +# define RADEON_WAIT_3D_IDLECLEAN (1 << 17) +# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18) + +#define RADEON_RB3D_ZMASKOFFSET 0x3234 +#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c +# define RADEON_DEPTH_FORMAT_16BIT_INT_Z (0 << 0) +# define RADEON_DEPTH_FORMAT_24BIT_INT_Z (2 << 0) + +/* CP registers */ +#define RADEON_CP_ME_RAM_ADDR 0x07d4 +#define RADEON_CP_ME_RAM_RADDR 0x07d8 +#define RADEON_CP_ME_RAM_DATAH 0x07dc +#define RADEON_CP_ME_RAM_DATAL 0x07e0 + +#define RADEON_CP_RB_BASE 0x0700 +#define RADEON_CP_RB_CNTL 0x0704 +# define RADEON_BUF_SWAP_32BIT (2 << 16) +# define RADEON_RB_NO_UPDATE (1 << 27) +#define RADEON_CP_RB_RPTR_ADDR 0x070c +#define RADEON_CP_RB_RPTR 0x0710 +#define RADEON_CP_RB_WPTR 0x0714 + +#define RADEON_CP_RB_WPTR_DELAY 0x0718 +# define RADEON_PRE_WRITE_TIMER_SHIFT 0 +# define RADEON_PRE_WRITE_LIMIT_SHIFT 23 + +#define RADEON_CP_IB_BASE 0x0738 + +#define RADEON_CP_CSQ_CNTL 0x0740 +# define RADEON_CSQ_CNT_PRIMARY_MASK (0xff << 0) +# define RADEON_CSQ_PRIDIS_INDDIS (0 << 28) +# define RADEON_CSQ_PRIPIO_INDDIS (1 << 28) +# define RADEON_CSQ_PRIBM_INDDIS (2 << 28) +# define RADEON_CSQ_PRIPIO_INDBM (3 << 28) +# define RADEON_CSQ_PRIBM_INDBM (4 << 28) +# define RADEON_CSQ_PRIPIO_INDPIO (15 << 28) + +#define RADEON_AIC_CNTL 0x01d0 +# define RADEON_PCIGART_TRANSLATE_EN (1 << 0) +#define RADEON_AIC_STAT 0x01d4 +#define RADEON_AIC_PT_BASE 0x01d8 +#define RADEON_AIC_LO_ADDR 0x01dc +#define RADEON_AIC_HI_ADDR 0x01e0 +#define RADEON_AIC_TLB_ADDR 0x01e4 +#define RADEON_AIC_TLB_DATA 0x01e8 + +/* CP command packets */ +#define RADEON_CP_PACKET0 0x00000000 +# define RADEON_ONE_REG_WR (1 << 15) +#define RADEON_CP_PACKET1 0x40000000 +#define RADEON_CP_PACKET2 0x80000000 +#define RADEON_CP_PACKET3 0xC0000000 +# define RADEON_CP_NOP 0x00001000 +# define RADEON_CP_NEXT_CHAR 0x00001900 +# define RADEON_CP_PLY_NEXTSCAN 0x00001D00 +# define RADEON_CP_SET_SCISSORS 0x00001E00 + /* GEN_INDX_PRIM is unsupported starting with R300 */ +# define RADEON_3D_RNDR_GEN_INDX_PRIM 0x00002300 +# define RADEON_WAIT_FOR_IDLE 0x00002600 +# define RADEON_3D_DRAW_VBUF 0x00002800 +# define RADEON_3D_DRAW_IMMD 0x00002900 +# define RADEON_3D_DRAW_INDX 0x00002A00 +# define RADEON_CP_LOAD_PALETTE 0x00002C00 +# define RADEON_3D_LOAD_VBPNTR 0x00002F00 +# define RADEON_MPEG_IDCT_MACROBLOCK 0x00003000 +# define RADEON_MPEG_IDCT_MACROBLOCK_REV 0x00003100 +# define RADEON_3D_CLEAR_ZMASK 0x00003200 +# define RADEON_CP_INDX_BUFFER 0x00003300 +# define RADEON_CP_3D_DRAW_VBUF_2 0x00003400 +# define RADEON_CP_3D_DRAW_IMMD_2 0x00003500 +# define RADEON_CP_3D_DRAW_INDX_2 0x00003600 +# define RADEON_3D_CLEAR_HIZ 0x00003700 +# define RADEON_CP_3D_CLEAR_CMASK 0x00003802 +# define RADEON_CNTL_HOSTDATA_BLT 0x00009400 +# define RADEON_CNTL_PAINT_MULTI 0x00009A00 +# define RADEON_CNTL_BITBLT_MULTI 0x00009B00 +# define RADEON_CNTL_SET_SCISSORS 0xC0001E00 + +#define RADEON_CP_PACKET_MASK 0xC0000000 +#define RADEON_CP_PACKET_COUNT_MASK 0x3fff0000 +#define RADEON_CP_PACKET0_REG_MASK 0x000007ff +#define RADEON_CP_PACKET1_REG0_MASK 0x000007ff +#define RADEON_CP_PACKET1_REG1_MASK 0x003ff800 + +#define RADEON_VTX_Z_PRESENT (1 << 31) +#define RADEON_VTX_PKCOLOR_PRESENT (1 << 3) + +#define RADEON_PRIM_TYPE_NONE (0 << 0) +#define RADEON_PRIM_TYPE_POINT (1 << 0) +#define RADEON_PRIM_TYPE_LINE (2 << 0) +#define RADEON_PRIM_TYPE_LINE_STRIP (3 << 0) +#define RADEON_PRIM_TYPE_TRI_LIST (4 << 0) +#define RADEON_PRIM_TYPE_TRI_FAN (5 << 0) +#define RADEON_PRIM_TYPE_TRI_STRIP (6 << 0) +#define RADEON_PRIM_TYPE_TRI_TYPE2 (7 << 0) +#define RADEON_PRIM_TYPE_RECT_LIST (8 << 0) +#define RADEON_PRIM_TYPE_3VRT_POINT_LIST (9 << 0) +#define RADEON_PRIM_TYPE_3VRT_LINE_LIST (10 << 0) +#define RADEON_PRIM_TYPE_MASK 0xf +#define RADEON_PRIM_WALK_IND (1 << 4) +#define RADEON_PRIM_WALK_LIST (2 << 4) +#define RADEON_PRIM_WALK_RING (3 << 4) +#define RADEON_COLOR_ORDER_BGRA (0 << 6) +#define RADEON_COLOR_ORDER_RGBA (1 << 6) +#define RADEON_MAOS_ENABLE (1 << 7) +#define RADEON_VTX_FMT_R128_MODE (0 << 8) +#define RADEON_VTX_FMT_RADEON_MODE (1 << 8) +#define RADEON_NUM_VERTICES_SHIFT 16 + +#define RADEON_COLOR_FORMAT_CI8 2 +#define RADEON_COLOR_FORMAT_ARGB1555 3 +#define RADEON_COLOR_FORMAT_RGB565 4 +#define RADEON_COLOR_FORMAT_ARGB8888 6 +#define RADEON_COLOR_FORMAT_RGB332 7 +#define RADEON_COLOR_FORMAT_RGB8 9 +#define RADEON_COLOR_FORMAT_ARGB4444 15 + +#define RADEON_TXFORMAT_I8 0 +#define RADEON_TXFORMAT_AI88 1 +#define RADEON_TXFORMAT_RGB332 2 +#define RADEON_TXFORMAT_ARGB1555 3 +#define RADEON_TXFORMAT_RGB565 4 +#define RADEON_TXFORMAT_ARGB4444 5 +#define RADEON_TXFORMAT_ARGB8888 6 +#define RADEON_TXFORMAT_RGBA8888 7 +#define RADEON_TXFORMAT_Y8 8 +#define RADEON_TXFORMAT_VYUY422 10 +#define RADEON_TXFORMAT_YVYU422 11 +#define RADEON_TXFORMAT_DXT1 12 +#define RADEON_TXFORMAT_DXT23 14 +#define RADEON_TXFORMAT_DXT45 15 + +#define R200_PP_TXCBLEND_0 0x2f00 +#define R200_PP_TXCBLEND_1 0x2f10 +#define R200_PP_TXCBLEND_2 0x2f20 +#define R200_PP_TXCBLEND_3 0x2f30 +#define R200_PP_TXCBLEND_4 0x2f40 +#define R200_PP_TXCBLEND_5 0x2f50 +#define R200_PP_TXCBLEND_6 0x2f60 +#define R200_PP_TXCBLEND_7 0x2f70 +#define R200_SE_TCL_LIGHT_MODEL_CTL_0 0x2268 +#define R200_PP_TFACTOR_0 0x2ee0 +#define R200_SE_VTX_FMT_0 0x2088 +#define R200_SE_VAP_CNTL 0x2080 +#define R200_SE_TCL_MATRIX_SEL_0 0x2230 +#define R200_SE_TCL_TEX_PROC_CTL_2 0x22a8 +#define R200_SE_TCL_UCP_VERT_BLEND_CTL 0x22c0 +#define R200_PP_TXFILTER_5 0x2ca0 +#define R200_PP_TXFILTER_4 0x2c80 +#define R200_PP_TXFILTER_3 0x2c60 +#define R200_PP_TXFILTER_2 0x2c40 +#define R200_PP_TXFILTER_1 0x2c20 +#define R200_PP_TXFILTER_0 0x2c00 +#define R200_PP_TXOFFSET_5 0x2d78 +#define R200_PP_TXOFFSET_4 0x2d60 +#define R200_PP_TXOFFSET_3 0x2d48 +#define R200_PP_TXOFFSET_2 0x2d30 +#define R200_PP_TXOFFSET_1 0x2d18 +#define R200_PP_TXOFFSET_0 0x2d00 + +#define R200_PP_CUBIC_FACES_0 0x2c18 +#define R200_PP_CUBIC_FACES_1 0x2c38 +#define R200_PP_CUBIC_FACES_2 0x2c58 +#define R200_PP_CUBIC_FACES_3 0x2c78 +#define R200_PP_CUBIC_FACES_4 0x2c98 +#define R200_PP_CUBIC_FACES_5 0x2cb8 +#define R200_PP_CUBIC_OFFSET_F1_0 0x2d04 +#define R200_PP_CUBIC_OFFSET_F2_0 0x2d08 +#define R200_PP_CUBIC_OFFSET_F3_0 0x2d0c +#define R200_PP_CUBIC_OFFSET_F4_0 0x2d10 +#define R200_PP_CUBIC_OFFSET_F5_0 0x2d14 +#define R200_PP_CUBIC_OFFSET_F1_1 0x2d1c +#define R200_PP_CUBIC_OFFSET_F2_1 0x2d20 +#define R200_PP_CUBIC_OFFSET_F3_1 0x2d24 +#define R200_PP_CUBIC_OFFSET_F4_1 0x2d28 +#define R200_PP_CUBIC_OFFSET_F5_1 0x2d2c +#define R200_PP_CUBIC_OFFSET_F1_2 0x2d34 +#define R200_PP_CUBIC_OFFSET_F2_2 0x2d38 +#define R200_PP_CUBIC_OFFSET_F3_2 0x2d3c +#define R200_PP_CUBIC_OFFSET_F4_2 0x2d40 +#define R200_PP_CUBIC_OFFSET_F5_2 0x2d44 +#define R200_PP_CUBIC_OFFSET_F1_3 0x2d4c +#define R200_PP_CUBIC_OFFSET_F2_3 0x2d50 +#define R200_PP_CUBIC_OFFSET_F3_3 0x2d54 +#define R200_PP_CUBIC_OFFSET_F4_3 0x2d58 +#define R200_PP_CUBIC_OFFSET_F5_3 0x2d5c +#define R200_PP_CUBIC_OFFSET_F1_4 0x2d64 +#define R200_PP_CUBIC_OFFSET_F2_4 0x2d68 +#define R200_PP_CUBIC_OFFSET_F3_4 0x2d6c +#define R200_PP_CUBIC_OFFSET_F4_4 0x2d70 +#define R200_PP_CUBIC_OFFSET_F5_4 0x2d74 +#define R200_PP_CUBIC_OFFSET_F1_5 0x2d7c +#define R200_PP_CUBIC_OFFSET_F2_5 0x2d80 +#define R200_PP_CUBIC_OFFSET_F3_5 0x2d84 +#define R200_PP_CUBIC_OFFSET_F4_5 0x2d88 +#define R200_PP_CUBIC_OFFSET_F5_5 0x2d8c + +#define R200_RE_AUX_SCISSOR_CNTL 0x26f0 +#define R200_SE_VTE_CNTL 0x20b0 +#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL 0x2250 +#define R200_PP_TAM_DEBUG3 0x2d9c +#define R200_PP_CNTL_X 0x2cc4 +#define R200_SE_VAP_CNTL_STATUS 0x2140 +#define R200_RE_SCISSOR_TL_0 0x1cd8 +#define R200_RE_SCISSOR_TL_1 0x1ce0 +#define R200_RE_SCISSOR_TL_2 0x1ce8 +#define R200_RB3D_DEPTHXY_OFFSET 0x1d60 +#define R200_RE_AUX_SCISSOR_CNTL 0x26f0 +#define R200_SE_VTX_STATE_CNTL 0x2180 +#define R200_RE_POINTSIZE 0x2648 +#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0 0x2254 + +#define RADEON_PP_TEX_SIZE_0 0x1d04 /* NPOT */ +#define RADEON_PP_TEX_SIZE_1 0x1d0c +#define RADEON_PP_TEX_SIZE_2 0x1d14 + +#define RADEON_PP_CUBIC_FACES_0 0x1d24 +#define RADEON_PP_CUBIC_FACES_1 0x1d28 +#define RADEON_PP_CUBIC_FACES_2 0x1d2c +#define RADEON_PP_CUBIC_OFFSET_T0_0 0x1dd0 /* bits [31:5] */ +#define RADEON_PP_CUBIC_OFFSET_T1_0 0x1e00 +#define RADEON_PP_CUBIC_OFFSET_T2_0 0x1e14 + +#define RADEON_SE_TCL_STATE_FLUSH 0x2284 + +#define SE_VAP_CNTL__TCL_ENA_MASK 0x00000001 +#define SE_VAP_CNTL__FORCE_W_TO_ONE_MASK 0x00010000 +#define SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT 0x00000012 +#define SE_VTE_CNTL__VTX_XY_FMT_MASK 0x00000100 +#define SE_VTE_CNTL__VTX_Z_FMT_MASK 0x00000200 +#define SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK 0x00000001 +#define SE_VTX_FMT_0__VTX_W0_PRESENT_MASK 0x00000002 +#define SE_VTX_FMT_0__VTX_COLOR_0_FMT__SHIFT 0x0000000b +#define R200_3D_DRAW_IMMD_2 0xC0003500 +#define R200_SE_VTX_FMT_1 0x208c +#define R200_RE_CNTL 0x1c50 + +#define R200_RB3D_BLENDCOLOR 0x3218 + +#define R200_SE_TCL_POINT_SPRITE_CNTL 0x22c4 + +#define R200_PP_TRI_PERF 0x2cf8 + +#define R200_PP_AFS_0 0x2f80 +#define R200_PP_AFS_1 0x2f00 /* same as txcblend_0 */ + +#define R200_VAP_PVS_CNTL_1 0x22D0 + +#define R500_D1CRTC_STATUS 0x609c +#define R500_D2CRTC_STATUS 0x689c +#define R500_CRTC_V_BLANK (1<<0) + +#define R500_D1CRTC_FRAME_COUNT 0x60a4 +#define R500_D2CRTC_FRAME_COUNT 0x68a4 + +#define R500_D1MODE_V_COUNTER 0x6530 +#define R500_D2MODE_V_COUNTER 0x6d30 + +#define R500_D1MODE_VBLANK_STATUS 0x6534 +#define R500_D2MODE_VBLANK_STATUS 0x6d34 +#define R500_VBLANK_OCCURED (1<<0) +#define R500_VBLANK_ACK (1<<4) +#define R500_VBLANK_STAT (1<<12) +#define R500_VBLANK_INT (1<<16) + +#define R500_DxMODE_INT_MASK 0x6540 +#define R500_D1MODE_INT_MASK (1<<0) +#define R500_D2MODE_INT_MASK (1<<8) + +#define R500_DISP_INTERRUPT_STATUS 0x7edc +#define R500_D1_VBLANK_INTERRUPT (1 << 4) +#define R500_D2_VBLANK_INTERRUPT (1 << 5) + +/* Constants */ +#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ + +#define RADEON_LAST_FRAME_REG RADEON_SCRATCH_REG0 +#define RADEON_LAST_DISPATCH_REG RADEON_SCRATCH_REG1 +#define RADEON_LAST_CLEAR_REG RADEON_SCRATCH_REG2 +#define RADEON_LAST_SWI_REG RADEON_SCRATCH_REG3 +#define RADEON_LAST_DISPATCH 1 + +#define RADEON_MAX_VB_AGE 0x7fffffff +#define RADEON_MAX_VB_VERTS (0xffff) + +#define RADEON_RING_HIGH_MARK 128 + +#define RADEON_PCIGART_TABLE_SIZE (32*1024) + +#define RADEON_READ(reg) DRM_READ32( dev_priv->mmio, (reg) ) +#define RADEON_WRITE(reg,val) DRM_WRITE32( dev_priv->mmio, (reg), (val) ) +#define RADEON_READ8(reg) DRM_READ8( dev_priv->mmio, (reg) ) +#define RADEON_WRITE8(reg,val) DRM_WRITE8( dev_priv->mmio, (reg), (val) ) + +#define RADEON_WRITE_PLL(addr, val) \ +do { \ + RADEON_WRITE8(RADEON_CLOCK_CNTL_INDEX, \ + ((addr) & 0x1f) | RADEON_PLL_WR_EN ); \ + RADEON_WRITE(RADEON_CLOCK_CNTL_DATA, (val)); \ +} while (0) + +#define RADEON_WRITE_PCIE(addr, val) \ +do { \ + RADEON_WRITE8(RADEON_PCIE_INDEX, \ + ((addr) & 0xff)); \ + RADEON_WRITE(RADEON_PCIE_DATA, (val)); \ +} while (0) + +#define R500_WRITE_MCIND(addr, val) \ +do { \ + RADEON_WRITE(R520_MC_IND_INDEX, 0xff0000 | ((addr) & 0xff)); \ + RADEON_WRITE(R520_MC_IND_DATA, (val)); \ + RADEON_WRITE(R520_MC_IND_INDEX, 0); \ +} while (0) + +#define RS480_WRITE_MCIND(addr, val) \ +do { \ + RADEON_WRITE(RS480_NB_MC_INDEX, \ + ((addr) & 0xff) | RS480_NB_MC_IND_WR_EN); \ + RADEON_WRITE(RS480_NB_MC_DATA, (val)); \ + RADEON_WRITE(RS480_NB_MC_INDEX, 0xff); \ +} while (0) + +#define RS690_WRITE_MCIND(addr, val) \ +do { \ + RADEON_WRITE(RS690_MC_INDEX, RS690_MC_INDEX_WR_EN | ((addr) & RS690_MC_INDEX_MASK)); \ + RADEON_WRITE(RS690_MC_DATA, val); \ + RADEON_WRITE(RS690_MC_INDEX, RS690_MC_INDEX_WR_ACK); \ +} while (0) + +#define IGP_WRITE_MCIND(addr, val) \ +do { \ + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) \ + RS690_WRITE_MCIND(addr, val); \ + else \ + RS480_WRITE_MCIND(addr, val); \ +} while (0) + +#define CP_PACKET0( reg, n ) \ + (RADEON_CP_PACKET0 | ((n) << 16) | ((reg) >> 2)) +#define CP_PACKET0_TABLE( reg, n ) \ + (RADEON_CP_PACKET0 | RADEON_ONE_REG_WR | ((n) << 16) | ((reg) >> 2)) +#define CP_PACKET1( reg0, reg1 ) \ + (RADEON_CP_PACKET1 | (((reg1) >> 2) << 15) | ((reg0) >> 2)) +#define CP_PACKET2() \ + (RADEON_CP_PACKET2) +#define CP_PACKET3( pkt, n ) \ + (RADEON_CP_PACKET3 | (pkt) | ((n) << 16)) + +/* ================================================================ + * Engine control helper macros + */ + +#define RADEON_WAIT_UNTIL_2D_IDLE() do { \ + OUT_RING( CP_PACKET0( RADEON_WAIT_UNTIL, 0 ) ); \ + OUT_RING( (RADEON_WAIT_2D_IDLECLEAN | \ + RADEON_WAIT_HOST_IDLECLEAN) ); \ +} while (0) + +#define RADEON_WAIT_UNTIL_3D_IDLE() do { \ + OUT_RING( CP_PACKET0( RADEON_WAIT_UNTIL, 0 ) ); \ + OUT_RING( (RADEON_WAIT_3D_IDLECLEAN | \ + RADEON_WAIT_HOST_IDLECLEAN) ); \ +} while (0) + +#define RADEON_WAIT_UNTIL_IDLE() do { \ + OUT_RING( CP_PACKET0( RADEON_WAIT_UNTIL, 0 ) ); \ + OUT_RING( (RADEON_WAIT_2D_IDLECLEAN | \ + RADEON_WAIT_3D_IDLECLEAN | \ + RADEON_WAIT_HOST_IDLECLEAN) ); \ +} while (0) + +#define RADEON_WAIT_UNTIL_PAGE_FLIPPED() do { \ + OUT_RING( CP_PACKET0( RADEON_WAIT_UNTIL, 0 ) ); \ + OUT_RING( RADEON_WAIT_CRTC_PFLIP ); \ +} while (0) + +#define RADEON_FLUSH_CACHE() do { \ + if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { \ + OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ + OUT_RING(RADEON_RB3D_DC_FLUSH); \ + } else { \ + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \ + OUT_RING(RADEON_RB3D_DC_FLUSH); \ + } \ +} while (0) + +#define RADEON_PURGE_CACHE() do { \ + if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { \ + OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ + OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ + } else { \ + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \ + OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ + } \ +} while (0) + +#define RADEON_FLUSH_ZCACHE() do { \ + if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { \ + OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \ + OUT_RING(RADEON_RB3D_ZC_FLUSH); \ + } else { \ + OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0)); \ + OUT_RING(R300_ZC_FLUSH); \ + } \ +} while (0) + +#define RADEON_PURGE_ZCACHE() do { \ + if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { \ + OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \ + OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \ + } else { \ + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \ + OUT_RING(R300_ZC_FLUSH_ALL); \ + } \ +} while (0) + +/* ================================================================ + * Misc helper macros + */ + +/* Perfbox functionality only. + */ +#define RING_SPACE_TEST_WITH_RETURN( dev_priv ) \ +do { \ + if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE)) { \ + u32 head = GET_RING_HEAD( dev_priv ); \ + if (head == dev_priv->ring.tail) \ + dev_priv->stats.boxes |= RADEON_BOX_DMA_IDLE; \ + } \ +} while (0) + +#define VB_AGE_TEST_WITH_RETURN( dev_priv ) \ +do { \ + drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; \ + if ( sarea_priv->last_dispatch >= RADEON_MAX_VB_AGE ) { \ + int __ret = radeon_do_cp_idle( dev_priv ); \ + if ( __ret ) return __ret; \ + sarea_priv->last_dispatch = 0; \ + radeon_freelist_reset( dev ); \ + } \ +} while (0) + +#define RADEON_DISPATCH_AGE( age ) do { \ + OUT_RING( CP_PACKET0( RADEON_LAST_DISPATCH_REG, 0 ) ); \ + OUT_RING( age ); \ +} while (0) + +#define RADEON_FRAME_AGE( age ) do { \ + OUT_RING( CP_PACKET0( RADEON_LAST_FRAME_REG, 0 ) ); \ + OUT_RING( age ); \ +} while (0) + +#define RADEON_CLEAR_AGE( age ) do { \ + OUT_RING( CP_PACKET0( RADEON_LAST_CLEAR_REG, 0 ) ); \ + OUT_RING( age ); \ +} while (0) + +/* ================================================================ + * Ring control + */ + +#define RADEON_VERBOSE 0 + +#define RING_LOCALS int write, _nr; unsigned int mask; u32 *ring; + +#define BEGIN_RING( n ) do { \ + if ( RADEON_VERBOSE ) { \ + DRM_INFO( "BEGIN_RING( %d )\n", (n)); \ + } \ + if ( dev_priv->ring.space <= (n) * sizeof(u32) ) { \ + COMMIT_RING(); \ + radeon_wait_ring( dev_priv, (n) * sizeof(u32) ); \ + } \ + _nr = n; dev_priv->ring.space -= (n) * sizeof(u32); \ + ring = dev_priv->ring.start; \ + write = dev_priv->ring.tail; \ + mask = dev_priv->ring.tail_mask; \ +} while (0) + +#define ADVANCE_RING() do { \ + if ( RADEON_VERBOSE ) { \ + DRM_INFO( "ADVANCE_RING() wr=0x%06x tail=0x%06x\n", \ + write, dev_priv->ring.tail ); \ + } \ + if (((dev_priv->ring.tail + _nr) & mask) != write) { \ + DRM_ERROR( \ + "ADVANCE_RING(): mismatch: nr: %x write: %x line: %d\n", \ + ((dev_priv->ring.tail + _nr) & mask), \ + write, __LINE__); \ + } else \ + dev_priv->ring.tail = write; \ +} while (0) + +#define COMMIT_RING() do { \ + /* Flush writes to ring */ \ + DRM_MEMORYBARRIER(); \ + GET_RING_HEAD( dev_priv ); \ + RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail ); \ + /* read from PCI bus to ensure correct posting */ \ + RADEON_READ( RADEON_CP_RB_RPTR ); \ +} while (0) + +#define OUT_RING( x ) do { \ + if ( RADEON_VERBOSE ) { \ + DRM_INFO( " OUT_RING( 0x%08x ) at 0x%x\n", \ + (unsigned int)(x), write ); \ + } \ + ring[write++] = (x); \ + write &= mask; \ +} while (0) + +#define OUT_RING_REG( reg, val ) do { \ + OUT_RING( CP_PACKET0( reg, 0 ) ); \ + OUT_RING( val ); \ +} while (0) + +#define OUT_RING_TABLE( tab, sz ) do { \ + int _size = (sz); \ + int *_tab = (int *)(tab); \ + \ + if (write + _size > mask) { \ + int _i = (mask+1) - write; \ + _size -= _i; \ + while (_i > 0 ) { \ + *(int *)(ring + write) = *_tab++; \ + write++; \ + _i--; \ + } \ + write = 0; \ + _tab += _i; \ + } \ + while (_size > 0) { \ + *(ring + write) = *_tab++; \ + write++; \ + _size--; \ + } \ + write &= mask; \ +} while (0) + +#endif /* __RADEON_DRV_H__ */ diff --git a/drivers/gpu/drm/radeon/radeon_ioc32.c b/drivers/gpu/drm/radeon/radeon_ioc32.c new file mode 100644 index 00000000000..56decda2a71 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_ioc32.c @@ -0,0 +1,424 @@ +/** + * \file radeon_ioc32.c + * + * 32-bit ioctl compatibility routines for the Radeon DRM. + * + * \author Paul Mackerras <paulus@samba.org> + * + * Copyright (C) Paul Mackerras 2005 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include <linux/compat.h> + +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_drv.h" + +typedef struct drm_radeon_init32 { + int func; + u32 sarea_priv_offset; + int is_pci; + int cp_mode; + int gart_size; + int ring_size; + int usec_timeout; + + unsigned int fb_bpp; + unsigned int front_offset, front_pitch; + unsigned int back_offset, back_pitch; + unsigned int depth_bpp; + unsigned int depth_offset, depth_pitch; + + u32 fb_offset; + u32 mmio_offset; + u32 ring_offset; + u32 ring_rptr_offset; + u32 buffers_offset; + u32 gart_textures_offset; +} drm_radeon_init32_t; + +static int compat_radeon_cp_init(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_init32_t init32; + drm_radeon_init_t __user *init; + + if (copy_from_user(&init32, (void __user *)arg, sizeof(init32))) + return -EFAULT; + + init = compat_alloc_user_space(sizeof(*init)); + if (!access_ok(VERIFY_WRITE, init, sizeof(*init)) + || __put_user(init32.func, &init->func) + || __put_user(init32.sarea_priv_offset, &init->sarea_priv_offset) + || __put_user(init32.is_pci, &init->is_pci) + || __put_user(init32.cp_mode, &init->cp_mode) + || __put_user(init32.gart_size, &init->gart_size) + || __put_user(init32.ring_size, &init->ring_size) + || __put_user(init32.usec_timeout, &init->usec_timeout) + || __put_user(init32.fb_bpp, &init->fb_bpp) + || __put_user(init32.front_offset, &init->front_offset) + || __put_user(init32.front_pitch, &init->front_pitch) + || __put_user(init32.back_offset, &init->back_offset) + || __put_user(init32.back_pitch, &init->back_pitch) + || __put_user(init32.depth_bpp, &init->depth_bpp) + || __put_user(init32.depth_offset, &init->depth_offset) + || __put_user(init32.depth_pitch, &init->depth_pitch) + || __put_user(init32.fb_offset, &init->fb_offset) + || __put_user(init32.mmio_offset, &init->mmio_offset) + || __put_user(init32.ring_offset, &init->ring_offset) + || __put_user(init32.ring_rptr_offset, &init->ring_rptr_offset) + || __put_user(init32.buffers_offset, &init->buffers_offset) + || __put_user(init32.gart_textures_offset, + &init->gart_textures_offset)) + return -EFAULT; + + return drm_ioctl(file->f_path.dentry->d_inode, file, + DRM_IOCTL_RADEON_CP_INIT, (unsigned long)init); +} + +typedef struct drm_radeon_clear32 { + unsigned int flags; + unsigned int clear_color; + unsigned int clear_depth; + unsigned int color_mask; + unsigned int depth_mask; /* misnamed field: should be stencil */ + u32 depth_boxes; +} drm_radeon_clear32_t; + +static int compat_radeon_cp_clear(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_clear32_t clr32; + drm_radeon_clear_t __user *clr; + + if (copy_from_user(&clr32, (void __user *)arg, sizeof(clr32))) + return -EFAULT; + + clr = compat_alloc_user_space(sizeof(*clr)); + if (!access_ok(VERIFY_WRITE, clr, sizeof(*clr)) + || __put_user(clr32.flags, &clr->flags) + || __put_user(clr32.clear_color, &clr->clear_color) + || __put_user(clr32.clear_depth, &clr->clear_depth) + || __put_user(clr32.color_mask, &clr->color_mask) + || __put_user(clr32.depth_mask, &clr->depth_mask) + || __put_user((void __user *)(unsigned long)clr32.depth_boxes, + &clr->depth_boxes)) + return -EFAULT; + + return drm_ioctl(file->f_path.dentry->d_inode, file, + DRM_IOCTL_RADEON_CLEAR, (unsigned long)clr); +} + +typedef struct drm_radeon_stipple32 { + u32 mask; +} drm_radeon_stipple32_t; + +static int compat_radeon_cp_stipple(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_stipple32_t __user *argp = (void __user *)arg; + drm_radeon_stipple_t __user *request; + u32 mask; + + if (get_user(mask, &argp->mask)) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user((unsigned int __user *)(unsigned long)mask, + &request->mask)) + return -EFAULT; + + return drm_ioctl(file->f_path.dentry->d_inode, file, + DRM_IOCTL_RADEON_STIPPLE, (unsigned long)request); +} + +typedef struct drm_radeon_tex_image32 { + unsigned int x, y; /* Blit coordinates */ + unsigned int width, height; + u32 data; +} drm_radeon_tex_image32_t; + +typedef struct drm_radeon_texture32 { + unsigned int offset; + int pitch; + int format; + int width; /* Texture image coordinates */ + int height; + u32 image; +} drm_radeon_texture32_t; + +static int compat_radeon_cp_texture(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_texture32_t req32; + drm_radeon_texture_t __user *request; + drm_radeon_tex_image32_t img32; + drm_radeon_tex_image_t __user *image; + + if (copy_from_user(&req32, (void __user *)arg, sizeof(req32))) + return -EFAULT; + if (req32.image == 0) + return -EINVAL; + if (copy_from_user(&img32, (void __user *)(unsigned long)req32.image, + sizeof(img32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request) + sizeof(*image)); + if (!access_ok(VERIFY_WRITE, request, + sizeof(*request) + sizeof(*image))) + return -EFAULT; + image = (drm_radeon_tex_image_t __user *) (request + 1); + + if (__put_user(req32.offset, &request->offset) + || __put_user(req32.pitch, &request->pitch) + || __put_user(req32.format, &request->format) + || __put_user(req32.width, &request->width) + || __put_user(req32.height, &request->height) + || __put_user(image, &request->image) + || __put_user(img32.x, &image->x) + || __put_user(img32.y, &image->y) + || __put_user(img32.width, &image->width) + || __put_user(img32.height, &image->height) + || __put_user((const void __user *)(unsigned long)img32.data, + &image->data)) + return -EFAULT; + + return drm_ioctl(file->f_path.dentry->d_inode, file, + DRM_IOCTL_RADEON_TEXTURE, (unsigned long)request); +} + +typedef struct drm_radeon_vertex2_32 { + int idx; /* Index of vertex buffer */ + int discard; /* Client finished with buffer? */ + int nr_states; + u32 state; + int nr_prims; + u32 prim; +} drm_radeon_vertex2_32_t; + +static int compat_radeon_cp_vertex2(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_vertex2_32_t req32; + drm_radeon_vertex2_t __user *request; + + if (copy_from_user(&req32, (void __user *)arg, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.idx, &request->idx) + || __put_user(req32.discard, &request->discard) + || __put_user(req32.nr_states, &request->nr_states) + || __put_user((void __user *)(unsigned long)req32.state, + &request->state) + || __put_user(req32.nr_prims, &request->nr_prims) + || __put_user((void __user *)(unsigned long)req32.prim, + &request->prim)) + return -EFAULT; + + return drm_ioctl(file->f_path.dentry->d_inode, file, + DRM_IOCTL_RADEON_VERTEX2, (unsigned long)request); +} + +typedef struct drm_radeon_cmd_buffer32 { + int bufsz; + u32 buf; + int nbox; + u32 boxes; +} drm_radeon_cmd_buffer32_t; + +static int compat_radeon_cp_cmdbuf(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_cmd_buffer32_t req32; + drm_radeon_cmd_buffer_t __user *request; + + if (copy_from_user(&req32, (void __user *)arg, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.bufsz, &request->bufsz) + || __put_user((void __user *)(unsigned long)req32.buf, + &request->buf) + || __put_user(req32.nbox, &request->nbox) + || __put_user((void __user *)(unsigned long)req32.boxes, + &request->boxes)) + return -EFAULT; + + return drm_ioctl(file->f_path.dentry->d_inode, file, + DRM_IOCTL_RADEON_CMDBUF, (unsigned long)request); +} + +typedef struct drm_radeon_getparam32 { + int param; + u32 value; +} drm_radeon_getparam32_t; + +static int compat_radeon_cp_getparam(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_getparam32_t req32; + drm_radeon_getparam_t __user *request; + + if (copy_from_user(&req32, (void __user *)arg, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.param, &request->param) + || __put_user((void __user *)(unsigned long)req32.value, + &request->value)) + return -EFAULT; + + return drm_ioctl(file->f_path.dentry->d_inode, file, + DRM_IOCTL_RADEON_GETPARAM, (unsigned long)request); +} + +typedef struct drm_radeon_mem_alloc32 { + int region; + int alignment; + int size; + u32 region_offset; /* offset from start of fb or GART */ +} drm_radeon_mem_alloc32_t; + +static int compat_radeon_mem_alloc(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_mem_alloc32_t req32; + drm_radeon_mem_alloc_t __user *request; + + if (copy_from_user(&req32, (void __user *)arg, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.region, &request->region) + || __put_user(req32.alignment, &request->alignment) + || __put_user(req32.size, &request->size) + || __put_user((int __user *)(unsigned long)req32.region_offset, + &request->region_offset)) + return -EFAULT; + + return drm_ioctl(file->f_path.dentry->d_inode, file, + DRM_IOCTL_RADEON_ALLOC, (unsigned long)request); +} + +typedef struct drm_radeon_irq_emit32 { + u32 irq_seq; +} drm_radeon_irq_emit32_t; + +static int compat_radeon_irq_emit(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_irq_emit32_t req32; + drm_radeon_irq_emit_t __user *request; + + if (copy_from_user(&req32, (void __user *)arg, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user((int __user *)(unsigned long)req32.irq_seq, + &request->irq_seq)) + return -EFAULT; + + return drm_ioctl(file->f_path.dentry->d_inode, file, + DRM_IOCTL_RADEON_IRQ_EMIT, (unsigned long)request); +} + +/* The two 64-bit arches where alignof(u64)==4 in 32-bit code */ +#if defined (CONFIG_X86_64) || defined(CONFIG_IA64) +typedef struct drm_radeon_setparam32 { + int param; + u64 value; +} __attribute__((packed)) drm_radeon_setparam32_t; + +static int compat_radeon_cp_setparam(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_setparam32_t req32; + drm_radeon_setparam_t __user *request; + + if (copy_from_user(&req32, (void __user *) arg, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.param, &request->param) + || __put_user((void __user *)(unsigned long)req32.value, + &request->value)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RADEON_SETPARAM, (unsigned long) request); +} +#else +#define compat_radeon_cp_setparam NULL +#endif /* X86_64 || IA64 */ + +drm_ioctl_compat_t *radeon_compat_ioctls[] = { + [DRM_RADEON_CP_INIT] = compat_radeon_cp_init, + [DRM_RADEON_CLEAR] = compat_radeon_cp_clear, + [DRM_RADEON_STIPPLE] = compat_radeon_cp_stipple, + [DRM_RADEON_TEXTURE] = compat_radeon_cp_texture, + [DRM_RADEON_VERTEX2] = compat_radeon_cp_vertex2, + [DRM_RADEON_CMDBUF] = compat_radeon_cp_cmdbuf, + [DRM_RADEON_GETPARAM] = compat_radeon_cp_getparam, + [DRM_RADEON_SETPARAM] = compat_radeon_cp_setparam, + [DRM_RADEON_ALLOC] = compat_radeon_mem_alloc, + [DRM_RADEON_IRQ_EMIT] = compat_radeon_irq_emit, +}; + +/** + * Called whenever a 32-bit process running under a 64-bit kernel + * performs an ioctl on /dev/dri/card<n>. + * + * \param filp file pointer. + * \param cmd command. + * \param arg user argument. + * \return zero on success or negative number on failure. + */ +long radeon_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + unsigned int nr = DRM_IOCTL_NR(cmd); + drm_ioctl_compat_t *fn = NULL; + int ret; + + if (nr < DRM_COMMAND_BASE) + return drm_compat_ioctl(filp, cmd, arg); + + if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(radeon_compat_ioctls)) + fn = radeon_compat_ioctls[nr - DRM_COMMAND_BASE]; + + lock_kernel(); /* XXX for now */ + if (fn != NULL) + ret = (*fn) (filp, cmd, arg); + else + ret = drm_ioctl(filp->f_path.dentry->d_inode, filp, cmd, arg); + unlock_kernel(); + + return ret; +} diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c new file mode 100644 index 00000000000..ee40d197deb --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_irq.c @@ -0,0 +1,320 @@ +/* radeon_irq.c -- IRQ handling for radeon -*- linux-c -*- */ +/* + * Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + * + * The Weather Channel (TM) funded Tungsten Graphics to develop the + * initial release of the Radeon 8500 driver under the XFree86 license. + * This notice must be preserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@daenzer.net> + */ + +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_drv.h" + +static __inline__ u32 radeon_acknowledge_irqs(drm_radeon_private_t * dev_priv, + u32 mask) +{ + u32 irqs = RADEON_READ(RADEON_GEN_INT_STATUS) & mask; + if (irqs) + RADEON_WRITE(RADEON_GEN_INT_STATUS, irqs); + return irqs; +} + +/* Interrupts - Used for device synchronization and flushing in the + * following circumstances: + * + * - Exclusive FB access with hw idle: + * - Wait for GUI Idle (?) interrupt, then do normal flush. + * + * - Frame throttling, NV_fence: + * - Drop marker irq's into command stream ahead of time. + * - Wait on irq's with lock *not held* + * - Check each for termination condition + * + * - Internally in cp_getbuffer, etc: + * - as above, but wait with lock held??? + * + * NOTE: These functions are misleadingly named -- the irq's aren't + * tied to dma at all, this is just a hangover from dri prehistory. + */ + +irqreturn_t radeon_driver_irq_handler(DRM_IRQ_ARGS) +{ + struct drm_device *dev = (struct drm_device *) arg; + drm_radeon_private_t *dev_priv = + (drm_radeon_private_t *) dev->dev_private; + u32 stat; + + /* Only consider the bits we're interested in - others could be used + * outside the DRM + */ + stat = radeon_acknowledge_irqs(dev_priv, (RADEON_SW_INT_TEST_ACK | + RADEON_CRTC_VBLANK_STAT | + RADEON_CRTC2_VBLANK_STAT)); + if (!stat) + return IRQ_NONE; + + stat &= dev_priv->irq_enable_reg; + + /* SW interrupt */ + if (stat & RADEON_SW_INT_TEST) { + DRM_WAKEUP(&dev_priv->swi_queue); + } + + /* VBLANK interrupt */ + if (stat & (RADEON_CRTC_VBLANK_STAT|RADEON_CRTC2_VBLANK_STAT)) { + int vblank_crtc = dev_priv->vblank_crtc; + + if ((vblank_crtc & + (DRM_RADEON_VBLANK_CRTC1 | DRM_RADEON_VBLANK_CRTC2)) == + (DRM_RADEON_VBLANK_CRTC1 | DRM_RADEON_VBLANK_CRTC2)) { + if (stat & RADEON_CRTC_VBLANK_STAT) + atomic_inc(&dev->vbl_received); + if (stat & RADEON_CRTC2_VBLANK_STAT) + atomic_inc(&dev->vbl_received2); + } else if (((stat & RADEON_CRTC_VBLANK_STAT) && + (vblank_crtc & DRM_RADEON_VBLANK_CRTC1)) || + ((stat & RADEON_CRTC2_VBLANK_STAT) && + (vblank_crtc & DRM_RADEON_VBLANK_CRTC2))) + atomic_inc(&dev->vbl_received); + + DRM_WAKEUP(&dev->vbl_queue); + drm_vbl_send_signals(dev); + } + + return IRQ_HANDLED; +} + +static int radeon_emit_irq(struct drm_device * dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + unsigned int ret; + RING_LOCALS; + + atomic_inc(&dev_priv->swi_emitted); + ret = atomic_read(&dev_priv->swi_emitted); + + BEGIN_RING(4); + OUT_RING_REG(RADEON_LAST_SWI_REG, ret); + OUT_RING_REG(RADEON_GEN_INT_STATUS, RADEON_SW_INT_FIRE); + ADVANCE_RING(); + COMMIT_RING(); + + return ret; +} + +static int radeon_wait_irq(struct drm_device * dev, int swi_nr) +{ + drm_radeon_private_t *dev_priv = + (drm_radeon_private_t *) dev->dev_private; + int ret = 0; + + if (RADEON_READ(RADEON_LAST_SWI_REG) >= swi_nr) + return 0; + + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + + DRM_WAIT_ON(ret, dev_priv->swi_queue, 3 * DRM_HZ, + RADEON_READ(RADEON_LAST_SWI_REG) >= swi_nr); + + return ret; +} + +static int radeon_driver_vblank_do_wait(struct drm_device * dev, + unsigned int *sequence, int crtc) +{ + drm_radeon_private_t *dev_priv = + (drm_radeon_private_t *) dev->dev_private; + unsigned int cur_vblank; + int ret = 0; + int ack = 0; + atomic_t *counter; + if (!dev_priv) { + DRM_ERROR("called with no initialization\n"); + return -EINVAL; + } + + if (crtc == DRM_RADEON_VBLANK_CRTC1) { + counter = &dev->vbl_received; + ack |= RADEON_CRTC_VBLANK_STAT; + } else if (crtc == DRM_RADEON_VBLANK_CRTC2) { + counter = &dev->vbl_received2; + ack |= RADEON_CRTC2_VBLANK_STAT; + } else + return -EINVAL; + + radeon_acknowledge_irqs(dev_priv, ack); + + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + + /* Assume that the user has missed the current sequence number + * by about a day rather than she wants to wait for years + * using vertical blanks... + */ + DRM_WAIT_ON(ret, dev->vbl_queue, 3 * DRM_HZ, + (((cur_vblank = atomic_read(counter)) + - *sequence) <= (1 << 23))); + + *sequence = cur_vblank; + + return ret; +} + +int radeon_driver_vblank_wait(struct drm_device *dev, unsigned int *sequence) +{ + return radeon_driver_vblank_do_wait(dev, sequence, DRM_RADEON_VBLANK_CRTC1); +} + +int radeon_driver_vblank_wait2(struct drm_device *dev, unsigned int *sequence) +{ + return radeon_driver_vblank_do_wait(dev, sequence, DRM_RADEON_VBLANK_CRTC2); +} + +/* Needs the lock as it touches the ring. + */ +int radeon_irq_emit(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_irq_emit_t *emit = data; + int result; + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + if (!dev_priv) { + DRM_ERROR("called with no initialization\n"); + return -EINVAL; + } + + result = radeon_emit_irq(dev); + + if (DRM_COPY_TO_USER(emit->irq_seq, &result, sizeof(int))) { + DRM_ERROR("copy_to_user\n"); + return -EFAULT; + } + + return 0; +} + +/* Doesn't need the hardware lock. + */ +int radeon_irq_wait(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_irq_wait_t *irqwait = data; + + if (!dev_priv) { + DRM_ERROR("called with no initialization\n"); + return -EINVAL; + } + + return radeon_wait_irq(dev, irqwait->irq_seq); +} + +void radeon_enable_interrupt(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = (drm_radeon_private_t *) dev->dev_private; + + dev_priv->irq_enable_reg = RADEON_SW_INT_ENABLE; + if (dev_priv->vblank_crtc & DRM_RADEON_VBLANK_CRTC1) + dev_priv->irq_enable_reg |= RADEON_CRTC_VBLANK_MASK; + + if (dev_priv->vblank_crtc & DRM_RADEON_VBLANK_CRTC2) + dev_priv->irq_enable_reg |= RADEON_CRTC2_VBLANK_MASK; + + RADEON_WRITE(RADEON_GEN_INT_CNTL, dev_priv->irq_enable_reg); + dev_priv->irq_enabled = 1; +} + +/* drm_dma.h hooks +*/ +void radeon_driver_irq_preinstall(struct drm_device * dev) +{ + drm_radeon_private_t *dev_priv = + (drm_radeon_private_t *) dev->dev_private; + + /* Disable *all* interrupts */ + RADEON_WRITE(RADEON_GEN_INT_CNTL, 0); + + /* Clear bits if they're already high */ + radeon_acknowledge_irqs(dev_priv, (RADEON_SW_INT_TEST_ACK | + RADEON_CRTC_VBLANK_STAT | + RADEON_CRTC2_VBLANK_STAT)); +} + +void radeon_driver_irq_postinstall(struct drm_device * dev) +{ + drm_radeon_private_t *dev_priv = + (drm_radeon_private_t *) dev->dev_private; + + atomic_set(&dev_priv->swi_emitted, 0); + DRM_INIT_WAITQUEUE(&dev_priv->swi_queue); + + radeon_enable_interrupt(dev); +} + +void radeon_driver_irq_uninstall(struct drm_device * dev) +{ + drm_radeon_private_t *dev_priv = + (drm_radeon_private_t *) dev->dev_private; + if (!dev_priv) + return; + + dev_priv->irq_enabled = 0; + + /* Disable *all* interrupts */ + RADEON_WRITE(RADEON_GEN_INT_CNTL, 0); +} + + +int radeon_vblank_crtc_get(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = (drm_radeon_private_t *) dev->dev_private; + u32 flag; + u32 value; + + flag = RADEON_READ(RADEON_GEN_INT_CNTL); + value = 0; + + if (flag & RADEON_CRTC_VBLANK_MASK) + value |= DRM_RADEON_VBLANK_CRTC1; + + if (flag & RADEON_CRTC2_VBLANK_MASK) + value |= DRM_RADEON_VBLANK_CRTC2; + return value; +} + +int radeon_vblank_crtc_set(struct drm_device *dev, int64_t value) +{ + drm_radeon_private_t *dev_priv = (drm_radeon_private_t *) dev->dev_private; + if (value & ~(DRM_RADEON_VBLANK_CRTC1 | DRM_RADEON_VBLANK_CRTC2)) { + DRM_ERROR("called with invalid crtc 0x%x\n", (unsigned int)value); + return -EINVAL; + } + dev_priv->vblank_crtc = (unsigned int)value; + radeon_enable_interrupt(dev); + return 0; +} diff --git a/drivers/gpu/drm/radeon/radeon_mem.c b/drivers/gpu/drm/radeon/radeon_mem.c new file mode 100644 index 00000000000..4af5286a36f --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_mem.c @@ -0,0 +1,302 @@ +/* radeon_mem.c -- Simple GART/fb memory manager for radeon -*- linux-c -*- */ +/* + * Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + * + * The Weather Channel (TM) funded Tungsten Graphics to develop the + * initial release of the Radeon 8500 driver under the XFree86 license. + * This notice must be preserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_drv.h" + +/* Very simple allocator for GART memory, working on a static range + * already mapped into each client's address space. + */ + +static struct mem_block *split_block(struct mem_block *p, int start, int size, + struct drm_file *file_priv) +{ + /* Maybe cut off the start of an existing block */ + if (start > p->start) { + struct mem_block *newblock = + drm_alloc(sizeof(*newblock), DRM_MEM_BUFS); + if (!newblock) + goto out; + newblock->start = start; + newblock->size = p->size - (start - p->start); + newblock->file_priv = NULL; + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + p->size -= newblock->size; + p = newblock; + } + + /* Maybe cut off the end of an existing block */ + if (size < p->size) { + struct mem_block *newblock = + drm_alloc(sizeof(*newblock), DRM_MEM_BUFS); + if (!newblock) + goto out; + newblock->start = start + size; + newblock->size = p->size - size; + newblock->file_priv = NULL; + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + p->size = size; + } + + out: + /* Our block is in the middle */ + p->file_priv = file_priv; + return p; +} + +static struct mem_block *alloc_block(struct mem_block *heap, int size, + int align2, struct drm_file *file_priv) +{ + struct mem_block *p; + int mask = (1 << align2) - 1; + + list_for_each(p, heap) { + int start = (p->start + mask) & ~mask; + if (p->file_priv == NULL && start + size <= p->start + p->size) + return split_block(p, start, size, file_priv); + } + + return NULL; +} + +static struct mem_block *find_block(struct mem_block *heap, int start) +{ + struct mem_block *p; + + list_for_each(p, heap) + if (p->start == start) + return p; + + return NULL; +} + +static void free_block(struct mem_block *p) +{ + p->file_priv = NULL; + + /* Assumes a single contiguous range. Needs a special file_priv in + * 'heap' to stop it being subsumed. + */ + if (p->next->file_priv == NULL) { + struct mem_block *q = p->next; + p->size += q->size; + p->next = q->next; + p->next->prev = p; + drm_free(q, sizeof(*q), DRM_MEM_BUFS); + } + + if (p->prev->file_priv == NULL) { + struct mem_block *q = p->prev; + q->size += p->size; + q->next = p->next; + q->next->prev = q; + drm_free(p, sizeof(*q), DRM_MEM_BUFS); + } +} + +/* Initialize. How to check for an uninitialized heap? + */ +static int init_heap(struct mem_block **heap, int start, int size) +{ + struct mem_block *blocks = drm_alloc(sizeof(*blocks), DRM_MEM_BUFS); + + if (!blocks) + return -ENOMEM; + + *heap = drm_alloc(sizeof(**heap), DRM_MEM_BUFS); + if (!*heap) { + drm_free(blocks, sizeof(*blocks), DRM_MEM_BUFS); + return -ENOMEM; + } + + blocks->start = start; + blocks->size = size; + blocks->file_priv = NULL; + blocks->next = blocks->prev = *heap; + + memset(*heap, 0, sizeof(**heap)); + (*heap)->file_priv = (struct drm_file *) - 1; + (*heap)->next = (*heap)->prev = blocks; + return 0; +} + +/* Free all blocks associated with the releasing file. + */ +void radeon_mem_release(struct drm_file *file_priv, struct mem_block *heap) +{ + struct mem_block *p; + + if (!heap || !heap->next) + return; + + list_for_each(p, heap) { + if (p->file_priv == file_priv) + p->file_priv = NULL; + } + + /* Assumes a single contiguous range. Needs a special file_priv in + * 'heap' to stop it being subsumed. + */ + list_for_each(p, heap) { + while (p->file_priv == NULL && p->next->file_priv == NULL) { + struct mem_block *q = p->next; + p->size += q->size; + p->next = q->next; + p->next->prev = p; + drm_free(q, sizeof(*q), DRM_MEM_DRIVER); + } + } +} + +/* Shutdown. + */ +void radeon_mem_takedown(struct mem_block **heap) +{ + struct mem_block *p; + + if (!*heap) + return; + + for (p = (*heap)->next; p != *heap;) { + struct mem_block *q = p; + p = p->next; + drm_free(q, sizeof(*q), DRM_MEM_DRIVER); + } + + drm_free(*heap, sizeof(**heap), DRM_MEM_DRIVER); + *heap = NULL; +} + +/* IOCTL HANDLERS */ + +static struct mem_block **get_heap(drm_radeon_private_t * dev_priv, int region) +{ + switch (region) { + case RADEON_MEM_REGION_GART: + return &dev_priv->gart_heap; + case RADEON_MEM_REGION_FB: + return &dev_priv->fb_heap; + default: + return NULL; + } +} + +int radeon_mem_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_mem_alloc_t *alloc = data; + struct mem_block *block, **heap; + + if (!dev_priv) { + DRM_ERROR("called with no initialization\n"); + return -EINVAL; + } + + heap = get_heap(dev_priv, alloc->region); + if (!heap || !*heap) + return -EFAULT; + + /* Make things easier on ourselves: all allocations at least + * 4k aligned. + */ + if (alloc->alignment < 12) + alloc->alignment = 12; + + block = alloc_block(*heap, alloc->size, alloc->alignment, file_priv); + + if (!block) + return -ENOMEM; + + if (DRM_COPY_TO_USER(alloc->region_offset, &block->start, + sizeof(int))) { + DRM_ERROR("copy_to_user\n"); + return -EFAULT; + } + + return 0; +} + +int radeon_mem_free(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_mem_free_t *memfree = data; + struct mem_block *block, **heap; + + if (!dev_priv) { + DRM_ERROR("called with no initialization\n"); + return -EINVAL; + } + + heap = get_heap(dev_priv, memfree->region); + if (!heap || !*heap) + return -EFAULT; + + block = find_block(*heap, memfree->region_offset); + if (!block) + return -EFAULT; + + if (block->file_priv != file_priv) + return -EPERM; + + free_block(block); + return 0; +} + +int radeon_mem_init_heap(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_mem_init_heap_t *initheap = data; + struct mem_block **heap; + + if (!dev_priv) { + DRM_ERROR("called with no initialization\n"); + return -EINVAL; + } + + heap = get_heap(dev_priv, initheap->region); + if (!heap) + return -EFAULT; + + if (*heap) { + DRM_ERROR("heap already initialized?"); + return -EFAULT; + } + + return init_heap(heap, initheap->start, initheap->size); +} diff --git a/drivers/gpu/drm/radeon/radeon_microcode.h b/drivers/gpu/drm/radeon/radeon_microcode.h new file mode 100644 index 00000000000..a348c9e7db1 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_microcode.h @@ -0,0 +1,1844 @@ +/* + * Copyright 2007 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_MICROCODE_H +#define RADEON_MICROCODE_H + +/* production radeon ucode r1xx-r6xx */ +static const u32 R100_cp_microcode[][2] = { + { 0x21007000, 0000000000 }, + { 0x20007000, 0000000000 }, + { 0x000000b4, 0x00000004 }, + { 0x000000b8, 0x00000004 }, + { 0x6f5b4d4c, 0000000000 }, + { 0x4c4c427f, 0000000000 }, + { 0x5b568a92, 0000000000 }, + { 0x4ca09c6d, 0000000000 }, + { 0xad4c4c4c, 0000000000 }, + { 0x4ce1af3d, 0000000000 }, + { 0xd8afafaf, 0000000000 }, + { 0xd64c4cdc, 0000000000 }, + { 0x4cd10d10, 0000000000 }, + { 0x000f0000, 0x00000016 }, + { 0x362f242d, 0000000000 }, + { 0x00000012, 0x00000004 }, + { 0x000f0000, 0x00000016 }, + { 0x362f282d, 0000000000 }, + { 0x000380e7, 0x00000002 }, + { 0x04002c97, 0x00000002 }, + { 0x000f0001, 0x00000016 }, + { 0x333a3730, 0000000000 }, + { 0x000077ef, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x00000021, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00061000, 0x00000002 }, + { 0x00000021, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00061000, 0x00000002 }, + { 0x00000021, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00000017, 0x00000004 }, + { 0x0003802b, 0x00000002 }, + { 0x040067e0, 0x00000002 }, + { 0x00000017, 0x00000004 }, + { 0x000077e0, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x000037e1, 0x00000002 }, + { 0x040067e1, 0x00000006 }, + { 0x000077e0, 0x00000002 }, + { 0x000077e1, 0x00000002 }, + { 0x000077e1, 0x00000006 }, + { 0xffffffff, 0000000000 }, + { 0x10000000, 0000000000 }, + { 0x0003802b, 0x00000002 }, + { 0x040067e0, 0x00000006 }, + { 0x00007675, 0x00000002 }, + { 0x00007676, 0x00000002 }, + { 0x00007677, 0x00000002 }, + { 0x00007678, 0x00000006 }, + { 0x0003802c, 0x00000002 }, + { 0x04002676, 0x00000002 }, + { 0x00007677, 0x00000002 }, + { 0x00007678, 0x00000006 }, + { 0x0000002f, 0x00000018 }, + { 0x0000002f, 0x00000018 }, + { 0000000000, 0x00000006 }, + { 0x00000030, 0x00000018 }, + { 0x00000030, 0x00000018 }, + { 0000000000, 0x00000006 }, + { 0x01605000, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x00098000, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x64c0603e, 0x00000004 }, + { 0x000380e6, 0x00000002 }, + { 0x040025c5, 0x00000002 }, + { 0x00080000, 0x00000016 }, + { 0000000000, 0000000000 }, + { 0x0400251d, 0x00000002 }, + { 0x00007580, 0x00000002 }, + { 0x00067581, 0x00000002 }, + { 0x04002580, 0x00000002 }, + { 0x00067581, 0x00000002 }, + { 0x00000049, 0x00000004 }, + { 0x00005000, 0000000000 }, + { 0x000380e6, 0x00000002 }, + { 0x040025c5, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x0000750e, 0x00000002 }, + { 0x00019000, 0x00000002 }, + { 0x00011055, 0x00000014 }, + { 0x00000055, 0x00000012 }, + { 0x0400250f, 0x00000002 }, + { 0x0000504f, 0x00000004 }, + { 0x000380e6, 0x00000002 }, + { 0x040025c5, 0x00000002 }, + { 0x00007565, 0x00000002 }, + { 0x00007566, 0x00000002 }, + { 0x00000058, 0x00000004 }, + { 0x000380e6, 0x00000002 }, + { 0x040025c5, 0x00000002 }, + { 0x01e655b4, 0x00000002 }, + { 0x4401b0e4, 0x00000002 }, + { 0x01c110e4, 0x00000002 }, + { 0x26667066, 0x00000018 }, + { 0x040c2565, 0x00000002 }, + { 0x00000066, 0x00000018 }, + { 0x04002564, 0x00000002 }, + { 0x00007566, 0x00000002 }, + { 0x0000005d, 0x00000004 }, + { 0x00401069, 0x00000008 }, + { 0x00101000, 0x00000002 }, + { 0x000d80ff, 0x00000002 }, + { 0x0080006c, 0x00000008 }, + { 0x000f9000, 0x00000002 }, + { 0x000e00ff, 0x00000002 }, + { 0000000000, 0x00000006 }, + { 0x0000008f, 0x00000018 }, + { 0x0000005b, 0x00000004 }, + { 0x000380e6, 0x00000002 }, + { 0x040025c5, 0x00000002 }, + { 0x00007576, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x00009000, 0x00000002 }, + { 0x00041000, 0x00000002 }, + { 0x0c00350e, 0x00000002 }, + { 0x00049000, 0x00000002 }, + { 0x00051000, 0x00000002 }, + { 0x01e785f8, 0x00000002 }, + { 0x00200000, 0x00000002 }, + { 0x0060007e, 0x0000000c }, + { 0x00007563, 0x00000002 }, + { 0x006075f0, 0x00000021 }, + { 0x20007073, 0x00000004 }, + { 0x00005073, 0x00000004 }, + { 0x000380e6, 0x00000002 }, + { 0x040025c5, 0x00000002 }, + { 0x00007576, 0x00000002 }, + { 0x00007577, 0x00000002 }, + { 0x0000750e, 0x00000002 }, + { 0x0000750f, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00600083, 0x0000000c }, + { 0x006075f0, 0x00000021 }, + { 0x000075f8, 0x00000002 }, + { 0x00000083, 0x00000004 }, + { 0x000a750e, 0x00000002 }, + { 0x000380e6, 0x00000002 }, + { 0x040025c5, 0x00000002 }, + { 0x0020750f, 0x00000002 }, + { 0x00600086, 0x00000004 }, + { 0x00007570, 0x00000002 }, + { 0x00007571, 0x00000002 }, + { 0x00007572, 0x00000006 }, + { 0x000380e6, 0x00000002 }, + { 0x040025c5, 0x00000002 }, + { 0x00005000, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00007568, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x00000095, 0x0000000c }, + { 0x00058000, 0x00000002 }, + { 0x0c607562, 0x00000002 }, + { 0x00000097, 0x00000004 }, + { 0x000380e6, 0x00000002 }, + { 0x040025c5, 0x00000002 }, + { 0x00600096, 0x00000004 }, + { 0x400070e5, 0000000000 }, + { 0x000380e6, 0x00000002 }, + { 0x040025c5, 0x00000002 }, + { 0x000380e5, 0x00000002 }, + { 0x000000a8, 0x0000001c }, + { 0x000650aa, 0x00000018 }, + { 0x040025bb, 0x00000002 }, + { 0x000610ab, 0x00000018 }, + { 0x040075bc, 0000000000 }, + { 0x000075bb, 0x00000002 }, + { 0x000075bc, 0000000000 }, + { 0x00090000, 0x00000006 }, + { 0x00090000, 0x00000002 }, + { 0x000d8002, 0x00000006 }, + { 0x00007832, 0x00000002 }, + { 0x00005000, 0x00000002 }, + { 0x000380e7, 0x00000002 }, + { 0x04002c97, 0x00000002 }, + { 0x00007820, 0x00000002 }, + { 0x00007821, 0x00000002 }, + { 0x00007800, 0000000000 }, + { 0x01200000, 0x00000002 }, + { 0x20077000, 0x00000002 }, + { 0x01200000, 0x00000002 }, + { 0x20007000, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x0120751b, 0x00000002 }, + { 0x8040750a, 0x00000002 }, + { 0x8040750b, 0x00000002 }, + { 0x00110000, 0x00000002 }, + { 0x000380e5, 0x00000002 }, + { 0x000000c6, 0x0000001c }, + { 0x000610ab, 0x00000018 }, + { 0x844075bd, 0x00000002 }, + { 0x000610aa, 0x00000018 }, + { 0x840075bb, 0x00000002 }, + { 0x000610ab, 0x00000018 }, + { 0x844075bc, 0x00000002 }, + { 0x000000c9, 0x00000004 }, + { 0x804075bd, 0x00000002 }, + { 0x800075bb, 0x00000002 }, + { 0x804075bc, 0x00000002 }, + { 0x00108000, 0x00000002 }, + { 0x01400000, 0x00000002 }, + { 0x006000cd, 0x0000000c }, + { 0x20c07000, 0x00000020 }, + { 0x000000cf, 0x00000012 }, + { 0x00800000, 0x00000006 }, + { 0x0080751d, 0x00000006 }, + { 0000000000, 0000000000 }, + { 0x0000775c, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00661000, 0x00000002 }, + { 0x0460275d, 0x00000020 }, + { 0x00004000, 0000000000 }, + { 0x01e00830, 0x00000002 }, + { 0x21007000, 0000000000 }, + { 0x6464614d, 0000000000 }, + { 0x69687420, 0000000000 }, + { 0x00000073, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x00005000, 0x00000002 }, + { 0x000380d0, 0x00000002 }, + { 0x040025e0, 0x00000002 }, + { 0x000075e1, 0000000000 }, + { 0x00000001, 0000000000 }, + { 0x000380e0, 0x00000002 }, + { 0x04002394, 0x00000002 }, + { 0x00005000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x00000008, 0000000000 }, + { 0x00000004, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, +}; + +static const u32 R200_cp_microcode[][2] = { + { 0x21007000, 0000000000 }, + { 0x20007000, 0000000000 }, + { 0x000000bf, 0x00000004 }, + { 0x000000c3, 0x00000004 }, + { 0x7a685e5d, 0000000000 }, + { 0x5d5d5588, 0000000000 }, + { 0x68659197, 0000000000 }, + { 0x5da19f78, 0000000000 }, + { 0x5d5d5d5d, 0000000000 }, + { 0x5dee5d50, 0000000000 }, + { 0xf2acacac, 0000000000 }, + { 0xe75df9e9, 0000000000 }, + { 0xb1dd0e11, 0000000000 }, + { 0xe2afafaf, 0000000000 }, + { 0x000f0000, 0x00000016 }, + { 0x452f232d, 0000000000 }, + { 0x00000013, 0x00000004 }, + { 0x000f0000, 0x00000016 }, + { 0x452f272d, 0000000000 }, + { 0x000f0001, 0x00000016 }, + { 0x3e4d4a37, 0000000000 }, + { 0x000077ef, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x00000020, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00061000, 0x00000002 }, + { 0x00000020, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00061000, 0x00000002 }, + { 0x00000020, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00000016, 0x00000004 }, + { 0x0003802a, 0x00000002 }, + { 0x040067e0, 0x00000002 }, + { 0x00000016, 0x00000004 }, + { 0x000077e0, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x000037e1, 0x00000002 }, + { 0x040067e1, 0x00000006 }, + { 0x000077e0, 0x00000002 }, + { 0x000077e1, 0x00000002 }, + { 0x000077e1, 0x00000006 }, + { 0xffffffff, 0000000000 }, + { 0x10000000, 0000000000 }, + { 0x07f007f0, 0000000000 }, + { 0x0003802a, 0x00000002 }, + { 0x040067e0, 0x00000006 }, + { 0x0003802c, 0x00000002 }, + { 0x04002741, 0x00000002 }, + { 0x04002741, 0x00000002 }, + { 0x04002743, 0x00000002 }, + { 0x00007675, 0x00000002 }, + { 0x00007676, 0x00000002 }, + { 0x00007677, 0x00000002 }, + { 0x00007678, 0x00000006 }, + { 0x0003802c, 0x00000002 }, + { 0x04002741, 0x00000002 }, + { 0x04002741, 0x00000002 }, + { 0x04002743, 0x00000002 }, + { 0x00007676, 0x00000002 }, + { 0x00007677, 0x00000002 }, + { 0x00007678, 0x00000006 }, + { 0x0003802b, 0x00000002 }, + { 0x04002676, 0x00000002 }, + { 0x00007677, 0x00000002 }, + { 0x0003802c, 0x00000002 }, + { 0x04002741, 0x00000002 }, + { 0x04002743, 0x00000002 }, + { 0x00007678, 0x00000006 }, + { 0x0003802c, 0x00000002 }, + { 0x04002741, 0x00000002 }, + { 0x04002741, 0x00000002 }, + { 0x04002743, 0x00000002 }, + { 0x00007678, 0x00000006 }, + { 0x0000002f, 0x00000018 }, + { 0x0000002f, 0x00000018 }, + { 0000000000, 0x00000006 }, + { 0x00000037, 0x00000018 }, + { 0x00000037, 0x00000018 }, + { 0000000000, 0x00000006 }, + { 0x01605000, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x00098000, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x64c06051, 0x00000004 }, + { 0x00080000, 0x00000016 }, + { 0000000000, 0000000000 }, + { 0x0400251d, 0x00000002 }, + { 0x00007580, 0x00000002 }, + { 0x00067581, 0x00000002 }, + { 0x04002580, 0x00000002 }, + { 0x00067581, 0x00000002 }, + { 0x0000005a, 0x00000004 }, + { 0x00005000, 0000000000 }, + { 0x00061000, 0x00000002 }, + { 0x0000750e, 0x00000002 }, + { 0x00019000, 0x00000002 }, + { 0x00011064, 0x00000014 }, + { 0x00000064, 0x00000012 }, + { 0x0400250f, 0x00000002 }, + { 0x0000505e, 0x00000004 }, + { 0x00007565, 0x00000002 }, + { 0x00007566, 0x00000002 }, + { 0x00000065, 0x00000004 }, + { 0x01e655b4, 0x00000002 }, + { 0x4401b0f0, 0x00000002 }, + { 0x01c110f0, 0x00000002 }, + { 0x26667071, 0x00000018 }, + { 0x040c2565, 0x00000002 }, + { 0x00000071, 0x00000018 }, + { 0x04002564, 0x00000002 }, + { 0x00007566, 0x00000002 }, + { 0x00000068, 0x00000004 }, + { 0x00401074, 0x00000008 }, + { 0x00101000, 0x00000002 }, + { 0x000d80ff, 0x00000002 }, + { 0x00800077, 0x00000008 }, + { 0x000f9000, 0x00000002 }, + { 0x000e00ff, 0x00000002 }, + { 0000000000, 0x00000006 }, + { 0x00000094, 0x00000018 }, + { 0x00000068, 0x00000004 }, + { 0x00007576, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x00009000, 0x00000002 }, + { 0x00041000, 0x00000002 }, + { 0x0c00350e, 0x00000002 }, + { 0x00049000, 0x00000002 }, + { 0x00051000, 0x00000002 }, + { 0x01e785f8, 0x00000002 }, + { 0x00200000, 0x00000002 }, + { 0x00600087, 0x0000000c }, + { 0x00007563, 0x00000002 }, + { 0x006075f0, 0x00000021 }, + { 0x2000707c, 0x00000004 }, + { 0x0000507c, 0x00000004 }, + { 0x00007576, 0x00000002 }, + { 0x00007577, 0x00000002 }, + { 0x0000750e, 0x00000002 }, + { 0x0000750f, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x0060008a, 0x0000000c }, + { 0x006075f0, 0x00000021 }, + { 0x000075f8, 0x00000002 }, + { 0x0000008a, 0x00000004 }, + { 0x000a750e, 0x00000002 }, + { 0x0020750f, 0x00000002 }, + { 0x0060008d, 0x00000004 }, + { 0x00007570, 0x00000002 }, + { 0x00007571, 0x00000002 }, + { 0x00007572, 0x00000006 }, + { 0x00005000, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00007568, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x00000098, 0x0000000c }, + { 0x00058000, 0x00000002 }, + { 0x0c607562, 0x00000002 }, + { 0x0000009a, 0x00000004 }, + { 0x00600099, 0x00000004 }, + { 0x400070f1, 0000000000 }, + { 0x000380f1, 0x00000002 }, + { 0x000000a7, 0x0000001c }, + { 0x000650a9, 0x00000018 }, + { 0x040025bb, 0x00000002 }, + { 0x000610aa, 0x00000018 }, + { 0x040075bc, 0000000000 }, + { 0x000075bb, 0x00000002 }, + { 0x000075bc, 0000000000 }, + { 0x00090000, 0x00000006 }, + { 0x00090000, 0x00000002 }, + { 0x000d8002, 0x00000006 }, + { 0x00005000, 0x00000002 }, + { 0x00007821, 0x00000002 }, + { 0x00007800, 0000000000 }, + { 0x00007821, 0x00000002 }, + { 0x00007800, 0000000000 }, + { 0x01665000, 0x00000002 }, + { 0x000a0000, 0x00000002 }, + { 0x000671cc, 0x00000002 }, + { 0x0286f1cd, 0x00000002 }, + { 0x000000b7, 0x00000010 }, + { 0x21007000, 0000000000 }, + { 0x000000be, 0x0000001c }, + { 0x00065000, 0x00000002 }, + { 0x000a0000, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x000b0000, 0x00000002 }, + { 0x38067000, 0x00000002 }, + { 0x000a00ba, 0x00000004 }, + { 0x20007000, 0000000000 }, + { 0x01200000, 0x00000002 }, + { 0x20077000, 0x00000002 }, + { 0x01200000, 0x00000002 }, + { 0x20007000, 0000000000 }, + { 0x00061000, 0x00000002 }, + { 0x0120751b, 0x00000002 }, + { 0x8040750a, 0x00000002 }, + { 0x8040750b, 0x00000002 }, + { 0x00110000, 0x00000002 }, + { 0x000380f1, 0x00000002 }, + { 0x000000d1, 0x0000001c }, + { 0x000610aa, 0x00000018 }, + { 0x844075bd, 0x00000002 }, + { 0x000610a9, 0x00000018 }, + { 0x840075bb, 0x00000002 }, + { 0x000610aa, 0x00000018 }, + { 0x844075bc, 0x00000002 }, + { 0x000000d4, 0x00000004 }, + { 0x804075bd, 0x00000002 }, + { 0x800075bb, 0x00000002 }, + { 0x804075bc, 0x00000002 }, + { 0x00108000, 0x00000002 }, + { 0x01400000, 0x00000002 }, + { 0x006000d8, 0x0000000c }, + { 0x20c07000, 0x00000020 }, + { 0x000000da, 0x00000012 }, + { 0x00800000, 0x00000006 }, + { 0x0080751d, 0x00000006 }, + { 0x000025bb, 0x00000002 }, + { 0x000040d4, 0x00000004 }, + { 0x0000775c, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00661000, 0x00000002 }, + { 0x0460275d, 0x00000020 }, + { 0x00004000, 0000000000 }, + { 0x00007999, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00661000, 0x00000002 }, + { 0x0460299b, 0x00000020 }, + { 0x00004000, 0000000000 }, + { 0x01e00830, 0x00000002 }, + { 0x21007000, 0000000000 }, + { 0x00005000, 0x00000002 }, + { 0x00038056, 0x00000002 }, + { 0x040025e0, 0x00000002 }, + { 0x000075e1, 0000000000 }, + { 0x00000001, 0000000000 }, + { 0x000380ed, 0x00000002 }, + { 0x04007394, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x000078c4, 0x00000002 }, + { 0x000078c5, 0x00000002 }, + { 0x000078c6, 0x00000002 }, + { 0x00007924, 0x00000002 }, + { 0x00007925, 0x00000002 }, + { 0x00007926, 0x00000002 }, + { 0x000000f2, 0x00000004 }, + { 0x00007924, 0x00000002 }, + { 0x00007925, 0x00000002 }, + { 0x00007926, 0x00000002 }, + { 0x000000f9, 0x00000004 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, +}; + +static const u32 R300_cp_microcode[][2] = { + { 0x4200e000, 0000000000 }, + { 0x4000e000, 0000000000 }, + { 0x000000ae, 0x00000008 }, + { 0x000000b2, 0x00000008 }, + { 0x67554b4a, 0000000000 }, + { 0x4a4a4475, 0000000000 }, + { 0x55527d83, 0000000000 }, + { 0x4a8c8b65, 0000000000 }, + { 0x4aef4af6, 0000000000 }, + { 0x4ae14a4a, 0000000000 }, + { 0xe4979797, 0000000000 }, + { 0xdb4aebdd, 0000000000 }, + { 0x9ccc4a4a, 0000000000 }, + { 0xd1989898, 0000000000 }, + { 0x4a0f9ad6, 0000000000 }, + { 0x000ca000, 0x00000004 }, + { 0x000d0012, 0x00000038 }, + { 0x0000e8b4, 0x00000004 }, + { 0x000d0014, 0x00000038 }, + { 0x0000e8b6, 0x00000004 }, + { 0x000d0016, 0x00000038 }, + { 0x0000e854, 0x00000004 }, + { 0x000d0018, 0x00000038 }, + { 0x0000e855, 0x00000004 }, + { 0x000d001a, 0x00000038 }, + { 0x0000e856, 0x00000004 }, + { 0x000d001c, 0x00000038 }, + { 0x0000e857, 0x00000004 }, + { 0x000d001e, 0x00000038 }, + { 0x0000e824, 0x00000004 }, + { 0x000d0020, 0x00000038 }, + { 0x0000e825, 0x00000004 }, + { 0x000d0022, 0x00000038 }, + { 0x0000e830, 0x00000004 }, + { 0x000d0024, 0x00000038 }, + { 0x0000f0c0, 0x00000004 }, + { 0x000d0026, 0x00000038 }, + { 0x0000f0c1, 0x00000004 }, + { 0x000d0028, 0x00000038 }, + { 0x0000f041, 0x00000004 }, + { 0x000d002a, 0x00000038 }, + { 0x0000f184, 0x00000004 }, + { 0x000d002c, 0x00000038 }, + { 0x0000f185, 0x00000004 }, + { 0x000d002e, 0x00000038 }, + { 0x0000f186, 0x00000004 }, + { 0x000d0030, 0x00000038 }, + { 0x0000f187, 0x00000004 }, + { 0x000d0032, 0x00000038 }, + { 0x0000f180, 0x00000004 }, + { 0x000d0034, 0x00000038 }, + { 0x0000f393, 0x00000004 }, + { 0x000d0036, 0x00000038 }, + { 0x0000f38a, 0x00000004 }, + { 0x000d0038, 0x00000038 }, + { 0x0000f38e, 0x00000004 }, + { 0x0000e821, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00000043, 0x00000018 }, + { 0x00cce800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x0000003a, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x2000451d, 0x00000004 }, + { 0x0000e580, 0x00000004 }, + { 0x000ce581, 0x00000004 }, + { 0x08004580, 0x00000004 }, + { 0x000ce581, 0x00000004 }, + { 0x00000047, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x0000e50e, 0x00000004 }, + { 0x00032000, 0x00000004 }, + { 0x00022051, 0x00000028 }, + { 0x00000051, 0x00000024 }, + { 0x0800450f, 0x00000004 }, + { 0x0000a04b, 0x00000008 }, + { 0x0000e565, 0x00000004 }, + { 0x0000e566, 0x00000004 }, + { 0x00000052, 0x00000008 }, + { 0x03cca5b4, 0x00000004 }, + { 0x05432000, 0x00000004 }, + { 0x00022000, 0x00000004 }, + { 0x4ccce05e, 0x00000030 }, + { 0x08274565, 0x00000004 }, + { 0x0000005e, 0x00000030 }, + { 0x08004564, 0x00000004 }, + { 0x0000e566, 0x00000004 }, + { 0x00000055, 0x00000008 }, + { 0x00802061, 0x00000010 }, + { 0x00202000, 0x00000004 }, + { 0x001b00ff, 0x00000004 }, + { 0x01000064, 0x00000010 }, + { 0x001f2000, 0x00000004 }, + { 0x001c00ff, 0x00000004 }, + { 0000000000, 0x0000000c }, + { 0x00000080, 0x00000030 }, + { 0x00000055, 0x00000008 }, + { 0x0000e576, 0x00000004 }, + { 0x000ca000, 0x00000004 }, + { 0x00012000, 0x00000004 }, + { 0x00082000, 0x00000004 }, + { 0x1800650e, 0x00000004 }, + { 0x00092000, 0x00000004 }, + { 0x000a2000, 0x00000004 }, + { 0x000f0000, 0x00000004 }, + { 0x00400000, 0x00000004 }, + { 0x00000074, 0x00000018 }, + { 0x0000e563, 0x00000004 }, + { 0x00c0e5f9, 0x000000c2 }, + { 0x00000069, 0x00000008 }, + { 0x0000a069, 0x00000008 }, + { 0x0000e576, 0x00000004 }, + { 0x0000e577, 0x00000004 }, + { 0x0000e50e, 0x00000004 }, + { 0x0000e50f, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00000077, 0x00000018 }, + { 0x00c0e5f9, 0x000000c2 }, + { 0x00000077, 0x00000008 }, + { 0x0014e50e, 0x00000004 }, + { 0x0040e50f, 0x00000004 }, + { 0x00c0007a, 0x00000008 }, + { 0x0000e570, 0x00000004 }, + { 0x0000e571, 0x00000004 }, + { 0x0000e572, 0x0000000c }, + { 0x0000a000, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x0000e568, 0x00000004 }, + { 0x000c2000, 0x00000004 }, + { 0x00000084, 0x00000018 }, + { 0x000b0000, 0x00000004 }, + { 0x18c0e562, 0x00000004 }, + { 0x00000086, 0x00000008 }, + { 0x00c00085, 0x00000008 }, + { 0x000700e3, 0x00000004 }, + { 0x00000092, 0x00000038 }, + { 0x000ca094, 0x00000030 }, + { 0x080045bb, 0x00000004 }, + { 0x000c2095, 0x00000030 }, + { 0x0800e5bc, 0000000000 }, + { 0x0000e5bb, 0x00000004 }, + { 0x0000e5bc, 0000000000 }, + { 0x00120000, 0x0000000c }, + { 0x00120000, 0x00000004 }, + { 0x001b0002, 0x0000000c }, + { 0x0000a000, 0x00000004 }, + { 0x0000e821, 0x00000004 }, + { 0x0000e800, 0000000000 }, + { 0x0000e821, 0x00000004 }, + { 0x0000e82e, 0000000000 }, + { 0x02cca000, 0x00000004 }, + { 0x00140000, 0x00000004 }, + { 0x000ce1cc, 0x00000004 }, + { 0x050de1cd, 0x00000004 }, + { 0x00400000, 0x00000004 }, + { 0x000000a4, 0x00000018 }, + { 0x00c0a000, 0x00000004 }, + { 0x000000a1, 0x00000008 }, + { 0x000000a6, 0x00000020 }, + { 0x4200e000, 0000000000 }, + { 0x000000ad, 0x00000038 }, + { 0x000ca000, 0x00000004 }, + { 0x00140000, 0x00000004 }, + { 0x000c2000, 0x00000004 }, + { 0x00160000, 0x00000004 }, + { 0x700ce000, 0x00000004 }, + { 0x001400a9, 0x00000008 }, + { 0x4000e000, 0000000000 }, + { 0x02400000, 0x00000004 }, + { 0x400ee000, 0x00000004 }, + { 0x02400000, 0x00000004 }, + { 0x4000e000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x0240e51b, 0x00000004 }, + { 0x0080e50a, 0x00000005 }, + { 0x0080e50b, 0x00000005 }, + { 0x00220000, 0x00000004 }, + { 0x000700e3, 0x00000004 }, + { 0x000000c0, 0x00000038 }, + { 0x000c2095, 0x00000030 }, + { 0x0880e5bd, 0x00000005 }, + { 0x000c2094, 0x00000030 }, + { 0x0800e5bb, 0x00000005 }, + { 0x000c2095, 0x00000030 }, + { 0x0880e5bc, 0x00000005 }, + { 0x000000c3, 0x00000008 }, + { 0x0080e5bd, 0x00000005 }, + { 0x0000e5bb, 0x00000005 }, + { 0x0080e5bc, 0x00000005 }, + { 0x00210000, 0x00000004 }, + { 0x02800000, 0x00000004 }, + { 0x00c000c7, 0x00000018 }, + { 0x4180e000, 0x00000040 }, + { 0x000000c9, 0x00000024 }, + { 0x01000000, 0x0000000c }, + { 0x0100e51d, 0x0000000c }, + { 0x000045bb, 0x00000004 }, + { 0x000080c3, 0x00000008 }, + { 0x0000f3ce, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c053cf, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x0000f3d2, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c053d3, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x0000f39d, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c0539e, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x03c00830, 0x00000004 }, + { 0x4200e000, 0000000000 }, + { 0x0000a000, 0x00000004 }, + { 0x200045e0, 0x00000004 }, + { 0x0000e5e1, 0000000000 }, + { 0x00000001, 0000000000 }, + { 0x000700e0, 0x00000004 }, + { 0x0800e394, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x0000e8c4, 0x00000004 }, + { 0x0000e8c5, 0x00000004 }, + { 0x0000e8c6, 0x00000004 }, + { 0x0000e928, 0x00000004 }, + { 0x0000e929, 0x00000004 }, + { 0x0000e92a, 0x00000004 }, + { 0x000000e4, 0x00000008 }, + { 0x0000e928, 0x00000004 }, + { 0x0000e929, 0x00000004 }, + { 0x0000e92a, 0x00000004 }, + { 0x000000eb, 0x00000008 }, + { 0x02c02000, 0x00000004 }, + { 0x00060000, 0x00000004 }, + { 0x000000f3, 0x00000034 }, + { 0x000000f0, 0x00000008 }, + { 0x00008000, 0x00000004 }, + { 0xc000e000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x001d0018, 0x00000004 }, + { 0x001a0001, 0x00000004 }, + { 0x000000fb, 0x00000034 }, + { 0x0000004a, 0x00000008 }, + { 0x0500a04a, 0x00000008 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, +}; + +static const u32 R420_cp_microcode[][2] = { + { 0x4200e000, 0000000000 }, + { 0x4000e000, 0000000000 }, + { 0x00000099, 0x00000008 }, + { 0x0000009d, 0x00000008 }, + { 0x4a554b4a, 0000000000 }, + { 0x4a4a4467, 0000000000 }, + { 0x55526f75, 0000000000 }, + { 0x4a7e7d65, 0000000000 }, + { 0xd9d3dff6, 0000000000 }, + { 0x4ac54a4a, 0000000000 }, + { 0xc8828282, 0000000000 }, + { 0xbf4acfc1, 0000000000 }, + { 0x87b04a4a, 0000000000 }, + { 0xb5838383, 0000000000 }, + { 0x4a0f85ba, 0000000000 }, + { 0x000ca000, 0x00000004 }, + { 0x000d0012, 0x00000038 }, + { 0x0000e8b4, 0x00000004 }, + { 0x000d0014, 0x00000038 }, + { 0x0000e8b6, 0x00000004 }, + { 0x000d0016, 0x00000038 }, + { 0x0000e854, 0x00000004 }, + { 0x000d0018, 0x00000038 }, + { 0x0000e855, 0x00000004 }, + { 0x000d001a, 0x00000038 }, + { 0x0000e856, 0x00000004 }, + { 0x000d001c, 0x00000038 }, + { 0x0000e857, 0x00000004 }, + { 0x000d001e, 0x00000038 }, + { 0x0000e824, 0x00000004 }, + { 0x000d0020, 0x00000038 }, + { 0x0000e825, 0x00000004 }, + { 0x000d0022, 0x00000038 }, + { 0x0000e830, 0x00000004 }, + { 0x000d0024, 0x00000038 }, + { 0x0000f0c0, 0x00000004 }, + { 0x000d0026, 0x00000038 }, + { 0x0000f0c1, 0x00000004 }, + { 0x000d0028, 0x00000038 }, + { 0x0000f041, 0x00000004 }, + { 0x000d002a, 0x00000038 }, + { 0x0000f184, 0x00000004 }, + { 0x000d002c, 0x00000038 }, + { 0x0000f185, 0x00000004 }, + { 0x000d002e, 0x00000038 }, + { 0x0000f186, 0x00000004 }, + { 0x000d0030, 0x00000038 }, + { 0x0000f187, 0x00000004 }, + { 0x000d0032, 0x00000038 }, + { 0x0000f180, 0x00000004 }, + { 0x000d0034, 0x00000038 }, + { 0x0000f393, 0x00000004 }, + { 0x000d0036, 0x00000038 }, + { 0x0000f38a, 0x00000004 }, + { 0x000d0038, 0x00000038 }, + { 0x0000f38e, 0x00000004 }, + { 0x0000e821, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00000043, 0x00000018 }, + { 0x00cce800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x0000003a, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x2000451d, 0x00000004 }, + { 0x0000e580, 0x00000004 }, + { 0x000ce581, 0x00000004 }, + { 0x08004580, 0x00000004 }, + { 0x000ce581, 0x00000004 }, + { 0x00000047, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x0000e50e, 0x00000004 }, + { 0x00032000, 0x00000004 }, + { 0x00022051, 0x00000028 }, + { 0x00000051, 0x00000024 }, + { 0x0800450f, 0x00000004 }, + { 0x0000a04b, 0x00000008 }, + { 0x0000e565, 0x00000004 }, + { 0x0000e566, 0x00000004 }, + { 0x00000052, 0x00000008 }, + { 0x03cca5b4, 0x00000004 }, + { 0x05432000, 0x00000004 }, + { 0x00022000, 0x00000004 }, + { 0x4ccce05e, 0x00000030 }, + { 0x08274565, 0x00000004 }, + { 0x0000005e, 0x00000030 }, + { 0x08004564, 0x00000004 }, + { 0x0000e566, 0x00000004 }, + { 0x00000055, 0x00000008 }, + { 0x00802061, 0x00000010 }, + { 0x00202000, 0x00000004 }, + { 0x001b00ff, 0x00000004 }, + { 0x01000064, 0x00000010 }, + { 0x001f2000, 0x00000004 }, + { 0x001c00ff, 0x00000004 }, + { 0000000000, 0x0000000c }, + { 0x00000072, 0x00000030 }, + { 0x00000055, 0x00000008 }, + { 0x0000e576, 0x00000004 }, + { 0x0000e577, 0x00000004 }, + { 0x0000e50e, 0x00000004 }, + { 0x0000e50f, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00000069, 0x00000018 }, + { 0x00c0e5f9, 0x000000c2 }, + { 0x00000069, 0x00000008 }, + { 0x0014e50e, 0x00000004 }, + { 0x0040e50f, 0x00000004 }, + { 0x00c0006c, 0x00000008 }, + { 0x0000e570, 0x00000004 }, + { 0x0000e571, 0x00000004 }, + { 0x0000e572, 0x0000000c }, + { 0x0000a000, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x0000e568, 0x00000004 }, + { 0x000c2000, 0x00000004 }, + { 0x00000076, 0x00000018 }, + { 0x000b0000, 0x00000004 }, + { 0x18c0e562, 0x00000004 }, + { 0x00000078, 0x00000008 }, + { 0x00c00077, 0x00000008 }, + { 0x000700c7, 0x00000004 }, + { 0x00000080, 0x00000038 }, + { 0x0000e5bb, 0x00000004 }, + { 0x0000e5bc, 0000000000 }, + { 0x0000a000, 0x00000004 }, + { 0x0000e821, 0x00000004 }, + { 0x0000e800, 0000000000 }, + { 0x0000e821, 0x00000004 }, + { 0x0000e82e, 0000000000 }, + { 0x02cca000, 0x00000004 }, + { 0x00140000, 0x00000004 }, + { 0x000ce1cc, 0x00000004 }, + { 0x050de1cd, 0x00000004 }, + { 0x00400000, 0x00000004 }, + { 0x0000008f, 0x00000018 }, + { 0x00c0a000, 0x00000004 }, + { 0x0000008c, 0x00000008 }, + { 0x00000091, 0x00000020 }, + { 0x4200e000, 0000000000 }, + { 0x00000098, 0x00000038 }, + { 0x000ca000, 0x00000004 }, + { 0x00140000, 0x00000004 }, + { 0x000c2000, 0x00000004 }, + { 0x00160000, 0x00000004 }, + { 0x700ce000, 0x00000004 }, + { 0x00140094, 0x00000008 }, + { 0x4000e000, 0000000000 }, + { 0x02400000, 0x00000004 }, + { 0x400ee000, 0x00000004 }, + { 0x02400000, 0x00000004 }, + { 0x4000e000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x0240e51b, 0x00000004 }, + { 0x0080e50a, 0x00000005 }, + { 0x0080e50b, 0x00000005 }, + { 0x00220000, 0x00000004 }, + { 0x000700c7, 0x00000004 }, + { 0x000000a4, 0x00000038 }, + { 0x0080e5bd, 0x00000005 }, + { 0x0000e5bb, 0x00000005 }, + { 0x0080e5bc, 0x00000005 }, + { 0x00210000, 0x00000004 }, + { 0x02800000, 0x00000004 }, + { 0x00c000ab, 0x00000018 }, + { 0x4180e000, 0x00000040 }, + { 0x000000ad, 0x00000024 }, + { 0x01000000, 0x0000000c }, + { 0x0100e51d, 0x0000000c }, + { 0x000045bb, 0x00000004 }, + { 0x000080a7, 0x00000008 }, + { 0x0000f3ce, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c053cf, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x0000f3d2, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c053d3, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x0000f39d, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c0539e, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x03c00830, 0x00000004 }, + { 0x4200e000, 0000000000 }, + { 0x0000a000, 0x00000004 }, + { 0x200045e0, 0x00000004 }, + { 0x0000e5e1, 0000000000 }, + { 0x00000001, 0000000000 }, + { 0x000700c4, 0x00000004 }, + { 0x0800e394, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x0000e8c4, 0x00000004 }, + { 0x0000e8c5, 0x00000004 }, + { 0x0000e8c6, 0x00000004 }, + { 0x0000e928, 0x00000004 }, + { 0x0000e929, 0x00000004 }, + { 0x0000e92a, 0x00000004 }, + { 0x000000c8, 0x00000008 }, + { 0x0000e928, 0x00000004 }, + { 0x0000e929, 0x00000004 }, + { 0x0000e92a, 0x00000004 }, + { 0x000000cf, 0x00000008 }, + { 0x02c02000, 0x00000004 }, + { 0x00060000, 0x00000004 }, + { 0x000000d7, 0x00000034 }, + { 0x000000d4, 0x00000008 }, + { 0x00008000, 0x00000004 }, + { 0xc000e000, 0000000000 }, + { 0x0000e1cc, 0x00000004 }, + { 0x0500e1cd, 0x00000004 }, + { 0x000ca000, 0x00000004 }, + { 0x000000de, 0x00000034 }, + { 0x000000da, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x0019e1cc, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x0500a000, 0x00000004 }, + { 0x080041cd, 0x00000004 }, + { 0x000ca000, 0x00000004 }, + { 0x000000fb, 0x00000034 }, + { 0x0000004a, 0x00000008 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x001d0018, 0x00000004 }, + { 0x001a0001, 0x00000004 }, + { 0x000000fb, 0x00000034 }, + { 0x0000004a, 0x00000008 }, + { 0x0500a04a, 0x00000008 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, +}; + +static const u32 RS600_cp_microcode[][2] = { + { 0x4200e000, 0000000000 }, + { 0x4000e000, 0000000000 }, + { 0x000000a0, 0x00000008 }, + { 0x000000a4, 0x00000008 }, + { 0x4a554b4a, 0000000000 }, + { 0x4a4a4467, 0000000000 }, + { 0x55526f75, 0000000000 }, + { 0x4a7e7d65, 0000000000 }, + { 0x4ae74af6, 0000000000 }, + { 0x4ad34a4a, 0000000000 }, + { 0xd6898989, 0000000000 }, + { 0xcd4addcf, 0000000000 }, + { 0x8ebe4ae2, 0000000000 }, + { 0xc38a8a8a, 0000000000 }, + { 0x4a0f8cc8, 0000000000 }, + { 0x000ca000, 0x00000004 }, + { 0x000d0012, 0x00000038 }, + { 0x0000e8b4, 0x00000004 }, + { 0x000d0014, 0x00000038 }, + { 0x0000e8b6, 0x00000004 }, + { 0x000d0016, 0x00000038 }, + { 0x0000e854, 0x00000004 }, + { 0x000d0018, 0x00000038 }, + { 0x0000e855, 0x00000004 }, + { 0x000d001a, 0x00000038 }, + { 0x0000e856, 0x00000004 }, + { 0x000d001c, 0x00000038 }, + { 0x0000e857, 0x00000004 }, + { 0x000d001e, 0x00000038 }, + { 0x0000e824, 0x00000004 }, + { 0x000d0020, 0x00000038 }, + { 0x0000e825, 0x00000004 }, + { 0x000d0022, 0x00000038 }, + { 0x0000e830, 0x00000004 }, + { 0x000d0024, 0x00000038 }, + { 0x0000f0c0, 0x00000004 }, + { 0x000d0026, 0x00000038 }, + { 0x0000f0c1, 0x00000004 }, + { 0x000d0028, 0x00000038 }, + { 0x0000f041, 0x00000004 }, + { 0x000d002a, 0x00000038 }, + { 0x0000f184, 0x00000004 }, + { 0x000d002c, 0x00000038 }, + { 0x0000f185, 0x00000004 }, + { 0x000d002e, 0x00000038 }, + { 0x0000f186, 0x00000004 }, + { 0x000d0030, 0x00000038 }, + { 0x0000f187, 0x00000004 }, + { 0x000d0032, 0x00000038 }, + { 0x0000f180, 0x00000004 }, + { 0x000d0034, 0x00000038 }, + { 0x0000f393, 0x00000004 }, + { 0x000d0036, 0x00000038 }, + { 0x0000f38a, 0x00000004 }, + { 0x000d0038, 0x00000038 }, + { 0x0000f38e, 0x00000004 }, + { 0x0000e821, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00000043, 0x00000018 }, + { 0x00cce800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x0000003a, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x2000451d, 0x00000004 }, + { 0x0000e580, 0x00000004 }, + { 0x000ce581, 0x00000004 }, + { 0x08004580, 0x00000004 }, + { 0x000ce581, 0x00000004 }, + { 0x00000047, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x0000e50e, 0x00000004 }, + { 0x00032000, 0x00000004 }, + { 0x00022051, 0x00000028 }, + { 0x00000051, 0x00000024 }, + { 0x0800450f, 0x00000004 }, + { 0x0000a04b, 0x00000008 }, + { 0x0000e565, 0x00000004 }, + { 0x0000e566, 0x00000004 }, + { 0x00000052, 0x00000008 }, + { 0x03cca5b4, 0x00000004 }, + { 0x05432000, 0x00000004 }, + { 0x00022000, 0x00000004 }, + { 0x4ccce05e, 0x00000030 }, + { 0x08274565, 0x00000004 }, + { 0x0000005e, 0x00000030 }, + { 0x08004564, 0x00000004 }, + { 0x0000e566, 0x00000004 }, + { 0x00000055, 0x00000008 }, + { 0x00802061, 0x00000010 }, + { 0x00202000, 0x00000004 }, + { 0x001b00ff, 0x00000004 }, + { 0x01000064, 0x00000010 }, + { 0x001f2000, 0x00000004 }, + { 0x001c00ff, 0x00000004 }, + { 0000000000, 0x0000000c }, + { 0x00000072, 0x00000030 }, + { 0x00000055, 0x00000008 }, + { 0x0000e576, 0x00000004 }, + { 0x0000e577, 0x00000004 }, + { 0x0000e50e, 0x00000004 }, + { 0x0000e50f, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00000069, 0x00000018 }, + { 0x00c0e5f9, 0x000000c2 }, + { 0x00000069, 0x00000008 }, + { 0x0014e50e, 0x00000004 }, + { 0x0040e50f, 0x00000004 }, + { 0x00c0006c, 0x00000008 }, + { 0x0000e570, 0x00000004 }, + { 0x0000e571, 0x00000004 }, + { 0x0000e572, 0x0000000c }, + { 0x0000a000, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x0000e568, 0x00000004 }, + { 0x000c2000, 0x00000004 }, + { 0x00000076, 0x00000018 }, + { 0x000b0000, 0x00000004 }, + { 0x18c0e562, 0x00000004 }, + { 0x00000078, 0x00000008 }, + { 0x00c00077, 0x00000008 }, + { 0x000700d5, 0x00000004 }, + { 0x00000084, 0x00000038 }, + { 0x000ca086, 0x00000030 }, + { 0x080045bb, 0x00000004 }, + { 0x000c2087, 0x00000030 }, + { 0x0800e5bc, 0000000000 }, + { 0x0000e5bb, 0x00000004 }, + { 0x0000e5bc, 0000000000 }, + { 0x00120000, 0x0000000c }, + { 0x00120000, 0x00000004 }, + { 0x001b0002, 0x0000000c }, + { 0x0000a000, 0x00000004 }, + { 0x0000e821, 0x00000004 }, + { 0x0000e800, 0000000000 }, + { 0x0000e821, 0x00000004 }, + { 0x0000e82e, 0000000000 }, + { 0x02cca000, 0x00000004 }, + { 0x00140000, 0x00000004 }, + { 0x000ce1cc, 0x00000004 }, + { 0x050de1cd, 0x00000004 }, + { 0x00400000, 0x00000004 }, + { 0x00000096, 0x00000018 }, + { 0x00c0a000, 0x00000004 }, + { 0x00000093, 0x00000008 }, + { 0x00000098, 0x00000020 }, + { 0x4200e000, 0000000000 }, + { 0x0000009f, 0x00000038 }, + { 0x000ca000, 0x00000004 }, + { 0x00140000, 0x00000004 }, + { 0x000c2000, 0x00000004 }, + { 0x00160000, 0x00000004 }, + { 0x700ce000, 0x00000004 }, + { 0x0014009b, 0x00000008 }, + { 0x4000e000, 0000000000 }, + { 0x02400000, 0x00000004 }, + { 0x400ee000, 0x00000004 }, + { 0x02400000, 0x00000004 }, + { 0x4000e000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x0240e51b, 0x00000004 }, + { 0x0080e50a, 0x00000005 }, + { 0x0080e50b, 0x00000005 }, + { 0x00220000, 0x00000004 }, + { 0x000700d5, 0x00000004 }, + { 0x000000b2, 0x00000038 }, + { 0x000c2087, 0x00000030 }, + { 0x0880e5bd, 0x00000005 }, + { 0x000c2086, 0x00000030 }, + { 0x0800e5bb, 0x00000005 }, + { 0x000c2087, 0x00000030 }, + { 0x0880e5bc, 0x00000005 }, + { 0x000000b5, 0x00000008 }, + { 0x0080e5bd, 0x00000005 }, + { 0x0000e5bb, 0x00000005 }, + { 0x0080e5bc, 0x00000005 }, + { 0x00210000, 0x00000004 }, + { 0x02800000, 0x00000004 }, + { 0x00c000b9, 0x00000018 }, + { 0x4180e000, 0x00000040 }, + { 0x000000bb, 0x00000024 }, + { 0x01000000, 0x0000000c }, + { 0x0100e51d, 0x0000000c }, + { 0x000045bb, 0x00000004 }, + { 0x000080b5, 0x00000008 }, + { 0x0000f3ce, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c053cf, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x0000f3d2, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c053d3, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x0000f39d, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c0539e, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x03c00830, 0x00000004 }, + { 0x4200e000, 0000000000 }, + { 0x0000a000, 0x00000004 }, + { 0x200045e0, 0x00000004 }, + { 0x0000e5e1, 0000000000 }, + { 0x00000001, 0000000000 }, + { 0x000700d2, 0x00000004 }, + { 0x0800e394, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x0000e8c4, 0x00000004 }, + { 0x0000e8c5, 0x00000004 }, + { 0x0000e8c6, 0x00000004 }, + { 0x0000e928, 0x00000004 }, + { 0x0000e929, 0x00000004 }, + { 0x0000e92a, 0x00000004 }, + { 0x000000d6, 0x00000008 }, + { 0x0000e928, 0x00000004 }, + { 0x0000e929, 0x00000004 }, + { 0x0000e92a, 0x00000004 }, + { 0x000000dd, 0x00000008 }, + { 0x00e00116, 0000000000 }, + { 0x000700e1, 0x00000004 }, + { 0x0800401c, 0x00000004 }, + { 0x200050e7, 0x00000004 }, + { 0x0000e01d, 0x00000004 }, + { 0x000000e4, 0x00000008 }, + { 0x02c02000, 0x00000004 }, + { 0x00060000, 0x00000004 }, + { 0x000000eb, 0x00000034 }, + { 0x000000e8, 0x00000008 }, + { 0x00008000, 0x00000004 }, + { 0xc000e000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x001d0018, 0x00000004 }, + { 0x001a0001, 0x00000004 }, + { 0x000000fb, 0x00000034 }, + { 0x0000004a, 0x00000008 }, + { 0x0500a04a, 0x00000008 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, +}; + +static const u32 RS690_cp_microcode[][2] = { + { 0x000000dd, 0x00000008 }, + { 0x000000df, 0x00000008 }, + { 0x000000a0, 0x00000008 }, + { 0x000000a4, 0x00000008 }, + { 0x4a554b4a, 0000000000 }, + { 0x4a4a4467, 0000000000 }, + { 0x55526f75, 0000000000 }, + { 0x4a7e7d65, 0000000000 }, + { 0x4ad74af6, 0000000000 }, + { 0x4ac94a4a, 0000000000 }, + { 0xcc898989, 0000000000 }, + { 0xc34ad3c5, 0000000000 }, + { 0x8e4a4a4a, 0000000000 }, + { 0x4a8a8a8a, 0000000000 }, + { 0x4a0f8c4a, 0000000000 }, + { 0x000ca000, 0x00000004 }, + { 0x000d0012, 0x00000038 }, + { 0x0000e8b4, 0x00000004 }, + { 0x000d0014, 0x00000038 }, + { 0x0000e8b6, 0x00000004 }, + { 0x000d0016, 0x00000038 }, + { 0x0000e854, 0x00000004 }, + { 0x000d0018, 0x00000038 }, + { 0x0000e855, 0x00000004 }, + { 0x000d001a, 0x00000038 }, + { 0x0000e856, 0x00000004 }, + { 0x000d001c, 0x00000038 }, + { 0x0000e857, 0x00000004 }, + { 0x000d001e, 0x00000038 }, + { 0x0000e824, 0x00000004 }, + { 0x000d0020, 0x00000038 }, + { 0x0000e825, 0x00000004 }, + { 0x000d0022, 0x00000038 }, + { 0x0000e830, 0x00000004 }, + { 0x000d0024, 0x00000038 }, + { 0x0000f0c0, 0x00000004 }, + { 0x000d0026, 0x00000038 }, + { 0x0000f0c1, 0x00000004 }, + { 0x000d0028, 0x00000038 }, + { 0x0000f041, 0x00000004 }, + { 0x000d002a, 0x00000038 }, + { 0x0000f184, 0x00000004 }, + { 0x000d002c, 0x00000038 }, + { 0x0000f185, 0x00000004 }, + { 0x000d002e, 0x00000038 }, + { 0x0000f186, 0x00000004 }, + { 0x000d0030, 0x00000038 }, + { 0x0000f187, 0x00000004 }, + { 0x000d0032, 0x00000038 }, + { 0x0000f180, 0x00000004 }, + { 0x000d0034, 0x00000038 }, + { 0x0000f393, 0x00000004 }, + { 0x000d0036, 0x00000038 }, + { 0x0000f38a, 0x00000004 }, + { 0x000d0038, 0x00000038 }, + { 0x0000f38e, 0x00000004 }, + { 0x0000e821, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00000043, 0x00000018 }, + { 0x00cce800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x0000003a, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x2000451d, 0x00000004 }, + { 0x0000e580, 0x00000004 }, + { 0x000ce581, 0x00000004 }, + { 0x08004580, 0x00000004 }, + { 0x000ce581, 0x00000004 }, + { 0x00000047, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x0000e50e, 0x00000004 }, + { 0x00032000, 0x00000004 }, + { 0x00022051, 0x00000028 }, + { 0x00000051, 0x00000024 }, + { 0x0800450f, 0x00000004 }, + { 0x0000a04b, 0x00000008 }, + { 0x0000e565, 0x00000004 }, + { 0x0000e566, 0x00000004 }, + { 0x00000052, 0x00000008 }, + { 0x03cca5b4, 0x00000004 }, + { 0x05432000, 0x00000004 }, + { 0x00022000, 0x00000004 }, + { 0x4ccce05e, 0x00000030 }, + { 0x08274565, 0x00000004 }, + { 0x0000005e, 0x00000030 }, + { 0x08004564, 0x00000004 }, + { 0x0000e566, 0x00000004 }, + { 0x00000055, 0x00000008 }, + { 0x00802061, 0x00000010 }, + { 0x00202000, 0x00000004 }, + { 0x001b00ff, 0x00000004 }, + { 0x01000064, 0x00000010 }, + { 0x001f2000, 0x00000004 }, + { 0x001c00ff, 0x00000004 }, + { 0000000000, 0x0000000c }, + { 0x00000072, 0x00000030 }, + { 0x00000055, 0x00000008 }, + { 0x0000e576, 0x00000004 }, + { 0x0000e577, 0x00000004 }, + { 0x0000e50e, 0x00000004 }, + { 0x0000e50f, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00000069, 0x00000018 }, + { 0x00c0e5f9, 0x000000c2 }, + { 0x00000069, 0x00000008 }, + { 0x0014e50e, 0x00000004 }, + { 0x0040e50f, 0x00000004 }, + { 0x00c0006c, 0x00000008 }, + { 0x0000e570, 0x00000004 }, + { 0x0000e571, 0x00000004 }, + { 0x0000e572, 0x0000000c }, + { 0x0000a000, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x0000e568, 0x00000004 }, + { 0x000c2000, 0x00000004 }, + { 0x00000076, 0x00000018 }, + { 0x000b0000, 0x00000004 }, + { 0x18c0e562, 0x00000004 }, + { 0x00000078, 0x00000008 }, + { 0x00c00077, 0x00000008 }, + { 0x000700cb, 0x00000004 }, + { 0x00000084, 0x00000038 }, + { 0x000ca086, 0x00000030 }, + { 0x080045bb, 0x00000004 }, + { 0x000c2087, 0x00000030 }, + { 0x0800e5bc, 0000000000 }, + { 0x0000e5bb, 0x00000004 }, + { 0x0000e5bc, 0000000000 }, + { 0x00120000, 0x0000000c }, + { 0x00120000, 0x00000004 }, + { 0x001b0002, 0x0000000c }, + { 0x0000a000, 0x00000004 }, + { 0x0000e821, 0x00000004 }, + { 0x0000e800, 0000000000 }, + { 0x0000e821, 0x00000004 }, + { 0x0000e82e, 0000000000 }, + { 0x02cca000, 0x00000004 }, + { 0x00140000, 0x00000004 }, + { 0x000ce1cc, 0x00000004 }, + { 0x050de1cd, 0x00000004 }, + { 0x00400000, 0x00000004 }, + { 0x00000096, 0x00000018 }, + { 0x00c0a000, 0x00000004 }, + { 0x00000093, 0x00000008 }, + { 0x00000098, 0x00000020 }, + { 0x4200e000, 0000000000 }, + { 0x0000009f, 0x00000038 }, + { 0x000ca000, 0x00000004 }, + { 0x00140000, 0x00000004 }, + { 0x000c2000, 0x00000004 }, + { 0x00160000, 0x00000004 }, + { 0x700ce000, 0x00000004 }, + { 0x0014009b, 0x00000008 }, + { 0x4000e000, 0000000000 }, + { 0x02400000, 0x00000004 }, + { 0x400ee000, 0x00000004 }, + { 0x02400000, 0x00000004 }, + { 0x4000e000, 0000000000 }, + { 0x00100000, 0x0000002c }, + { 0x00004000, 0000000000 }, + { 0x080045c8, 0x00000004 }, + { 0x00240005, 0x00000004 }, + { 0x08004d0b, 0x00000004 }, + { 0x000c2000, 0x00000004 }, + { 0x0240e51b, 0x00000004 }, + { 0x0080e50a, 0x00000005 }, + { 0x0080e50b, 0x00000005 }, + { 0x00220000, 0x00000004 }, + { 0x000700cb, 0x00000004 }, + { 0x000000b7, 0x00000038 }, + { 0x000c2087, 0x00000030 }, + { 0x0880e5bd, 0x00000005 }, + { 0x000c2086, 0x00000030 }, + { 0x0800e5bb, 0x00000005 }, + { 0x000c2087, 0x00000030 }, + { 0x0880e5bc, 0x00000005 }, + { 0x000000ba, 0x00000008 }, + { 0x0080e5bd, 0x00000005 }, + { 0x0000e5bb, 0x00000005 }, + { 0x0080e5bc, 0x00000005 }, + { 0x00210000, 0x00000004 }, + { 0x02800000, 0x00000004 }, + { 0x00c000be, 0x00000018 }, + { 0x4180e000, 0x00000040 }, + { 0x000000c0, 0x00000024 }, + { 0x01000000, 0x0000000c }, + { 0x0100e51d, 0x0000000c }, + { 0x000045bb, 0x00000004 }, + { 0x000080ba, 0x00000008 }, + { 0x03c00830, 0x00000004 }, + { 0x4200e000, 0000000000 }, + { 0x0000a000, 0x00000004 }, + { 0x200045e0, 0x00000004 }, + { 0x0000e5e1, 0000000000 }, + { 0x00000001, 0000000000 }, + { 0x000700c8, 0x00000004 }, + { 0x0800e394, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x0000e8c4, 0x00000004 }, + { 0x0000e8c5, 0x00000004 }, + { 0x0000e8c6, 0x00000004 }, + { 0x0000e928, 0x00000004 }, + { 0x0000e929, 0x00000004 }, + { 0x0000e92a, 0x00000004 }, + { 0x000000cc, 0x00000008 }, + { 0x0000e928, 0x00000004 }, + { 0x0000e929, 0x00000004 }, + { 0x0000e92a, 0x00000004 }, + { 0x000000d3, 0x00000008 }, + { 0x02c02000, 0x00000004 }, + { 0x00060000, 0x00000004 }, + { 0x000000db, 0x00000034 }, + { 0x000000d8, 0x00000008 }, + { 0x00008000, 0x00000004 }, + { 0xc000e000, 0000000000 }, + { 0x000000e1, 0x00000030 }, + { 0x4200e000, 0000000000 }, + { 0x000000e1, 0x00000030 }, + { 0x4000e000, 0000000000 }, + { 0x0025001b, 0x00000004 }, + { 0x00230000, 0x00000004 }, + { 0x00250005, 0x00000004 }, + { 0x000000e6, 0x00000034 }, + { 0000000000, 0x0000000c }, + { 0x00244000, 0x00000004 }, + { 0x080045c8, 0x00000004 }, + { 0x00240005, 0x00000004 }, + { 0x08004d0b, 0x0000000c }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x001d0018, 0x00000004 }, + { 0x001a0001, 0x00000004 }, + { 0x000000fb, 0x00000034 }, + { 0x0000004a, 0x00000008 }, + { 0x0500a04a, 0x00000008 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, +}; + +static const u32 R520_cp_microcode[][2] = { + { 0x4200e000, 0000000000 }, + { 0x4000e000, 0000000000 }, + { 0x00000099, 0x00000008 }, + { 0x0000009d, 0x00000008 }, + { 0x4a554b4a, 0000000000 }, + { 0x4a4a4467, 0000000000 }, + { 0x55526f75, 0000000000 }, + { 0x4a7e7d65, 0000000000 }, + { 0xe0dae6f6, 0000000000 }, + { 0x4ac54a4a, 0000000000 }, + { 0xc8828282, 0000000000 }, + { 0xbf4acfc1, 0000000000 }, + { 0x87b04ad5, 0000000000 }, + { 0xb5838383, 0000000000 }, + { 0x4a0f85ba, 0000000000 }, + { 0x000ca000, 0x00000004 }, + { 0x000d0012, 0x00000038 }, + { 0x0000e8b4, 0x00000004 }, + { 0x000d0014, 0x00000038 }, + { 0x0000e8b6, 0x00000004 }, + { 0x000d0016, 0x00000038 }, + { 0x0000e854, 0x00000004 }, + { 0x000d0018, 0x00000038 }, + { 0x0000e855, 0x00000004 }, + { 0x000d001a, 0x00000038 }, + { 0x0000e856, 0x00000004 }, + { 0x000d001c, 0x00000038 }, + { 0x0000e857, 0x00000004 }, + { 0x000d001e, 0x00000038 }, + { 0x0000e824, 0x00000004 }, + { 0x000d0020, 0x00000038 }, + { 0x0000e825, 0x00000004 }, + { 0x000d0022, 0x00000038 }, + { 0x0000e830, 0x00000004 }, + { 0x000d0024, 0x00000038 }, + { 0x0000f0c0, 0x00000004 }, + { 0x000d0026, 0x00000038 }, + { 0x0000f0c1, 0x00000004 }, + { 0x000d0028, 0x00000038 }, + { 0x0000e000, 0x00000004 }, + { 0x000d002a, 0x00000038 }, + { 0x0000e000, 0x00000004 }, + { 0x000d002c, 0x00000038 }, + { 0x0000e000, 0x00000004 }, + { 0x000d002e, 0x00000038 }, + { 0x0000e000, 0x00000004 }, + { 0x000d0030, 0x00000038 }, + { 0x0000e000, 0x00000004 }, + { 0x000d0032, 0x00000038 }, + { 0x0000f180, 0x00000004 }, + { 0x000d0034, 0x00000038 }, + { 0x0000f393, 0x00000004 }, + { 0x000d0036, 0x00000038 }, + { 0x0000f38a, 0x00000004 }, + { 0x000d0038, 0x00000038 }, + { 0x0000f38e, 0x00000004 }, + { 0x0000e821, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00000043, 0x00000018 }, + { 0x00cce800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x08004800, 0x00000004 }, + { 0x0000003a, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x2000451d, 0x00000004 }, + { 0x0000e580, 0x00000004 }, + { 0x000ce581, 0x00000004 }, + { 0x08004580, 0x00000004 }, + { 0x000ce581, 0x00000004 }, + { 0x00000047, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x0000e50e, 0x00000004 }, + { 0x00032000, 0x00000004 }, + { 0x00022051, 0x00000028 }, + { 0x00000051, 0x00000024 }, + { 0x0800450f, 0x00000004 }, + { 0x0000a04b, 0x00000008 }, + { 0x0000e565, 0x00000004 }, + { 0x0000e566, 0x00000004 }, + { 0x00000052, 0x00000008 }, + { 0x03cca5b4, 0x00000004 }, + { 0x05432000, 0x00000004 }, + { 0x00022000, 0x00000004 }, + { 0x4ccce05e, 0x00000030 }, + { 0x08274565, 0x00000004 }, + { 0x0000005e, 0x00000030 }, + { 0x08004564, 0x00000004 }, + { 0x0000e566, 0x00000004 }, + { 0x00000055, 0x00000008 }, + { 0x00802061, 0x00000010 }, + { 0x00202000, 0x00000004 }, + { 0x001b00ff, 0x00000004 }, + { 0x01000064, 0x00000010 }, + { 0x001f2000, 0x00000004 }, + { 0x001c00ff, 0x00000004 }, + { 0000000000, 0x0000000c }, + { 0x00000072, 0x00000030 }, + { 0x00000055, 0x00000008 }, + { 0x0000e576, 0x00000004 }, + { 0x0000e577, 0x00000004 }, + { 0x0000e50e, 0x00000004 }, + { 0x0000e50f, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00000069, 0x00000018 }, + { 0x00c0e5f9, 0x000000c2 }, + { 0x00000069, 0x00000008 }, + { 0x0014e50e, 0x00000004 }, + { 0x0040e50f, 0x00000004 }, + { 0x00c0006c, 0x00000008 }, + { 0x0000e570, 0x00000004 }, + { 0x0000e571, 0x00000004 }, + { 0x0000e572, 0x0000000c }, + { 0x0000a000, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x0000e568, 0x00000004 }, + { 0x000c2000, 0x00000004 }, + { 0x00000076, 0x00000018 }, + { 0x000b0000, 0x00000004 }, + { 0x18c0e562, 0x00000004 }, + { 0x00000078, 0x00000008 }, + { 0x00c00077, 0x00000008 }, + { 0x000700c7, 0x00000004 }, + { 0x00000080, 0x00000038 }, + { 0x0000e5bb, 0x00000004 }, + { 0x0000e5bc, 0000000000 }, + { 0x0000a000, 0x00000004 }, + { 0x0000e821, 0x00000004 }, + { 0x0000e800, 0000000000 }, + { 0x0000e821, 0x00000004 }, + { 0x0000e82e, 0000000000 }, + { 0x02cca000, 0x00000004 }, + { 0x00140000, 0x00000004 }, + { 0x000ce1cc, 0x00000004 }, + { 0x050de1cd, 0x00000004 }, + { 0x00400000, 0x00000004 }, + { 0x0000008f, 0x00000018 }, + { 0x00c0a000, 0x00000004 }, + { 0x0000008c, 0x00000008 }, + { 0x00000091, 0x00000020 }, + { 0x4200e000, 0000000000 }, + { 0x00000098, 0x00000038 }, + { 0x000ca000, 0x00000004 }, + { 0x00140000, 0x00000004 }, + { 0x000c2000, 0x00000004 }, + { 0x00160000, 0x00000004 }, + { 0x700ce000, 0x00000004 }, + { 0x00140094, 0x00000008 }, + { 0x4000e000, 0000000000 }, + { 0x02400000, 0x00000004 }, + { 0x400ee000, 0x00000004 }, + { 0x02400000, 0x00000004 }, + { 0x4000e000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x0240e51b, 0x00000004 }, + { 0x0080e50a, 0x00000005 }, + { 0x0080e50b, 0x00000005 }, + { 0x00220000, 0x00000004 }, + { 0x000700c7, 0x00000004 }, + { 0x000000a4, 0x00000038 }, + { 0x0080e5bd, 0x00000005 }, + { 0x0000e5bb, 0x00000005 }, + { 0x0080e5bc, 0x00000005 }, + { 0x00210000, 0x00000004 }, + { 0x02800000, 0x00000004 }, + { 0x00c000ab, 0x00000018 }, + { 0x4180e000, 0x00000040 }, + { 0x000000ad, 0x00000024 }, + { 0x01000000, 0x0000000c }, + { 0x0100e51d, 0x0000000c }, + { 0x000045bb, 0x00000004 }, + { 0x000080a7, 0x00000008 }, + { 0x0000f3ce, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c053cf, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x0000f3d2, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c053d3, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x0000f39d, 0x00000004 }, + { 0x0140a000, 0x00000004 }, + { 0x00cc2000, 0x00000004 }, + { 0x08c0539e, 0x00000040 }, + { 0x00008000, 0000000000 }, + { 0x03c00830, 0x00000004 }, + { 0x4200e000, 0000000000 }, + { 0x0000a000, 0x00000004 }, + { 0x200045e0, 0x00000004 }, + { 0x0000e5e1, 0000000000 }, + { 0x00000001, 0000000000 }, + { 0x000700c4, 0x00000004 }, + { 0x0800e394, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x0000e8c4, 0x00000004 }, + { 0x0000e8c5, 0x00000004 }, + { 0x0000e8c6, 0x00000004 }, + { 0x0000e928, 0x00000004 }, + { 0x0000e929, 0x00000004 }, + { 0x0000e92a, 0x00000004 }, + { 0x000000c8, 0x00000008 }, + { 0x0000e928, 0x00000004 }, + { 0x0000e929, 0x00000004 }, + { 0x0000e92a, 0x00000004 }, + { 0x000000cf, 0x00000008 }, + { 0xdeadbeef, 0000000000 }, + { 0x00000116, 0000000000 }, + { 0x000700d3, 0x00000004 }, + { 0x080050e7, 0x00000004 }, + { 0x000700d4, 0x00000004 }, + { 0x0800401c, 0x00000004 }, + { 0x0000e01d, 0000000000 }, + { 0x02c02000, 0x00000004 }, + { 0x00060000, 0x00000004 }, + { 0x000000de, 0x00000034 }, + { 0x000000db, 0x00000008 }, + { 0x00008000, 0x00000004 }, + { 0xc000e000, 0000000000 }, + { 0x0000e1cc, 0x00000004 }, + { 0x0500e1cd, 0x00000004 }, + { 0x000ca000, 0x00000004 }, + { 0x000000e5, 0x00000034 }, + { 0x000000e1, 0x00000008 }, + { 0x0000a000, 0000000000 }, + { 0x0019e1cc, 0x00000004 }, + { 0x001b0001, 0x00000004 }, + { 0x0500a000, 0x00000004 }, + { 0x080041cd, 0x00000004 }, + { 0x000ca000, 0x00000004 }, + { 0x000000fb, 0x00000034 }, + { 0x0000004a, 0x00000008 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0x000c2000, 0x00000004 }, + { 0x001d0018, 0x00000004 }, + { 0x001a0001, 0x00000004 }, + { 0x000000fb, 0x00000034 }, + { 0x0000004a, 0x00000008 }, + { 0x0500a04a, 0x00000008 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, +}; + + +#endif diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c new file mode 100644 index 00000000000..11c146b4921 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -0,0 +1,3203 @@ +/* radeon_state.c -- State support for Radeon -*- linux-c -*- */ +/* + * Copyright 2000 VA Linux Systems, Inc., Fremont, California. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Kevin E. Martin <martin@valinux.com> + */ + +#include "drmP.h" +#include "drm.h" +#include "drm_sarea.h" +#include "radeon_drm.h" +#include "radeon_drv.h" + +/* ================================================================ + * Helper functions for client state checking and fixup + */ + +static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t * + dev_priv, + struct drm_file * file_priv, + u32 *offset) +{ + u64 off = *offset; + u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1; + struct drm_radeon_driver_file_fields *radeon_priv; + + /* Hrm ... the story of the offset ... So this function converts + * the various ideas of what userland clients might have for an + * offset in the card address space into an offset into the card + * address space :) So with a sane client, it should just keep + * the value intact and just do some boundary checking. However, + * not all clients are sane. Some older clients pass us 0 based + * offsets relative to the start of the framebuffer and some may + * assume the AGP aperture it appended to the framebuffer, so we + * try to detect those cases and fix them up. + * + * Note: It might be a good idea here to make sure the offset lands + * in some "allowed" area to protect things like the PCIE GART... + */ + + /* First, the best case, the offset already lands in either the + * framebuffer or the GART mapped space + */ + if (radeon_check_offset(dev_priv, off)) + return 0; + + /* Ok, that didn't happen... now check if we have a zero based + * offset that fits in the framebuffer + gart space, apply the + * magic offset we get from SETPARAM or calculated from fb_location + */ + if (off < (dev_priv->fb_size + dev_priv->gart_size)) { + radeon_priv = file_priv->driver_priv; + off += radeon_priv->radeon_fb_delta; + } + + /* Finally, assume we aimed at a GART offset if beyond the fb */ + if (off > fb_end) + off = off - fb_end - 1 + dev_priv->gart_vm_start; + + /* Now recheck and fail if out of bounds */ + if (radeon_check_offset(dev_priv, off)) { + DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off); + *offset = off; + return 0; + } + return -EINVAL; +} + +static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t * + dev_priv, + struct drm_file *file_priv, + int id, u32 *data) +{ + switch (id) { + + case RADEON_EMIT_PP_MISC: + if (radeon_check_and_fixup_offset(dev_priv, file_priv, + &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) { + DRM_ERROR("Invalid depth buffer offset\n"); + return -EINVAL; + } + break; + + case RADEON_EMIT_PP_CNTL: + if (radeon_check_and_fixup_offset(dev_priv, file_priv, + &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) { + DRM_ERROR("Invalid colour buffer offset\n"); + return -EINVAL; + } + break; + + case R200_EMIT_PP_TXOFFSET_0: + case R200_EMIT_PP_TXOFFSET_1: + case R200_EMIT_PP_TXOFFSET_2: + case R200_EMIT_PP_TXOFFSET_3: + case R200_EMIT_PP_TXOFFSET_4: + case R200_EMIT_PP_TXOFFSET_5: + if (radeon_check_and_fixup_offset(dev_priv, file_priv, + &data[0])) { + DRM_ERROR("Invalid R200 texture offset\n"); + return -EINVAL; + } + break; + + case RADEON_EMIT_PP_TXFILTER_0: + case RADEON_EMIT_PP_TXFILTER_1: + case RADEON_EMIT_PP_TXFILTER_2: + if (radeon_check_and_fixup_offset(dev_priv, file_priv, + &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) { + DRM_ERROR("Invalid R100 texture offset\n"); + return -EINVAL; + } + break; + + case R200_EMIT_PP_CUBIC_OFFSETS_0: + case R200_EMIT_PP_CUBIC_OFFSETS_1: + case R200_EMIT_PP_CUBIC_OFFSETS_2: + case R200_EMIT_PP_CUBIC_OFFSETS_3: + case R200_EMIT_PP_CUBIC_OFFSETS_4: + case R200_EMIT_PP_CUBIC_OFFSETS_5:{ + int i; + for (i = 0; i < 5; i++) { + if (radeon_check_and_fixup_offset(dev_priv, + file_priv, + &data[i])) { + DRM_ERROR + ("Invalid R200 cubic texture offset\n"); + return -EINVAL; + } + } + break; + } + + case RADEON_EMIT_PP_CUBIC_OFFSETS_T0: + case RADEON_EMIT_PP_CUBIC_OFFSETS_T1: + case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{ + int i; + for (i = 0; i < 5; i++) { + if (radeon_check_and_fixup_offset(dev_priv, + file_priv, + &data[i])) { + DRM_ERROR + ("Invalid R100 cubic texture offset\n"); + return -EINVAL; + } + } + } + break; + + case R200_EMIT_VAP_CTL:{ + RING_LOCALS; + BEGIN_RING(2); + OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0); + ADVANCE_RING(); + } + break; + + case RADEON_EMIT_RB3D_COLORPITCH: + case RADEON_EMIT_RE_LINE_PATTERN: + case RADEON_EMIT_SE_LINE_WIDTH: + case RADEON_EMIT_PP_LUM_MATRIX: + case RADEON_EMIT_PP_ROT_MATRIX_0: + case RADEON_EMIT_RB3D_STENCILREFMASK: + case RADEON_EMIT_SE_VPORT_XSCALE: + case RADEON_EMIT_SE_CNTL: + case RADEON_EMIT_SE_CNTL_STATUS: + case RADEON_EMIT_RE_MISC: + case RADEON_EMIT_PP_BORDER_COLOR_0: + case RADEON_EMIT_PP_BORDER_COLOR_1: + case RADEON_EMIT_PP_BORDER_COLOR_2: + case RADEON_EMIT_SE_ZBIAS_FACTOR: + case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT: + case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED: + case R200_EMIT_PP_TXCBLEND_0: + case R200_EMIT_PP_TXCBLEND_1: + case R200_EMIT_PP_TXCBLEND_2: + case R200_EMIT_PP_TXCBLEND_3: + case R200_EMIT_PP_TXCBLEND_4: + case R200_EMIT_PP_TXCBLEND_5: + case R200_EMIT_PP_TXCBLEND_6: + case R200_EMIT_PP_TXCBLEND_7: + case R200_EMIT_TCL_LIGHT_MODEL_CTL_0: + case R200_EMIT_TFACTOR_0: + case R200_EMIT_VTX_FMT_0: + case R200_EMIT_MATRIX_SELECT_0: + case R200_EMIT_TEX_PROC_CTL_2: + case R200_EMIT_TCL_UCP_VERT_BLEND_CTL: + case R200_EMIT_PP_TXFILTER_0: + case R200_EMIT_PP_TXFILTER_1: + case R200_EMIT_PP_TXFILTER_2: + case R200_EMIT_PP_TXFILTER_3: + case R200_EMIT_PP_TXFILTER_4: + case R200_EMIT_PP_TXFILTER_5: + case R200_EMIT_VTE_CNTL: + case R200_EMIT_OUTPUT_VTX_COMP_SEL: + case R200_EMIT_PP_TAM_DEBUG3: + case R200_EMIT_PP_CNTL_X: + case R200_EMIT_RB3D_DEPTHXY_OFFSET: + case R200_EMIT_RE_AUX_SCISSOR_CNTL: + case R200_EMIT_RE_SCISSOR_TL_0: + case R200_EMIT_RE_SCISSOR_TL_1: + case R200_EMIT_RE_SCISSOR_TL_2: + case R200_EMIT_SE_VAP_CNTL_STATUS: + case R200_EMIT_SE_VTX_STATE_CNTL: + case R200_EMIT_RE_POINTSIZE: + case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0: + case R200_EMIT_PP_CUBIC_FACES_0: + case R200_EMIT_PP_CUBIC_FACES_1: + case R200_EMIT_PP_CUBIC_FACES_2: + case R200_EMIT_PP_CUBIC_FACES_3: + case R200_EMIT_PP_CUBIC_FACES_4: + case R200_EMIT_PP_CUBIC_FACES_5: + case RADEON_EMIT_PP_TEX_SIZE_0: + case RADEON_EMIT_PP_TEX_SIZE_1: + case RADEON_EMIT_PP_TEX_SIZE_2: + case R200_EMIT_RB3D_BLENDCOLOR: + case R200_EMIT_TCL_POINT_SPRITE_CNTL: + case RADEON_EMIT_PP_CUBIC_FACES_0: + case RADEON_EMIT_PP_CUBIC_FACES_1: + case RADEON_EMIT_PP_CUBIC_FACES_2: + case R200_EMIT_PP_TRI_PERF_CNTL: + case R200_EMIT_PP_AFS_0: + case R200_EMIT_PP_AFS_1: + case R200_EMIT_ATF_TFACTOR: + case R200_EMIT_PP_TXCTLALL_0: + case R200_EMIT_PP_TXCTLALL_1: + case R200_EMIT_PP_TXCTLALL_2: + case R200_EMIT_PP_TXCTLALL_3: + case R200_EMIT_PP_TXCTLALL_4: + case R200_EMIT_PP_TXCTLALL_5: + case R200_EMIT_VAP_PVS_CNTL: + /* These packets don't contain memory offsets */ + break; + + default: + DRM_ERROR("Unknown state packet ID %d\n", id); + return -EINVAL; + } + + return 0; +} + +static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t * + dev_priv, + struct drm_file *file_priv, + drm_radeon_kcmd_buffer_t * + cmdbuf, + unsigned int *cmdsz) +{ + u32 *cmd = (u32 *) cmdbuf->buf; + u32 offset, narrays; + int count, i, k; + + *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16); + + if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) { + DRM_ERROR("Not a type 3 packet\n"); + return -EINVAL; + } + + if (4 * *cmdsz > cmdbuf->bufsz) { + DRM_ERROR("Packet size larger than size of data provided\n"); + return -EINVAL; + } + + switch(cmd[0] & 0xff00) { + /* XXX Are there old drivers needing other packets? */ + + case RADEON_3D_DRAW_IMMD: + case RADEON_3D_DRAW_VBUF: + case RADEON_3D_DRAW_INDX: + case RADEON_WAIT_FOR_IDLE: + case RADEON_CP_NOP: + case RADEON_3D_CLEAR_ZMASK: +/* case RADEON_CP_NEXT_CHAR: + case RADEON_CP_PLY_NEXTSCAN: + case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */ + /* these packets are safe */ + break; + + case RADEON_CP_3D_DRAW_IMMD_2: + case RADEON_CP_3D_DRAW_VBUF_2: + case RADEON_CP_3D_DRAW_INDX_2: + case RADEON_3D_CLEAR_HIZ: + /* safe but r200 only */ + if (dev_priv->microcode_version != UCODE_R200) { + DRM_ERROR("Invalid 3d packet for r100-class chip\n"); + return -EINVAL; + } + break; + + case RADEON_3D_LOAD_VBPNTR: + count = (cmd[0] >> 16) & 0x3fff; + + if (count > 18) { /* 12 arrays max */ + DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n", + count); + return -EINVAL; + } + + /* carefully check packet contents */ + narrays = cmd[1] & ~0xc000; + k = 0; + i = 2; + while ((k < narrays) && (i < (count + 2))) { + i++; /* skip attribute field */ + if (radeon_check_and_fixup_offset(dev_priv, file_priv, + &cmd[i])) { + DRM_ERROR + ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n", + k, i); + return -EINVAL; + } + k++; + i++; + if (k == narrays) + break; + /* have one more to process, they come in pairs */ + if (radeon_check_and_fixup_offset(dev_priv, + file_priv, &cmd[i])) + { + DRM_ERROR + ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n", + k, i); + return -EINVAL; + } + k++; + i++; + } + /* do the counts match what we expect ? */ + if ((k != narrays) || (i != (count + 2))) { + DRM_ERROR + ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n", + k, i, narrays, count + 1); + return -EINVAL; + } + break; + + case RADEON_3D_RNDR_GEN_INDX_PRIM: + if (dev_priv->microcode_version != UCODE_R100) { + DRM_ERROR("Invalid 3d packet for r200-class chip\n"); + return -EINVAL; + } + if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) { + DRM_ERROR("Invalid rndr_gen_indx offset\n"); + return -EINVAL; + } + break; + + case RADEON_CP_INDX_BUFFER: + if (dev_priv->microcode_version != UCODE_R200) { + DRM_ERROR("Invalid 3d packet for r100-class chip\n"); + return -EINVAL; + } + if ((cmd[1] & 0x8000ffff) != 0x80000810) { + DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]); + return -EINVAL; + } + if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) { + DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]); + return -EINVAL; + } + break; + + case RADEON_CNTL_HOSTDATA_BLT: + case RADEON_CNTL_PAINT_MULTI: + case RADEON_CNTL_BITBLT_MULTI: + /* MSB of opcode: next DWORD GUI_CNTL */ + if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL + | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) { + offset = cmd[2] << 10; + if (radeon_check_and_fixup_offset + (dev_priv, file_priv, &offset)) { + DRM_ERROR("Invalid first packet offset\n"); + return -EINVAL; + } + cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10; + } + + if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) && + (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) { + offset = cmd[3] << 10; + if (radeon_check_and_fixup_offset + (dev_priv, file_priv, &offset)) { + DRM_ERROR("Invalid second packet offset\n"); + return -EINVAL; + } + cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10; + } + break; + + default: + DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00); + return -EINVAL; + } + + return 0; +} + +/* ================================================================ + * CP hardware state programming functions + */ + +static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv, + struct drm_clip_rect * box) +{ + RING_LOCALS; + + DRM_DEBUG(" box: x1=%d y1=%d x2=%d y2=%d\n", + box->x1, box->y1, box->x2, box->y2); + + BEGIN_RING(4); + OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0)); + OUT_RING((box->y1 << 16) | box->x1); + OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0)); + OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1)); + ADVANCE_RING(); +} + +/* Emit 1.1 state + */ +static int radeon_emit_state(drm_radeon_private_t * dev_priv, + struct drm_file *file_priv, + drm_radeon_context_regs_t * ctx, + drm_radeon_texture_regs_t * tex, + unsigned int dirty) +{ + RING_LOCALS; + DRM_DEBUG("dirty=0x%08x\n", dirty); + + if (dirty & RADEON_UPLOAD_CONTEXT) { + if (radeon_check_and_fixup_offset(dev_priv, file_priv, + &ctx->rb3d_depthoffset)) { + DRM_ERROR("Invalid depth buffer offset\n"); + return -EINVAL; + } + + if (radeon_check_and_fixup_offset(dev_priv, file_priv, + &ctx->rb3d_coloroffset)) { + DRM_ERROR("Invalid depth buffer offset\n"); + return -EINVAL; + } + + BEGIN_RING(14); + OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6)); + OUT_RING(ctx->pp_misc); + OUT_RING(ctx->pp_fog_color); + OUT_RING(ctx->re_solid_color); + OUT_RING(ctx->rb3d_blendcntl); + OUT_RING(ctx->rb3d_depthoffset); + OUT_RING(ctx->rb3d_depthpitch); + OUT_RING(ctx->rb3d_zstencilcntl); + OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2)); + OUT_RING(ctx->pp_cntl); + OUT_RING(ctx->rb3d_cntl); + OUT_RING(ctx->rb3d_coloroffset); + OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); + OUT_RING(ctx->rb3d_colorpitch); + ADVANCE_RING(); + } + + if (dirty & RADEON_UPLOAD_VERTFMT) { + BEGIN_RING(2); + OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0)); + OUT_RING(ctx->se_coord_fmt); + ADVANCE_RING(); + } + + if (dirty & RADEON_UPLOAD_LINE) { + BEGIN_RING(5); + OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1)); + OUT_RING(ctx->re_line_pattern); + OUT_RING(ctx->re_line_state); + OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0)); + OUT_RING(ctx->se_line_width); + ADVANCE_RING(); + } + + if (dirty & RADEON_UPLOAD_BUMPMAP) { + BEGIN_RING(5); + OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0)); + OUT_RING(ctx->pp_lum_matrix); + OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1)); + OUT_RING(ctx->pp_rot_matrix_0); + OUT_RING(ctx->pp_rot_matrix_1); + ADVANCE_RING(); + } + + if (dirty & RADEON_UPLOAD_MASKS) { + BEGIN_RING(4); + OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2)); + OUT_RING(ctx->rb3d_stencilrefmask); + OUT_RING(ctx->rb3d_ropcntl); + OUT_RING(ctx->rb3d_planemask); + ADVANCE_RING(); + } + + if (dirty & RADEON_UPLOAD_VIEWPORT) { + BEGIN_RING(7); + OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5)); + OUT_RING(ctx->se_vport_xscale); + OUT_RING(ctx->se_vport_xoffset); + OUT_RING(ctx->se_vport_yscale); + OUT_RING(ctx->se_vport_yoffset); + OUT_RING(ctx->se_vport_zscale); + OUT_RING(ctx->se_vport_zoffset); + ADVANCE_RING(); + } + + if (dirty & RADEON_UPLOAD_SETUP) { + BEGIN_RING(4); + OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0)); + OUT_RING(ctx->se_cntl); + OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0)); + OUT_RING(ctx->se_cntl_status); + ADVANCE_RING(); + } + + if (dirty & RADEON_UPLOAD_MISC) { + BEGIN_RING(2); + OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0)); + OUT_RING(ctx->re_misc); + ADVANCE_RING(); + } + + if (dirty & RADEON_UPLOAD_TEX0) { + if (radeon_check_and_fixup_offset(dev_priv, file_priv, + &tex[0].pp_txoffset)) { + DRM_ERROR("Invalid texture offset for unit 0\n"); + return -EINVAL; + } + + BEGIN_RING(9); + OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5)); + OUT_RING(tex[0].pp_txfilter); + OUT_RING(tex[0].pp_txformat); + OUT_RING(tex[0].pp_txoffset); + OUT_RING(tex[0].pp_txcblend); + OUT_RING(tex[0].pp_txablend); + OUT_RING(tex[0].pp_tfactor); + OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0)); + OUT_RING(tex[0].pp_border_color); + ADVANCE_RING(); + } + + if (dirty & RADEON_UPLOAD_TEX1) { + if (radeon_check_and_fixup_offset(dev_priv, file_priv, + &tex[1].pp_txoffset)) { + DRM_ERROR("Invalid texture offset for unit 1\n"); + return -EINVAL; + } + + BEGIN_RING(9); + OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5)); + OUT_RING(tex[1].pp_txfilter); + OUT_RING(tex[1].pp_txformat); + OUT_RING(tex[1].pp_txoffset); + OUT_RING(tex[1].pp_txcblend); + OUT_RING(tex[1].pp_txablend); + OUT_RING(tex[1].pp_tfactor); + OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0)); + OUT_RING(tex[1].pp_border_color); + ADVANCE_RING(); + } + + if (dirty & RADEON_UPLOAD_TEX2) { + if (radeon_check_and_fixup_offset(dev_priv, file_priv, + &tex[2].pp_txoffset)) { + DRM_ERROR("Invalid texture offset for unit 2\n"); + return -EINVAL; + } + + BEGIN_RING(9); + OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5)); + OUT_RING(tex[2].pp_txfilter); + OUT_RING(tex[2].pp_txformat); + OUT_RING(tex[2].pp_txoffset); + OUT_RING(tex[2].pp_txcblend); + OUT_RING(tex[2].pp_txablend); + OUT_RING(tex[2].pp_tfactor); + OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0)); + OUT_RING(tex[2].pp_border_color); + ADVANCE_RING(); + } + + return 0; +} + +/* Emit 1.2 state + */ +static int radeon_emit_state2(drm_radeon_private_t * dev_priv, + struct drm_file *file_priv, + drm_radeon_state_t * state) +{ + RING_LOCALS; + + if (state->dirty & RADEON_UPLOAD_ZBIAS) { + BEGIN_RING(3); + OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1)); + OUT_RING(state->context2.se_zbias_factor); + OUT_RING(state->context2.se_zbias_constant); + ADVANCE_RING(); + } + + return radeon_emit_state(dev_priv, file_priv, &state->context, + state->tex, state->dirty); +} + +/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in + * 1.3 cmdbuffers allow all previous state to be updated as well as + * the tcl scalar and vector areas. + */ +static struct { + int start; + int len; + const char *name; +} packet[RADEON_MAX_STATE_PACKETS] = { + {RADEON_PP_MISC, 7, "RADEON_PP_MISC"}, + {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"}, + {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"}, + {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"}, + {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"}, + {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"}, + {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"}, + {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"}, + {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"}, + {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"}, + {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"}, + {RADEON_RE_MISC, 1, "RADEON_RE_MISC"}, + {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"}, + {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"}, + {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"}, + {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"}, + {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"}, + {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"}, + {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"}, + {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"}, + {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17, + "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"}, + {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"}, + {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"}, + {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"}, + {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"}, + {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"}, + {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"}, + {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"}, + {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"}, + {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"}, + {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"}, + {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"}, + {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"}, + {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"}, + {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"}, + {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"}, + {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"}, + {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"}, + {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"}, + {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"}, + {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"}, + {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"}, + {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"}, + {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"}, + {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"}, + {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"}, + {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"}, + {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"}, + {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"}, + {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, + "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"}, + {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"}, + {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"}, + {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"}, + {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"}, + {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"}, + {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"}, + {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"}, + {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"}, + {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"}, + {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"}, + {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, + "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"}, + {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */ + {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */ + {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"}, + {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"}, + {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"}, + {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"}, + {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"}, + {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"}, + {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"}, + {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"}, + {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"}, + {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"}, + {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, + {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, + {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, + {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, + {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"}, + {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"}, + {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"}, + {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"}, + {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"}, + {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"}, + {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"}, + {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"}, + {R200_PP_AFS_0, 32, "R200_PP_AFS_0"}, /* 85 */ + {R200_PP_AFS_1, 32, "R200_PP_AFS_1"}, + {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"}, + {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"}, + {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"}, + {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"}, + {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"}, + {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"}, + {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"}, + {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"}, +}; + +/* ================================================================ + * Performance monitoring functions + */ + +static void radeon_clear_box(drm_radeon_private_t * dev_priv, + int x, int y, int w, int h, int r, int g, int b) +{ + u32 color; + RING_LOCALS; + + x += dev_priv->sarea_priv->boxes[0].x1; + y += dev_priv->sarea_priv->boxes[0].y1; + + switch (dev_priv->color_fmt) { + case RADEON_COLOR_FORMAT_RGB565: + color = (((r & 0xf8) << 8) | + ((g & 0xfc) << 3) | ((b & 0xf8) >> 3)); + break; + case RADEON_COLOR_FORMAT_ARGB8888: + default: + color = (((0xff) << 24) | (r << 16) | (g << 8) | b); + break; + } + + BEGIN_RING(4); + RADEON_WAIT_UNTIL_3D_IDLE(); + OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0)); + OUT_RING(0xffffffff); + ADVANCE_RING(); + + BEGIN_RING(6); + + OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4)); + OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_SOLID_COLOR | + (dev_priv->color_fmt << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS); + + if (dev_priv->sarea_priv->pfCurrentPage == 1) { + OUT_RING(dev_priv->front_pitch_offset); + } else { + OUT_RING(dev_priv->back_pitch_offset); + } + + OUT_RING(color); + + OUT_RING((x << 16) | y); + OUT_RING((w << 16) | h); + + ADVANCE_RING(); +} + +static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv) +{ + /* Collapse various things into a wait flag -- trying to + * guess if userspase slept -- better just to have them tell us. + */ + if (dev_priv->stats.last_frame_reads > 1 || + dev_priv->stats.last_clear_reads > dev_priv->stats.clears) { + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + } + + if (dev_priv->stats.freelist_loops) { + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + } + + /* Purple box for page flipping + */ + if (dev_priv->stats.boxes & RADEON_BOX_FLIP) + radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255); + + /* Red box if we have to wait for idle at any point + */ + if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE) + radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0); + + /* Blue box: lost context? + */ + + /* Yellow box for texture swaps + */ + if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD) + radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0); + + /* Green box if hardware never idles (as far as we can tell) + */ + if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE)) + radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0); + + /* Draw bars indicating number of buffers allocated + * (not a great measure, easily confused) + */ + if (dev_priv->stats.requested_bufs) { + if (dev_priv->stats.requested_bufs > 100) + dev_priv->stats.requested_bufs = 100; + + radeon_clear_box(dev_priv, 4, 16, + dev_priv->stats.requested_bufs, 4, + 196, 128, 128); + } + + memset(&dev_priv->stats, 0, sizeof(dev_priv->stats)); + +} + +/* ================================================================ + * CP command dispatch functions + */ + +static void radeon_cp_dispatch_clear(struct drm_device * dev, + drm_radeon_clear_t * clear, + drm_radeon_clear_rect_t * depth_boxes) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; + drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear; + int nbox = sarea_priv->nbox; + struct drm_clip_rect *pbox = sarea_priv->boxes; + unsigned int flags = clear->flags; + u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0; + int i; + RING_LOCALS; + DRM_DEBUG("flags = 0x%x\n", flags); + + dev_priv->stats.clears++; + + if (dev_priv->sarea_priv->pfCurrentPage == 1) { + unsigned int tmp = flags; + + flags &= ~(RADEON_FRONT | RADEON_BACK); + if (tmp & RADEON_FRONT) + flags |= RADEON_BACK; + if (tmp & RADEON_BACK) + flags |= RADEON_FRONT; + } + + if (flags & (RADEON_FRONT | RADEON_BACK)) { + + BEGIN_RING(4); + + /* Ensure the 3D stream is idle before doing a + * 2D fill to clear the front or back buffer. + */ + RADEON_WAIT_UNTIL_3D_IDLE(); + + OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0)); + OUT_RING(clear->color_mask); + + ADVANCE_RING(); + + /* Make sure we restore the 3D state next time. + */ + dev_priv->sarea_priv->ctx_owner = 0; + + for (i = 0; i < nbox; i++) { + int x = pbox[i].x1; + int y = pbox[i].y1; + int w = pbox[i].x2 - x; + int h = pbox[i].y2 - y; + + DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n", + x, y, w, h, flags); + + if (flags & RADEON_FRONT) { + BEGIN_RING(6); + + OUT_RING(CP_PACKET3 + (RADEON_CNTL_PAINT_MULTI, 4)); + OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_SOLID_COLOR | + (dev_priv-> + color_fmt << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_P | + RADEON_GMC_CLR_CMP_CNTL_DIS); + + OUT_RING(dev_priv->front_pitch_offset); + OUT_RING(clear->clear_color); + + OUT_RING((x << 16) | y); + OUT_RING((w << 16) | h); + + ADVANCE_RING(); + } + + if (flags & RADEON_BACK) { + BEGIN_RING(6); + + OUT_RING(CP_PACKET3 + (RADEON_CNTL_PAINT_MULTI, 4)); + OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_SOLID_COLOR | + (dev_priv-> + color_fmt << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_P | + RADEON_GMC_CLR_CMP_CNTL_DIS); + + OUT_RING(dev_priv->back_pitch_offset); + OUT_RING(clear->clear_color); + + OUT_RING((x << 16) | y); + OUT_RING((w << 16) | h); + + ADVANCE_RING(); + } + } + } + + /* hyper z clear */ + /* no docs available, based on reverse engeneering by Stephane Marchesin */ + if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) + && (flags & RADEON_CLEAR_FASTZ)) { + + int i; + int depthpixperline = + dev_priv->depth_fmt == + RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch / + 2) : (dev_priv-> + depth_pitch / 4); + + u32 clearmask; + + u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth | + ((clear->depth_mask & 0xff) << 24); + + /* Make sure we restore the 3D state next time. + * we haven't touched any "normal" state - still need this? + */ + dev_priv->sarea_priv->ctx_owner = 0; + + if ((dev_priv->flags & RADEON_HAS_HIERZ) + && (flags & RADEON_USE_HIERZ)) { + /* FIXME : reverse engineer that for Rx00 cards */ + /* FIXME : the mask supposedly contains low-res z values. So can't set + just to the max (0xff? or actually 0x3fff?), need to take z clear + value into account? */ + /* pattern seems to work for r100, though get slight + rendering errors with glxgears. If hierz is not enabled for r100, + only 4 bits which indicate clear (15,16,31,32, all zero) matter, the + other ones are ignored, and the same clear mask can be used. That's + very different behaviour than R200 which needs different clear mask + and different number of tiles to clear if hierz is enabled or not !?! + */ + clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f; + } else { + /* clear mask : chooses the clearing pattern. + rv250: could be used to clear only parts of macrotiles + (but that would get really complicated...)? + bit 0 and 1 (either or both of them ?!?!) are used to + not clear tile (or maybe one of the bits indicates if the tile is + compressed or not), bit 2 and 3 to not clear tile 1,...,. + Pattern is as follows: + | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29| + bits ------------------------------------------------- + | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31| + rv100: clearmask covers 2x8 4x1 tiles, but one clear still + covers 256 pixels ?!? + */ + clearmask = 0x0; + } + + BEGIN_RING(8); + RADEON_WAIT_UNTIL_2D_IDLE(); + OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE, + tempRB3D_DEPTHCLEARVALUE); + /* what offset is this exactly ? */ + OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0); + /* need ctlstat, otherwise get some strange black flickering */ + OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT, + RADEON_RB3D_ZC_FLUSH_ALL); + ADVANCE_RING(); + + for (i = 0; i < nbox; i++) { + int tileoffset, nrtilesx, nrtilesy, j; + /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */ + if ((dev_priv->flags & RADEON_HAS_HIERZ) + && !(dev_priv->microcode_version == UCODE_R200)) { + /* FIXME : figure this out for r200 (when hierz is enabled). Or + maybe r200 actually doesn't need to put the low-res z value into + the tile cache like r100, but just needs to clear the hi-level z-buffer? + Works for R100, both with hierz and without. + R100 seems to operate on 2x1 8x8 tiles, but... + odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially + problematic with resolutions which are not 64 pix aligned? */ + tileoffset = + ((pbox[i].y1 >> 3) * depthpixperline + + pbox[i].x1) >> 6; + nrtilesx = + ((pbox[i].x2 & ~63) - + (pbox[i].x1 & ~63)) >> 4; + nrtilesy = + (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3); + for (j = 0; j <= nrtilesy; j++) { + BEGIN_RING(4); + OUT_RING(CP_PACKET3 + (RADEON_3D_CLEAR_ZMASK, 2)); + /* first tile */ + OUT_RING(tileoffset * 8); + /* the number of tiles to clear */ + OUT_RING(nrtilesx + 4); + /* clear mask : chooses the clearing pattern. */ + OUT_RING(clearmask); + ADVANCE_RING(); + tileoffset += depthpixperline >> 6; + } + } else if (dev_priv->microcode_version == UCODE_R200) { + /* works for rv250. */ + /* find first macro tile (8x2 4x4 z-pixels on rv250) */ + tileoffset = + ((pbox[i].y1 >> 3) * depthpixperline + + pbox[i].x1) >> 5; + nrtilesx = + (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5); + nrtilesy = + (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3); + for (j = 0; j <= nrtilesy; j++) { + BEGIN_RING(4); + OUT_RING(CP_PACKET3 + (RADEON_3D_CLEAR_ZMASK, 2)); + /* first tile */ + /* judging by the first tile offset needed, could possibly + directly address/clear 4x4 tiles instead of 8x2 * 4x4 + macro tiles, though would still need clear mask for + right/bottom if truely 4x4 granularity is desired ? */ + OUT_RING(tileoffset * 16); + /* the number of tiles to clear */ + OUT_RING(nrtilesx + 1); + /* clear mask : chooses the clearing pattern. */ + OUT_RING(clearmask); + ADVANCE_RING(); + tileoffset += depthpixperline >> 5; + } + } else { /* rv 100 */ + /* rv100 might not need 64 pix alignment, who knows */ + /* offsets are, hmm, weird */ + tileoffset = + ((pbox[i].y1 >> 4) * depthpixperline + + pbox[i].x1) >> 6; + nrtilesx = + ((pbox[i].x2 & ~63) - + (pbox[i].x1 & ~63)) >> 4; + nrtilesy = + (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4); + for (j = 0; j <= nrtilesy; j++) { + BEGIN_RING(4); + OUT_RING(CP_PACKET3 + (RADEON_3D_CLEAR_ZMASK, 2)); + OUT_RING(tileoffset * 128); + /* the number of tiles to clear */ + OUT_RING(nrtilesx + 4); + /* clear mask : chooses the clearing pattern. */ + OUT_RING(clearmask); + ADVANCE_RING(); + tileoffset += depthpixperline >> 6; + } + } + } + + /* TODO don't always clear all hi-level z tiles */ + if ((dev_priv->flags & RADEON_HAS_HIERZ) + && (dev_priv->microcode_version == UCODE_R200) + && (flags & RADEON_USE_HIERZ)) + /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */ + /* FIXME : the mask supposedly contains low-res z values. So can't set + just to the max (0xff? or actually 0x3fff?), need to take z clear + value into account? */ + { + BEGIN_RING(4); + OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2)); + OUT_RING(0x0); /* First tile */ + OUT_RING(0x3cc0); + OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f); + ADVANCE_RING(); + } + } + + /* We have to clear the depth and/or stencil buffers by + * rendering a quad into just those buffers. Thus, we have to + * make sure the 3D engine is configured correctly. + */ + else if ((dev_priv->microcode_version == UCODE_R200) && + (flags & (RADEON_DEPTH | RADEON_STENCIL))) { + + int tempPP_CNTL; + int tempRE_CNTL; + int tempRB3D_CNTL; + int tempRB3D_ZSTENCILCNTL; + int tempRB3D_STENCILREFMASK; + int tempRB3D_PLANEMASK; + int tempSE_CNTL; + int tempSE_VTE_CNTL; + int tempSE_VTX_FMT_0; + int tempSE_VTX_FMT_1; + int tempSE_VAP_CNTL; + int tempRE_AUX_SCISSOR_CNTL; + + tempPP_CNTL = 0; + tempRE_CNTL = 0; + + tempRB3D_CNTL = depth_clear->rb3d_cntl; + + tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl; + tempRB3D_STENCILREFMASK = 0x0; + + tempSE_CNTL = depth_clear->se_cntl; + + /* Disable TCL */ + + tempSE_VAP_CNTL = ( /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */ + (0x9 << + SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT)); + + tempRB3D_PLANEMASK = 0x0; + + tempRE_AUX_SCISSOR_CNTL = 0x0; + + tempSE_VTE_CNTL = + SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK; + + /* Vertex format (X, Y, Z, W) */ + tempSE_VTX_FMT_0 = + SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK | + SE_VTX_FMT_0__VTX_W0_PRESENT_MASK; + tempSE_VTX_FMT_1 = 0x0; + + /* + * Depth buffer specific enables + */ + if (flags & RADEON_DEPTH) { + /* Enable depth buffer */ + tempRB3D_CNTL |= RADEON_Z_ENABLE; + } else { + /* Disable depth buffer */ + tempRB3D_CNTL &= ~RADEON_Z_ENABLE; + } + + /* + * Stencil buffer specific enables + */ + if (flags & RADEON_STENCIL) { + tempRB3D_CNTL |= RADEON_STENCIL_ENABLE; + tempRB3D_STENCILREFMASK = clear->depth_mask; + } else { + tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE; + tempRB3D_STENCILREFMASK = 0x00000000; + } + + if (flags & RADEON_USE_COMP_ZBUF) { + tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE | + RADEON_Z_DECOMPRESSION_ENABLE; + } + if (flags & RADEON_USE_HIERZ) { + tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE; + } + + BEGIN_RING(26); + RADEON_WAIT_UNTIL_2D_IDLE(); + + OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL); + OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL); + OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL); + OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL); + OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, + tempRB3D_STENCILREFMASK); + OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK); + OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL); + OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL); + OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0); + OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1); + OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL); + OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL); + ADVANCE_RING(); + + /* Make sure we restore the 3D state next time. + */ + dev_priv->sarea_priv->ctx_owner = 0; + + for (i = 0; i < nbox; i++) { + + /* Funny that this should be required -- + * sets top-left? + */ + radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]); + + BEGIN_RING(14); + OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12)); + OUT_RING((RADEON_PRIM_TYPE_RECT_LIST | + RADEON_PRIM_WALK_RING | + (3 << RADEON_NUM_VERTICES_SHIFT))); + OUT_RING(depth_boxes[i].ui[CLEAR_X1]); + OUT_RING(depth_boxes[i].ui[CLEAR_Y1]); + OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]); + OUT_RING(0x3f800000); + OUT_RING(depth_boxes[i].ui[CLEAR_X1]); + OUT_RING(depth_boxes[i].ui[CLEAR_Y2]); + OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]); + OUT_RING(0x3f800000); + OUT_RING(depth_boxes[i].ui[CLEAR_X2]); + OUT_RING(depth_boxes[i].ui[CLEAR_Y2]); + OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]); + OUT_RING(0x3f800000); + ADVANCE_RING(); + } + } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) { + + int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl; + + rb3d_cntl = depth_clear->rb3d_cntl; + + if (flags & RADEON_DEPTH) { + rb3d_cntl |= RADEON_Z_ENABLE; + } else { + rb3d_cntl &= ~RADEON_Z_ENABLE; + } + + if (flags & RADEON_STENCIL) { + rb3d_cntl |= RADEON_STENCIL_ENABLE; + rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */ + } else { + rb3d_cntl &= ~RADEON_STENCIL_ENABLE; + rb3d_stencilrefmask = 0x00000000; + } + + if (flags & RADEON_USE_COMP_ZBUF) { + tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE | + RADEON_Z_DECOMPRESSION_ENABLE; + } + if (flags & RADEON_USE_HIERZ) { + tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE; + } + + BEGIN_RING(13); + RADEON_WAIT_UNTIL_2D_IDLE(); + + OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1)); + OUT_RING(0x00000000); + OUT_RING(rb3d_cntl); + + OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL); + OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask); + OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000); + OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl); + ADVANCE_RING(); + + /* Make sure we restore the 3D state next time. + */ + dev_priv->sarea_priv->ctx_owner = 0; + + for (i = 0; i < nbox; i++) { + + /* Funny that this should be required -- + * sets top-left? + */ + radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]); + + BEGIN_RING(15); + + OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13)); + OUT_RING(RADEON_VTX_Z_PRESENT | + RADEON_VTX_PKCOLOR_PRESENT); + OUT_RING((RADEON_PRIM_TYPE_RECT_LIST | + RADEON_PRIM_WALK_RING | + RADEON_MAOS_ENABLE | + RADEON_VTX_FMT_RADEON_MODE | + (3 << RADEON_NUM_VERTICES_SHIFT))); + + OUT_RING(depth_boxes[i].ui[CLEAR_X1]); + OUT_RING(depth_boxes[i].ui[CLEAR_Y1]); + OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]); + OUT_RING(0x0); + + OUT_RING(depth_boxes[i].ui[CLEAR_X1]); + OUT_RING(depth_boxes[i].ui[CLEAR_Y2]); + OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]); + OUT_RING(0x0); + + OUT_RING(depth_boxes[i].ui[CLEAR_X2]); + OUT_RING(depth_boxes[i].ui[CLEAR_Y2]); + OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]); + OUT_RING(0x0); + + ADVANCE_RING(); + } + } + + /* Increment the clear counter. The client-side 3D driver must + * wait on this value before performing the clear ioctl. We + * need this because the card's so damned fast... + */ + dev_priv->sarea_priv->last_clear++; + + BEGIN_RING(4); + + RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear); + RADEON_WAIT_UNTIL_IDLE(); + + ADVANCE_RING(); +} + +static void radeon_cp_dispatch_swap(struct drm_device * dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; + int nbox = sarea_priv->nbox; + struct drm_clip_rect *pbox = sarea_priv->boxes; + int i; + RING_LOCALS; + DRM_DEBUG("\n"); + + /* Do some trivial performance monitoring... + */ + if (dev_priv->do_boxes) + radeon_cp_performance_boxes(dev_priv); + + /* Wait for the 3D stream to idle before dispatching the bitblt. + * This will prevent data corruption between the two streams. + */ + BEGIN_RING(2); + + RADEON_WAIT_UNTIL_3D_IDLE(); + + ADVANCE_RING(); + + for (i = 0; i < nbox; i++) { + int x = pbox[i].x1; + int y = pbox[i].y1; + int w = pbox[i].x2 - x; + int h = pbox[i].y2 - y; + + DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h); + + BEGIN_RING(9); + + OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0)); + OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL | + RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_NONE | + (dev_priv->color_fmt << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_S | + RADEON_DP_SRC_SOURCE_MEMORY | + RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); + + /* Make this work even if front & back are flipped: + */ + OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1)); + if (dev_priv->sarea_priv->pfCurrentPage == 0) { + OUT_RING(dev_priv->back_pitch_offset); + OUT_RING(dev_priv->front_pitch_offset); + } else { + OUT_RING(dev_priv->front_pitch_offset); + OUT_RING(dev_priv->back_pitch_offset); + } + + OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2)); + OUT_RING((x << 16) | y); + OUT_RING((x << 16) | y); + OUT_RING((w << 16) | h); + + ADVANCE_RING(); + } + + /* Increment the frame counter. The client-side 3D driver must + * throttle the framerate by waiting for this value before + * performing the swapbuffer ioctl. + */ + dev_priv->sarea_priv->last_frame++; + + BEGIN_RING(4); + + RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame); + RADEON_WAIT_UNTIL_2D_IDLE(); + + ADVANCE_RING(); +} + +static void radeon_cp_dispatch_flip(struct drm_device * dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_sarea *sarea = (struct drm_sarea *) dev_priv->sarea->handle; + int offset = (dev_priv->sarea_priv->pfCurrentPage == 1) + ? dev_priv->front_offset : dev_priv->back_offset; + RING_LOCALS; + DRM_DEBUG("pfCurrentPage=%d\n", + dev_priv->sarea_priv->pfCurrentPage); + + /* Do some trivial performance monitoring... + */ + if (dev_priv->do_boxes) { + dev_priv->stats.boxes |= RADEON_BOX_FLIP; + radeon_cp_performance_boxes(dev_priv); + } + + /* Update the frame offsets for both CRTCs + */ + BEGIN_RING(6); + + RADEON_WAIT_UNTIL_3D_IDLE(); + OUT_RING_REG(RADEON_CRTC_OFFSET, + ((sarea->frame.y * dev_priv->front_pitch + + sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7) + + offset); + OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base + + offset); + + ADVANCE_RING(); + + /* Increment the frame counter. The client-side 3D driver must + * throttle the framerate by waiting for this value before + * performing the swapbuffer ioctl. + */ + dev_priv->sarea_priv->last_frame++; + dev_priv->sarea_priv->pfCurrentPage = + 1 - dev_priv->sarea_priv->pfCurrentPage; + + BEGIN_RING(2); + + RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame); + + ADVANCE_RING(); +} + +static int bad_prim_vertex_nr(int primitive, int nr) +{ + switch (primitive & RADEON_PRIM_TYPE_MASK) { + case RADEON_PRIM_TYPE_NONE: + case RADEON_PRIM_TYPE_POINT: + return nr < 1; + case RADEON_PRIM_TYPE_LINE: + return (nr & 1) || nr == 0; + case RADEON_PRIM_TYPE_LINE_STRIP: + return nr < 2; + case RADEON_PRIM_TYPE_TRI_LIST: + case RADEON_PRIM_TYPE_3VRT_POINT_LIST: + case RADEON_PRIM_TYPE_3VRT_LINE_LIST: + case RADEON_PRIM_TYPE_RECT_LIST: + return nr % 3 || nr == 0; + case RADEON_PRIM_TYPE_TRI_FAN: + case RADEON_PRIM_TYPE_TRI_STRIP: + return nr < 3; + default: + return 1; + } +} + +typedef struct { + unsigned int start; + unsigned int finish; + unsigned int prim; + unsigned int numverts; + unsigned int offset; + unsigned int vc_format; +} drm_radeon_tcl_prim_t; + +static void radeon_cp_dispatch_vertex(struct drm_device * dev, + struct drm_buf * buf, + drm_radeon_tcl_prim_t * prim) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; + int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start; + int numverts = (int)prim->numverts; + int nbox = sarea_priv->nbox; + int i = 0; + RING_LOCALS; + + DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n", + prim->prim, + prim->vc_format, prim->start, prim->finish, prim->numverts); + + if (bad_prim_vertex_nr(prim->prim, prim->numverts)) { + DRM_ERROR("bad prim %x numverts %d\n", + prim->prim, prim->numverts); + return; + } + + do { + /* Emit the next cliprect */ + if (i < nbox) { + radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]); + } + + /* Emit the vertex buffer rendering commands */ + BEGIN_RING(5); + + OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3)); + OUT_RING(offset); + OUT_RING(numverts); + OUT_RING(prim->vc_format); + OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST | + RADEON_COLOR_ORDER_RGBA | + RADEON_VTX_FMT_RADEON_MODE | + (numverts << RADEON_NUM_VERTICES_SHIFT)); + + ADVANCE_RING(); + + i++; + } while (i < nbox); +} + +static void radeon_cp_discard_buffer(struct drm_device * dev, struct drm_buf * buf) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_buf_priv_t *buf_priv = buf->dev_private; + RING_LOCALS; + + buf_priv->age = ++dev_priv->sarea_priv->last_dispatch; + + /* Emit the vertex buffer age */ + BEGIN_RING(2); + RADEON_DISPATCH_AGE(buf_priv->age); + ADVANCE_RING(); + + buf->pending = 1; + buf->used = 0; +} + +static void radeon_cp_dispatch_indirect(struct drm_device * dev, + struct drm_buf * buf, int start, int end) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + RING_LOCALS; + DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end); + + if (start != end) { + int offset = (dev_priv->gart_buffers_offset + + buf->offset + start); + int dwords = (end - start + 3) / sizeof(u32); + + /* Indirect buffer data must be an even number of + * dwords, so if we've been given an odd number we must + * pad the data with a Type-2 CP packet. + */ + if (dwords & 1) { + u32 *data = (u32 *) + ((char *)dev->agp_buffer_map->handle + + buf->offset + start); + data[dwords++] = RADEON_CP_PACKET2; + } + + /* Fire off the indirect buffer */ + BEGIN_RING(3); + + OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1)); + OUT_RING(offset); + OUT_RING(dwords); + + ADVANCE_RING(); + } +} + +static void radeon_cp_dispatch_indices(struct drm_device * dev, + struct drm_buf * elt_buf, + drm_radeon_tcl_prim_t * prim) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; + int offset = dev_priv->gart_buffers_offset + prim->offset; + u32 *data; + int dwords; + int i = 0; + int start = prim->start + RADEON_INDEX_PRIM_OFFSET; + int count = (prim->finish - start) / sizeof(u16); + int nbox = sarea_priv->nbox; + + DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n", + prim->prim, + prim->vc_format, + prim->start, prim->finish, prim->offset, prim->numverts); + + if (bad_prim_vertex_nr(prim->prim, count)) { + DRM_ERROR("bad prim %x count %d\n", prim->prim, count); + return; + } + + if (start >= prim->finish || (prim->start & 0x7)) { + DRM_ERROR("buffer prim %d\n", prim->prim); + return; + } + + dwords = (prim->finish - prim->start + 3) / sizeof(u32); + + data = (u32 *) ((char *)dev->agp_buffer_map->handle + + elt_buf->offset + prim->start); + + data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2); + data[1] = offset; + data[2] = prim->numverts; + data[3] = prim->vc_format; + data[4] = (prim->prim | + RADEON_PRIM_WALK_IND | + RADEON_COLOR_ORDER_RGBA | + RADEON_VTX_FMT_RADEON_MODE | + (count << RADEON_NUM_VERTICES_SHIFT)); + + do { + if (i < nbox) + radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]); + + radeon_cp_dispatch_indirect(dev, elt_buf, + prim->start, prim->finish); + + i++; + } while (i < nbox); + +} + +#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE + +static int radeon_cp_dispatch_texture(struct drm_device * dev, + struct drm_file *file_priv, + drm_radeon_texture_t * tex, + drm_radeon_tex_image_t * image) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_buf *buf; + u32 format; + u32 *buffer; + const u8 __user *data; + int size, dwords, tex_width, blit_width, spitch; + u32 height; + int i; + u32 texpitch, microtile; + u32 offset, byte_offset; + RING_LOCALS; + + if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) { + DRM_ERROR("Invalid destination offset\n"); + return -EINVAL; + } + + dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD; + + /* Flush the pixel cache. This ensures no pixel data gets mixed + * up with the texture data from the host data blit, otherwise + * part of the texture image may be corrupted. + */ + BEGIN_RING(4); + RADEON_FLUSH_CACHE(); + RADEON_WAIT_UNTIL_IDLE(); + ADVANCE_RING(); + + /* The compiler won't optimize away a division by a variable, + * even if the only legal values are powers of two. Thus, we'll + * use a shift instead. + */ + switch (tex->format) { + case RADEON_TXFORMAT_ARGB8888: + case RADEON_TXFORMAT_RGBA8888: + format = RADEON_COLOR_FORMAT_ARGB8888; + tex_width = tex->width * 4; + blit_width = image->width * 4; + break; + case RADEON_TXFORMAT_AI88: + case RADEON_TXFORMAT_ARGB1555: + case RADEON_TXFORMAT_RGB565: + case RADEON_TXFORMAT_ARGB4444: + case RADEON_TXFORMAT_VYUY422: + case RADEON_TXFORMAT_YVYU422: + format = RADEON_COLOR_FORMAT_RGB565; + tex_width = tex->width * 2; + blit_width = image->width * 2; + break; + case RADEON_TXFORMAT_I8: + case RADEON_TXFORMAT_RGB332: + format = RADEON_COLOR_FORMAT_CI8; + tex_width = tex->width * 1; + blit_width = image->width * 1; + break; + default: + DRM_ERROR("invalid texture format %d\n", tex->format); + return -EINVAL; + } + spitch = blit_width >> 6; + if (spitch == 0 && image->height > 1) + return -EINVAL; + + texpitch = tex->pitch; + if ((texpitch << 22) & RADEON_DST_TILE_MICRO) { + microtile = 1; + if (tex_width < 64) { + texpitch &= ~(RADEON_DST_TILE_MICRO >> 22); + /* we got tiled coordinates, untile them */ + image->x *= 2; + } + } else + microtile = 0; + + /* this might fail for zero-sized uploads - are those illegal? */ + if (!radeon_check_offset(dev_priv, tex->offset + image->height * + blit_width - 1)) { + DRM_ERROR("Invalid final destination offset\n"); + return -EINVAL; + } + + DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width); + + do { + DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n", + tex->offset >> 10, tex->pitch, tex->format, + image->x, image->y, image->width, image->height); + + /* Make a copy of some parameters in case we have to + * update them for a multi-pass texture blit. + */ + height = image->height; + data = (const u8 __user *)image->data; + + size = height * blit_width; + + if (size > RADEON_MAX_TEXTURE_SIZE) { + height = RADEON_MAX_TEXTURE_SIZE / blit_width; + size = height * blit_width; + } else if (size < 4 && size > 0) { + size = 4; + } else if (size == 0) { + return 0; + } + + buf = radeon_freelist_get(dev); + if (0 && !buf) { + radeon_do_cp_idle(dev_priv); + buf = radeon_freelist_get(dev); + } + if (!buf) { + DRM_DEBUG("EAGAIN\n"); + if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image))) + return -EFAULT; + return -EAGAIN; + } + + /* Dispatch the indirect buffer. + */ + buffer = + (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset); + dwords = size / 4; + +#define RADEON_COPY_MT(_buf, _data, _width) \ + do { \ + if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\ + DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \ + return -EFAULT; \ + } \ + } while(0) + + if (microtile) { + /* texture micro tiling in use, minimum texture width is thus 16 bytes. + however, we cannot use blitter directly for texture width < 64 bytes, + since minimum tex pitch is 64 bytes and we need this to match + the texture width, otherwise the blitter will tile it wrong. + Thus, tiling manually in this case. Additionally, need to special + case tex height = 1, since our actual image will have height 2 + and we need to ensure we don't read beyond the texture size + from user space. */ + if (tex->height == 1) { + if (tex_width >= 64 || tex_width <= 16) { + RADEON_COPY_MT(buffer, data, + (int)(tex_width * sizeof(u32))); + } else if (tex_width == 32) { + RADEON_COPY_MT(buffer, data, 16); + RADEON_COPY_MT(buffer + 8, + data + 16, 16); + } + } else if (tex_width >= 64 || tex_width == 16) { + RADEON_COPY_MT(buffer, data, + (int)(dwords * sizeof(u32))); + } else if (tex_width < 16) { + for (i = 0; i < tex->height; i++) { + RADEON_COPY_MT(buffer, data, tex_width); + buffer += 4; + data += tex_width; + } + } else if (tex_width == 32) { + /* TODO: make sure this works when not fitting in one buffer + (i.e. 32bytes x 2048...) */ + for (i = 0; i < tex->height; i += 2) { + RADEON_COPY_MT(buffer, data, 16); + data += 16; + RADEON_COPY_MT(buffer + 8, data, 16); + data += 16; + RADEON_COPY_MT(buffer + 4, data, 16); + data += 16; + RADEON_COPY_MT(buffer + 12, data, 16); + data += 16; + buffer += 16; + } + } + } else { + if (tex_width >= 32) { + /* Texture image width is larger than the minimum, so we + * can upload it directly. + */ + RADEON_COPY_MT(buffer, data, + (int)(dwords * sizeof(u32))); + } else { + /* Texture image width is less than the minimum, so we + * need to pad out each image scanline to the minimum + * width. + */ + for (i = 0; i < tex->height; i++) { + RADEON_COPY_MT(buffer, data, tex_width); + buffer += 8; + data += tex_width; + } + } + } + +#undef RADEON_COPY_MT + byte_offset = (image->y & ~2047) * blit_width; + buf->file_priv = file_priv; + buf->used = size; + offset = dev_priv->gart_buffers_offset + buf->offset; + BEGIN_RING(9); + OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5)); + OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL | + RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_NONE | + (format << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_S | + RADEON_DP_SRC_SOURCE_MEMORY | + RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); + OUT_RING((spitch << 22) | (offset >> 10)); + OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10))); + OUT_RING(0); + OUT_RING((image->x << 16) | (image->y % 2048)); + OUT_RING((image->width << 16) | height); + RADEON_WAIT_UNTIL_2D_IDLE(); + ADVANCE_RING(); + COMMIT_RING(); + + radeon_cp_discard_buffer(dev, buf); + + /* Update the input parameters for next time */ + image->y += height; + image->height -= height; + image->data = (const u8 __user *)image->data + size; + } while (image->height > 0); + + /* Flush the pixel cache after the blit completes. This ensures + * the texture data is written out to memory before rendering + * continues. + */ + BEGIN_RING(4); + RADEON_FLUSH_CACHE(); + RADEON_WAIT_UNTIL_2D_IDLE(); + ADVANCE_RING(); + COMMIT_RING(); + + return 0; +} + +static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + int i; + RING_LOCALS; + DRM_DEBUG("\n"); + + BEGIN_RING(35); + + OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0)); + OUT_RING(0x00000000); + + OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31)); + for (i = 0; i < 32; i++) { + OUT_RING(stipple[i]); + } + + ADVANCE_RING(); +} + +static void radeon_apply_surface_regs(int surf_index, + drm_radeon_private_t *dev_priv) +{ + if (!dev_priv->mmio) + return; + + radeon_do_cp_idle(dev_priv); + + RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index, + dev_priv->surfaces[surf_index].flags); + RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index, + dev_priv->surfaces[surf_index].lower); + RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index, + dev_priv->surfaces[surf_index].upper); +} + +/* Allocates a virtual surface + * doesn't always allocate a real surface, will stretch an existing + * surface when possible. + * + * Note that refcount can be at most 2, since during a free refcount=3 + * might mean we have to allocate a new surface which might not always + * be available. + * For example : we allocate three contigous surfaces ABC. If B is + * freed, we suddenly need two surfaces to store A and C, which might + * not always be available. + */ +static int alloc_surface(drm_radeon_surface_alloc_t *new, + drm_radeon_private_t *dev_priv, + struct drm_file *file_priv) +{ + struct radeon_virt_surface *s; + int i; + int virt_surface_index; + uint32_t new_upper, new_lower; + + new_lower = new->address; + new_upper = new_lower + new->size - 1; + + /* sanity check */ + if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) || + ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) != + RADEON_SURF_ADDRESS_FIXED_MASK) + || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0)) + return -1; + + /* make sure there is no overlap with existing surfaces */ + for (i = 0; i < RADEON_MAX_SURFACES; i++) { + if ((dev_priv->surfaces[i].refcount != 0) && + (((new_lower >= dev_priv->surfaces[i].lower) && + (new_lower < dev_priv->surfaces[i].upper)) || + ((new_lower < dev_priv->surfaces[i].lower) && + (new_upper > dev_priv->surfaces[i].lower)))) { + return -1; + } + } + + /* find a virtual surface */ + for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) + if (dev_priv->virt_surfaces[i].file_priv == 0) + break; + if (i == 2 * RADEON_MAX_SURFACES) { + return -1; + } + virt_surface_index = i; + + /* try to reuse an existing surface */ + for (i = 0; i < RADEON_MAX_SURFACES; i++) { + /* extend before */ + if ((dev_priv->surfaces[i].refcount == 1) && + (new->flags == dev_priv->surfaces[i].flags) && + (new_upper + 1 == dev_priv->surfaces[i].lower)) { + s = &(dev_priv->virt_surfaces[virt_surface_index]); + s->surface_index = i; + s->lower = new_lower; + s->upper = new_upper; + s->flags = new->flags; + s->file_priv = file_priv; + dev_priv->surfaces[i].refcount++; + dev_priv->surfaces[i].lower = s->lower; + radeon_apply_surface_regs(s->surface_index, dev_priv); + return virt_surface_index; + } + + /* extend after */ + if ((dev_priv->surfaces[i].refcount == 1) && + (new->flags == dev_priv->surfaces[i].flags) && + (new_lower == dev_priv->surfaces[i].upper + 1)) { + s = &(dev_priv->virt_surfaces[virt_surface_index]); + s->surface_index = i; + s->lower = new_lower; + s->upper = new_upper; + s->flags = new->flags; + s->file_priv = file_priv; + dev_priv->surfaces[i].refcount++; + dev_priv->surfaces[i].upper = s->upper; + radeon_apply_surface_regs(s->surface_index, dev_priv); + return virt_surface_index; + } + } + + /* okay, we need a new one */ + for (i = 0; i < RADEON_MAX_SURFACES; i++) { + if (dev_priv->surfaces[i].refcount == 0) { + s = &(dev_priv->virt_surfaces[virt_surface_index]); + s->surface_index = i; + s->lower = new_lower; + s->upper = new_upper; + s->flags = new->flags; + s->file_priv = file_priv; + dev_priv->surfaces[i].refcount = 1; + dev_priv->surfaces[i].lower = s->lower; + dev_priv->surfaces[i].upper = s->upper; + dev_priv->surfaces[i].flags = s->flags; + radeon_apply_surface_regs(s->surface_index, dev_priv); + return virt_surface_index; + } + } + + /* we didn't find anything */ + return -1; +} + +static int free_surface(struct drm_file *file_priv, + drm_radeon_private_t * dev_priv, + int lower) +{ + struct radeon_virt_surface *s; + int i; + /* find the virtual surface */ + for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) { + s = &(dev_priv->virt_surfaces[i]); + if (s->file_priv) { + if ((lower == s->lower) && (file_priv == s->file_priv)) + { + if (dev_priv->surfaces[s->surface_index]. + lower == s->lower) + dev_priv->surfaces[s->surface_index]. + lower = s->upper; + + if (dev_priv->surfaces[s->surface_index]. + upper == s->upper) + dev_priv->surfaces[s->surface_index]. + upper = s->lower; + + dev_priv->surfaces[s->surface_index].refcount--; + if (dev_priv->surfaces[s->surface_index]. + refcount == 0) + dev_priv->surfaces[s->surface_index]. + flags = 0; + s->file_priv = NULL; + radeon_apply_surface_regs(s->surface_index, + dev_priv); + return 0; + } + } + } + return 1; +} + +static void radeon_surfaces_release(struct drm_file *file_priv, + drm_radeon_private_t * dev_priv) +{ + int i; + for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) { + if (dev_priv->virt_surfaces[i].file_priv == file_priv) + free_surface(file_priv, dev_priv, + dev_priv->virt_surfaces[i].lower); + } +} + +/* ================================================================ + * IOCTL functions + */ +static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_surface_alloc_t *alloc = data; + + if (alloc_surface(alloc, dev_priv, file_priv) == -1) + return -EINVAL; + else + return 0; +} + +static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_surface_free_t *memfree = data; + + if (free_surface(file_priv, dev_priv, memfree->address)) + return -EINVAL; + else + return 0; +} + +static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; + drm_radeon_clear_t *clear = data; + drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS]; + DRM_DEBUG("\n"); + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + RING_SPACE_TEST_WITH_RETURN(dev_priv); + + if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS) + sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS; + + if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes, + sarea_priv->nbox * sizeof(depth_boxes[0]))) + return -EFAULT; + + radeon_cp_dispatch_clear(dev, clear, depth_boxes); + + COMMIT_RING(); + return 0; +} + +/* Not sure why this isn't set all the time: + */ +static int radeon_do_init_pageflip(struct drm_device * dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + RING_LOCALS; + + DRM_DEBUG("\n"); + + BEGIN_RING(6); + RADEON_WAIT_UNTIL_3D_IDLE(); + OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0)); + OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) | + RADEON_CRTC_OFFSET_FLIP_CNTL); + OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0)); + OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) | + RADEON_CRTC_OFFSET_FLIP_CNTL); + ADVANCE_RING(); + + dev_priv->page_flipping = 1; + + if (dev_priv->sarea_priv->pfCurrentPage != 1) + dev_priv->sarea_priv->pfCurrentPage = 0; + + return 0; +} + +/* Swapping and flipping are different operations, need different ioctls. + * They can & should be intermixed to support multiple 3d windows. + */ +static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + DRM_DEBUG("\n"); + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + RING_SPACE_TEST_WITH_RETURN(dev_priv); + + if (!dev_priv->page_flipping) + radeon_do_init_pageflip(dev); + + radeon_cp_dispatch_flip(dev); + + COMMIT_RING(); + return 0; +} + +static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; + DRM_DEBUG("\n"); + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + RING_SPACE_TEST_WITH_RETURN(dev_priv); + + if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS) + sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS; + + radeon_cp_dispatch_swap(dev); + dev_priv->sarea_priv->ctx_owner = 0; + + COMMIT_RING(); + return 0; +} + +static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; + struct drm_device_dma *dma = dev->dma; + struct drm_buf *buf; + drm_radeon_vertex_t *vertex = data; + drm_radeon_tcl_prim_t prim; + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n", + DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard); + + if (vertex->idx < 0 || vertex->idx >= dma->buf_count) { + DRM_ERROR("buffer index %d (of %d max)\n", + vertex->idx, dma->buf_count - 1); + return -EINVAL; + } + if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) { + DRM_ERROR("buffer prim %d\n", vertex->prim); + return -EINVAL; + } + + RING_SPACE_TEST_WITH_RETURN(dev_priv); + VB_AGE_TEST_WITH_RETURN(dev_priv); + + buf = dma->buflist[vertex->idx]; + + if (buf->file_priv != file_priv) { + DRM_ERROR("process %d using buffer owned by %p\n", + DRM_CURRENTPID, buf->file_priv); + return -EINVAL; + } + if (buf->pending) { + DRM_ERROR("sending pending buffer %d\n", vertex->idx); + return -EINVAL; + } + + /* Build up a prim_t record: + */ + if (vertex->count) { + buf->used = vertex->count; /* not used? */ + + if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) { + if (radeon_emit_state(dev_priv, file_priv, + &sarea_priv->context_state, + sarea_priv->tex_state, + sarea_priv->dirty)) { + DRM_ERROR("radeon_emit_state failed\n"); + return -EINVAL; + } + + sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES | + RADEON_UPLOAD_TEX1IMAGES | + RADEON_UPLOAD_TEX2IMAGES | + RADEON_REQUIRE_QUIESCENCE); + } + + prim.start = 0; + prim.finish = vertex->count; /* unused */ + prim.prim = vertex->prim; + prim.numverts = vertex->count; + prim.vc_format = dev_priv->sarea_priv->vc_format; + + radeon_cp_dispatch_vertex(dev, buf, &prim); + } + + if (vertex->discard) { + radeon_cp_discard_buffer(dev, buf); + } + + COMMIT_RING(); + return 0; +} + +static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; + struct drm_device_dma *dma = dev->dma; + struct drm_buf *buf; + drm_radeon_indices_t *elts = data; + drm_radeon_tcl_prim_t prim; + int count; + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n", + DRM_CURRENTPID, elts->idx, elts->start, elts->end, + elts->discard); + + if (elts->idx < 0 || elts->idx >= dma->buf_count) { + DRM_ERROR("buffer index %d (of %d max)\n", + elts->idx, dma->buf_count - 1); + return -EINVAL; + } + if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) { + DRM_ERROR("buffer prim %d\n", elts->prim); + return -EINVAL; + } + + RING_SPACE_TEST_WITH_RETURN(dev_priv); + VB_AGE_TEST_WITH_RETURN(dev_priv); + + buf = dma->buflist[elts->idx]; + + if (buf->file_priv != file_priv) { + DRM_ERROR("process %d using buffer owned by %p\n", + DRM_CURRENTPID, buf->file_priv); + return -EINVAL; + } + if (buf->pending) { + DRM_ERROR("sending pending buffer %d\n", elts->idx); + return -EINVAL; + } + + count = (elts->end - elts->start) / sizeof(u16); + elts->start -= RADEON_INDEX_PRIM_OFFSET; + + if (elts->start & 0x7) { + DRM_ERROR("misaligned buffer 0x%x\n", elts->start); + return -EINVAL; + } + if (elts->start < buf->used) { + DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used); + return -EINVAL; + } + + buf->used = elts->end; + + if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) { + if (radeon_emit_state(dev_priv, file_priv, + &sarea_priv->context_state, + sarea_priv->tex_state, + sarea_priv->dirty)) { + DRM_ERROR("radeon_emit_state failed\n"); + return -EINVAL; + } + + sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES | + RADEON_UPLOAD_TEX1IMAGES | + RADEON_UPLOAD_TEX2IMAGES | + RADEON_REQUIRE_QUIESCENCE); + } + + /* Build up a prim_t record: + */ + prim.start = elts->start; + prim.finish = elts->end; + prim.prim = elts->prim; + prim.offset = 0; /* offset from start of dma buffers */ + prim.numverts = RADEON_MAX_VB_VERTS; /* duh */ + prim.vc_format = dev_priv->sarea_priv->vc_format; + + radeon_cp_dispatch_indices(dev, buf, &prim); + if (elts->discard) { + radeon_cp_discard_buffer(dev, buf); + } + + COMMIT_RING(); + return 0; +} + +static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_texture_t *tex = data; + drm_radeon_tex_image_t image; + int ret; + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + if (tex->image == NULL) { + DRM_ERROR("null texture image!\n"); + return -EINVAL; + } + + if (DRM_COPY_FROM_USER(&image, + (drm_radeon_tex_image_t __user *) tex->image, + sizeof(image))) + return -EFAULT; + + RING_SPACE_TEST_WITH_RETURN(dev_priv); + VB_AGE_TEST_WITH_RETURN(dev_priv); + + ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image); + + return ret; +} + +static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_stipple_t *stipple = data; + u32 mask[32]; + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32))) + return -EFAULT; + + RING_SPACE_TEST_WITH_RETURN(dev_priv); + + radeon_cp_dispatch_stipple(dev, mask); + + COMMIT_RING(); + return 0; +} + +static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_device_dma *dma = dev->dma; + struct drm_buf *buf; + drm_radeon_indirect_t *indirect = data; + RING_LOCALS; + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + DRM_DEBUG("idx=%d s=%d e=%d d=%d\n", + indirect->idx, indirect->start, indirect->end, + indirect->discard); + + if (indirect->idx < 0 || indirect->idx >= dma->buf_count) { + DRM_ERROR("buffer index %d (of %d max)\n", + indirect->idx, dma->buf_count - 1); + return -EINVAL; + } + + buf = dma->buflist[indirect->idx]; + + if (buf->file_priv != file_priv) { + DRM_ERROR("process %d using buffer owned by %p\n", + DRM_CURRENTPID, buf->file_priv); + return -EINVAL; + } + if (buf->pending) { + DRM_ERROR("sending pending buffer %d\n", indirect->idx); + return -EINVAL; + } + + if (indirect->start < buf->used) { + DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n", + indirect->start, buf->used); + return -EINVAL; + } + + RING_SPACE_TEST_WITH_RETURN(dev_priv); + VB_AGE_TEST_WITH_RETURN(dev_priv); + + buf->used = indirect->end; + + /* Wait for the 3D stream to idle before the indirect buffer + * containing 2D acceleration commands is processed. + */ + BEGIN_RING(2); + + RADEON_WAIT_UNTIL_3D_IDLE(); + + ADVANCE_RING(); + + /* Dispatch the indirect buffer full of commands from the + * X server. This is insecure and is thus only available to + * privileged clients. + */ + radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end); + if (indirect->discard) { + radeon_cp_discard_buffer(dev, buf); + } + + COMMIT_RING(); + return 0; +} + +static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; + struct drm_device_dma *dma = dev->dma; + struct drm_buf *buf; + drm_radeon_vertex2_t *vertex = data; + int i; + unsigned char laststate; + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + DRM_DEBUG("pid=%d index=%d discard=%d\n", + DRM_CURRENTPID, vertex->idx, vertex->discard); + + if (vertex->idx < 0 || vertex->idx >= dma->buf_count) { + DRM_ERROR("buffer index %d (of %d max)\n", + vertex->idx, dma->buf_count - 1); + return -EINVAL; + } + + RING_SPACE_TEST_WITH_RETURN(dev_priv); + VB_AGE_TEST_WITH_RETURN(dev_priv); + + buf = dma->buflist[vertex->idx]; + + if (buf->file_priv != file_priv) { + DRM_ERROR("process %d using buffer owned by %p\n", + DRM_CURRENTPID, buf->file_priv); + return -EINVAL; + } + + if (buf->pending) { + DRM_ERROR("sending pending buffer %d\n", vertex->idx); + return -EINVAL; + } + + if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS) + return -EINVAL; + + for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) { + drm_radeon_prim_t prim; + drm_radeon_tcl_prim_t tclprim; + + if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim))) + return -EFAULT; + + if (prim.stateidx != laststate) { + drm_radeon_state_t state; + + if (DRM_COPY_FROM_USER(&state, + &vertex->state[prim.stateidx], + sizeof(state))) + return -EFAULT; + + if (radeon_emit_state2(dev_priv, file_priv, &state)) { + DRM_ERROR("radeon_emit_state2 failed\n"); + return -EINVAL; + } + + laststate = prim.stateidx; + } + + tclprim.start = prim.start; + tclprim.finish = prim.finish; + tclprim.prim = prim.prim; + tclprim.vc_format = prim.vc_format; + + if (prim.prim & RADEON_PRIM_WALK_IND) { + tclprim.offset = prim.numverts * 64; + tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */ + + radeon_cp_dispatch_indices(dev, buf, &tclprim); + } else { + tclprim.numverts = prim.numverts; + tclprim.offset = 0; /* not used */ + + radeon_cp_dispatch_vertex(dev, buf, &tclprim); + } + + if (sarea_priv->nbox == 1) + sarea_priv->nbox = 0; + } + + if (vertex->discard) { + radeon_cp_discard_buffer(dev, buf); + } + + COMMIT_RING(); + return 0; +} + +static int radeon_emit_packets(drm_radeon_private_t * dev_priv, + struct drm_file *file_priv, + drm_radeon_cmd_header_t header, + drm_radeon_kcmd_buffer_t *cmdbuf) +{ + int id = (int)header.packet.packet_id; + int sz, reg; + int *data = (int *)cmdbuf->buf; + RING_LOCALS; + + if (id >= RADEON_MAX_STATE_PACKETS) + return -EINVAL; + + sz = packet[id].len; + reg = packet[id].start; + + if (sz * sizeof(int) > cmdbuf->bufsz) { + DRM_ERROR("Packet size provided larger than data provided\n"); + return -EINVAL; + } + + if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) { + DRM_ERROR("Packet verification failed\n"); + return -EINVAL; + } + + BEGIN_RING(sz + 1); + OUT_RING(CP_PACKET0(reg, (sz - 1))); + OUT_RING_TABLE(data, sz); + ADVANCE_RING(); + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + +static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv, + drm_radeon_cmd_header_t header, + drm_radeon_kcmd_buffer_t *cmdbuf) +{ + int sz = header.scalars.count; + int start = header.scalars.offset; + int stride = header.scalars.stride; + RING_LOCALS; + + BEGIN_RING(3 + sz); + OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0)); + OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); + OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1)); + OUT_RING_TABLE(cmdbuf->buf, sz); + ADVANCE_RING(); + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + +/* God this is ugly + */ +static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv, + drm_radeon_cmd_header_t header, + drm_radeon_kcmd_buffer_t *cmdbuf) +{ + int sz = header.scalars.count; + int start = ((unsigned int)header.scalars.offset) + 0x100; + int stride = header.scalars.stride; + RING_LOCALS; + + BEGIN_RING(3 + sz); + OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0)); + OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); + OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1)); + OUT_RING_TABLE(cmdbuf->buf, sz); + ADVANCE_RING(); + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + +static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv, + drm_radeon_cmd_header_t header, + drm_radeon_kcmd_buffer_t *cmdbuf) +{ + int sz = header.vectors.count; + int start = header.vectors.offset; + int stride = header.vectors.stride; + RING_LOCALS; + + BEGIN_RING(5 + sz); + OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0); + OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0)); + OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); + OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1))); + OUT_RING_TABLE(cmdbuf->buf, sz); + ADVANCE_RING(); + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + +static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv, + drm_radeon_cmd_header_t header, + drm_radeon_kcmd_buffer_t *cmdbuf) +{ + int sz = header.veclinear.count * 4; + int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8); + RING_LOCALS; + + if (!sz) + return 0; + if (sz * 4 > cmdbuf->bufsz) + return -EINVAL; + + BEGIN_RING(5 + sz); + OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0); + OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0)); + OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); + OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1))); + OUT_RING_TABLE(cmdbuf->buf, sz); + ADVANCE_RING(); + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + +static int radeon_emit_packet3(struct drm_device * dev, + struct drm_file *file_priv, + drm_radeon_kcmd_buffer_t *cmdbuf) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + unsigned int cmdsz; + int ret; + RING_LOCALS; + + DRM_DEBUG("\n"); + + if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv, + cmdbuf, &cmdsz))) { + DRM_ERROR("Packet verification failed\n"); + return ret; + } + + BEGIN_RING(cmdsz); + OUT_RING_TABLE(cmdbuf->buf, cmdsz); + ADVANCE_RING(); + + cmdbuf->buf += cmdsz * 4; + cmdbuf->bufsz -= cmdsz * 4; + return 0; +} + +static int radeon_emit_packet3_cliprect(struct drm_device *dev, + struct drm_file *file_priv, + drm_radeon_kcmd_buffer_t *cmdbuf, + int orig_nbox) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_clip_rect box; + unsigned int cmdsz; + int ret; + struct drm_clip_rect __user *boxes = cmdbuf->boxes; + int i = 0; + RING_LOCALS; + + DRM_DEBUG("\n"); + + if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv, + cmdbuf, &cmdsz))) { + DRM_ERROR("Packet verification failed\n"); + return ret; + } + + if (!orig_nbox) + goto out; + + do { + if (i < cmdbuf->nbox) { + if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box))) + return -EFAULT; + /* FIXME The second and subsequent times round + * this loop, send a WAIT_UNTIL_3D_IDLE before + * calling emit_clip_rect(). This fixes a + * lockup on fast machines when sending + * several cliprects with a cmdbuf, as when + * waving a 2D window over a 3D + * window. Something in the commands from user + * space seems to hang the card when they're + * sent several times in a row. That would be + * the correct place to fix it but this works + * around it until I can figure that out - Tim + * Smith */ + if (i) { + BEGIN_RING(2); + RADEON_WAIT_UNTIL_3D_IDLE(); + ADVANCE_RING(); + } + radeon_emit_clip_rect(dev_priv, &box); + } + + BEGIN_RING(cmdsz); + OUT_RING_TABLE(cmdbuf->buf, cmdsz); + ADVANCE_RING(); + + } while (++i < cmdbuf->nbox); + if (cmdbuf->nbox == 1) + cmdbuf->nbox = 0; + + out: + cmdbuf->buf += cmdsz * 4; + cmdbuf->bufsz -= cmdsz * 4; + return 0; +} + +static int radeon_emit_wait(struct drm_device * dev, int flags) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + RING_LOCALS; + + DRM_DEBUG("%x\n", flags); + switch (flags) { + case RADEON_WAIT_2D: + BEGIN_RING(2); + RADEON_WAIT_UNTIL_2D_IDLE(); + ADVANCE_RING(); + break; + case RADEON_WAIT_3D: + BEGIN_RING(2); + RADEON_WAIT_UNTIL_3D_IDLE(); + ADVANCE_RING(); + break; + case RADEON_WAIT_2D | RADEON_WAIT_3D: + BEGIN_RING(2); + RADEON_WAIT_UNTIL_IDLE(); + ADVANCE_RING(); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_device_dma *dma = dev->dma; + struct drm_buf *buf = NULL; + int idx; + drm_radeon_kcmd_buffer_t *cmdbuf = data; + drm_radeon_cmd_header_t header; + int orig_nbox, orig_bufsz; + char *kbuf = NULL; + + LOCK_TEST_WITH_RETURN(dev, file_priv); + + RING_SPACE_TEST_WITH_RETURN(dev_priv); + VB_AGE_TEST_WITH_RETURN(dev_priv); + + if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) { + return -EINVAL; + } + + /* Allocate an in-kernel area and copy in the cmdbuf. Do this to avoid + * races between checking values and using those values in other code, + * and simply to avoid a lot of function calls to copy in data. + */ + orig_bufsz = cmdbuf->bufsz; + if (orig_bufsz != 0) { + kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER); + if (kbuf == NULL) + return -ENOMEM; + if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf, + cmdbuf->bufsz)) { + drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER); + return -EFAULT; + } + cmdbuf->buf = kbuf; + } + + orig_nbox = cmdbuf->nbox; + + if (dev_priv->microcode_version == UCODE_R300) { + int temp; + temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf); + + if (orig_bufsz != 0) + drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER); + + return temp; + } + + /* microcode_version != r300 */ + while (cmdbuf->bufsz >= sizeof(header)) { + + header.i = *(int *)cmdbuf->buf; + cmdbuf->buf += sizeof(header); + cmdbuf->bufsz -= sizeof(header); + + switch (header.header.cmd_type) { + case RADEON_CMD_PACKET: + DRM_DEBUG("RADEON_CMD_PACKET\n"); + if (radeon_emit_packets + (dev_priv, file_priv, header, cmdbuf)) { + DRM_ERROR("radeon_emit_packets failed\n"); + goto err; + } + break; + + case RADEON_CMD_SCALARS: + DRM_DEBUG("RADEON_CMD_SCALARS\n"); + if (radeon_emit_scalars(dev_priv, header, cmdbuf)) { + DRM_ERROR("radeon_emit_scalars failed\n"); + goto err; + } + break; + + case RADEON_CMD_VECTORS: + DRM_DEBUG("RADEON_CMD_VECTORS\n"); + if (radeon_emit_vectors(dev_priv, header, cmdbuf)) { + DRM_ERROR("radeon_emit_vectors failed\n"); + goto err; + } + break; + + case RADEON_CMD_DMA_DISCARD: + DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n"); + idx = header.dma.buf_idx; + if (idx < 0 || idx >= dma->buf_count) { + DRM_ERROR("buffer index %d (of %d max)\n", + idx, dma->buf_count - 1); + goto err; + } + + buf = dma->buflist[idx]; + if (buf->file_priv != file_priv || buf->pending) { + DRM_ERROR("bad buffer %p %p %d\n", + buf->file_priv, file_priv, + buf->pending); + goto err; + } + + radeon_cp_discard_buffer(dev, buf); + break; + + case RADEON_CMD_PACKET3: + DRM_DEBUG("RADEON_CMD_PACKET3\n"); + if (radeon_emit_packet3(dev, file_priv, cmdbuf)) { + DRM_ERROR("radeon_emit_packet3 failed\n"); + goto err; + } + break; + + case RADEON_CMD_PACKET3_CLIP: + DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n"); + if (radeon_emit_packet3_cliprect + (dev, file_priv, cmdbuf, orig_nbox)) { + DRM_ERROR("radeon_emit_packet3_clip failed\n"); + goto err; + } + break; + + case RADEON_CMD_SCALARS2: + DRM_DEBUG("RADEON_CMD_SCALARS2\n"); + if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) { + DRM_ERROR("radeon_emit_scalars2 failed\n"); + goto err; + } + break; + + case RADEON_CMD_WAIT: + DRM_DEBUG("RADEON_CMD_WAIT\n"); + if (radeon_emit_wait(dev, header.wait.flags)) { + DRM_ERROR("radeon_emit_wait failed\n"); + goto err; + } + break; + case RADEON_CMD_VECLINEAR: + DRM_DEBUG("RADEON_CMD_VECLINEAR\n"); + if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) { + DRM_ERROR("radeon_emit_veclinear failed\n"); + goto err; + } + break; + + default: + DRM_ERROR("bad cmd_type %d at %p\n", + header.header.cmd_type, + cmdbuf->buf - sizeof(header)); + goto err; + } + } + + if (orig_bufsz != 0) + drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER); + + DRM_DEBUG("DONE\n"); + COMMIT_RING(); + return 0; + + err: + if (orig_bufsz != 0) + drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER); + return -EINVAL; +} + +static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_getparam_t *param = data; + int value; + + DRM_DEBUG("pid=%d\n", DRM_CURRENTPID); + + switch (param->param) { + case RADEON_PARAM_GART_BUFFER_OFFSET: + value = dev_priv->gart_buffers_offset; + break; + case RADEON_PARAM_LAST_FRAME: + dev_priv->stats.last_frame_reads++; + value = GET_SCRATCH(0); + break; + case RADEON_PARAM_LAST_DISPATCH: + value = GET_SCRATCH(1); + break; + case RADEON_PARAM_LAST_CLEAR: + dev_priv->stats.last_clear_reads++; + value = GET_SCRATCH(2); + break; + case RADEON_PARAM_IRQ_NR: + value = dev->irq; + break; + case RADEON_PARAM_GART_BASE: + value = dev_priv->gart_vm_start; + break; + case RADEON_PARAM_REGISTER_HANDLE: + value = dev_priv->mmio->offset; + break; + case RADEON_PARAM_STATUS_HANDLE: + value = dev_priv->ring_rptr_offset; + break; +#if BITS_PER_LONG == 32 + /* + * This ioctl() doesn't work on 64-bit platforms because hw_lock is a + * pointer which can't fit into an int-sized variable. According to + * Michel Dänzer, the ioctl() is only used on embedded platforms, so + * not supporting it shouldn't be a problem. If the same functionality + * is needed on 64-bit platforms, a new ioctl() would have to be added, + * so backwards-compatibility for the embedded platforms can be + * maintained. --davidm 4-Feb-2004. + */ + case RADEON_PARAM_SAREA_HANDLE: + /* The lock is the first dword in the sarea. */ + value = (long)dev->lock.hw_lock; + break; +#endif + case RADEON_PARAM_GART_TEX_HANDLE: + value = dev_priv->gart_textures_offset; + break; + case RADEON_PARAM_SCRATCH_OFFSET: + if (!dev_priv->writeback_works) + return -EINVAL; + value = RADEON_SCRATCH_REG_OFFSET; + break; + case RADEON_PARAM_CARD_TYPE: + if (dev_priv->flags & RADEON_IS_PCIE) + value = RADEON_CARD_PCIE; + else if (dev_priv->flags & RADEON_IS_AGP) + value = RADEON_CARD_AGP; + else + value = RADEON_CARD_PCI; + break; + case RADEON_PARAM_VBLANK_CRTC: + value = radeon_vblank_crtc_get(dev); + break; + case RADEON_PARAM_FB_LOCATION: + value = radeon_read_fb_location(dev_priv); + break; + case RADEON_PARAM_NUM_GB_PIPES: + value = dev_priv->num_gb_pipes; + break; + default: + DRM_DEBUG("Invalid parameter %d\n", param->param); + return -EINVAL; + } + + if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) { + DRM_ERROR("copy_to_user\n"); + return -EFAULT; + } + + return 0; +} + +static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_setparam_t *sp = data; + struct drm_radeon_driver_file_fields *radeon_priv; + + switch (sp->param) { + case RADEON_SETPARAM_FB_LOCATION: + radeon_priv = file_priv->driver_priv; + radeon_priv->radeon_fb_delta = dev_priv->fb_location - + sp->value; + break; + case RADEON_SETPARAM_SWITCH_TILING: + if (sp->value == 0) { + DRM_DEBUG("color tiling disabled\n"); + dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO; + dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO; + dev_priv->sarea_priv->tiling_enabled = 0; + } else if (sp->value == 1) { + DRM_DEBUG("color tiling enabled\n"); + dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO; + dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO; + dev_priv->sarea_priv->tiling_enabled = 1; + } + break; + case RADEON_SETPARAM_PCIGART_LOCATION: + dev_priv->pcigart_offset = sp->value; + dev_priv->pcigart_offset_set = 1; + break; + case RADEON_SETPARAM_NEW_MEMMAP: + dev_priv->new_memmap = sp->value; + break; + case RADEON_SETPARAM_PCIGART_TABLE_SIZE: + dev_priv->gart_info.table_size = sp->value; + if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE) + dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE; + break; + case RADEON_SETPARAM_VBLANK_CRTC: + return radeon_vblank_crtc_set(dev, sp->value); + break; + default: + DRM_DEBUG("Invalid parameter %d\n", sp->param); + return -EINVAL; + } + + return 0; +} + +/* When a client dies: + * - Check for and clean up flipped page state + * - Free any alloced GART memory. + * - Free any alloced radeon surfaces. + * + * DRM infrastructure takes care of reclaiming dma buffers. + */ +void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv) +{ + if (dev->dev_private) { + drm_radeon_private_t *dev_priv = dev->dev_private; + dev_priv->page_flipping = 0; + radeon_mem_release(file_priv, dev_priv->gart_heap); + radeon_mem_release(file_priv, dev_priv->fb_heap); + radeon_surfaces_release(file_priv, dev_priv); + } +} + +void radeon_driver_lastclose(struct drm_device *dev) +{ + if (dev->dev_private) { + drm_radeon_private_t *dev_priv = dev->dev_private; + + if (dev_priv->sarea_priv && + dev_priv->sarea_priv->pfCurrentPage != 0) + radeon_cp_dispatch_flip(dev); + } + + radeon_do_release(dev); +} + +int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_radeon_driver_file_fields *radeon_priv; + + DRM_DEBUG("\n"); + radeon_priv = + (struct drm_radeon_driver_file_fields *) + drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES); + + if (!radeon_priv) + return -ENOMEM; + + file_priv->driver_priv = radeon_priv; + + if (dev_priv) + radeon_priv->radeon_fb_delta = dev_priv->fb_location; + else + radeon_priv->radeon_fb_delta = 0; + return 0; +} + +void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv) +{ + struct drm_radeon_driver_file_fields *radeon_priv = + file_priv->driver_priv; + + drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES); +} + +struct drm_ioctl_desc radeon_ioctls[] = { + DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH) +}; + +int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls); |