From 50f153036c9d9e4ae1768d5ca9c2ad4184f7a0b7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 21 Aug 2009 13:21:01 +1000 Subject: drm/radeon/kms: generate the safe register tables. Previously we just made these offline and included them, but no reason we can't generate them at build time. TODO: add rs690 + r100/r200 when done. should we do rs480/rs690 no tcl version? Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r300.c | 45 ++----------------------------------------- 1 file changed, 2 insertions(+), 43 deletions(-) (limited to 'drivers/gpu/drm/radeon/r300.c') diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index c47579dcafa..482d6b296b7 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -33,6 +33,8 @@ #include "radeon_drm.h" #include "radeon_share.h" +#include "r300_reg_safe.h" + /* r300,r350,rv350,rv370,rv380 depends on : */ void r100_hdp_reset(struct radeon_device *rdev); int r100_cp_reset(struct radeon_device *rdev); @@ -953,49 +955,6 @@ static inline void r300_cs_track_clear(struct r300_cs_track *track) } } -static const unsigned r300_reg_safe_bm[159] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF, - 0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000, - 
0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, - 0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF, - 0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, - 0x00000000, 0x0000C100, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x0003FC01, 0xFFFFFFF8, 0xFE800B19, -}; - static int r300_packet0_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, unsigned idx, unsigned reg) -- cgit v1.2.3-70-g09d2 From 551ebd837c75fc75df81811a18b7136c39cab487 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 1 Sep 2009 15:25:57 +1000 Subject: drm/radeon/kms: add rn50/r100/r200 CS tracker. This adds the command stream checker for the RN50, R100 and R200 cards. It stops any access to 3D registers on RN50, and does checks on buffer sizes on the r100/r200 cards. It also fixes some texture sizing checks on r300. 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/Makefile | 15 +- drivers/gpu/drm/radeon/r100.c | 811 +++++++++++++++++++++++++++++------ drivers/gpu/drm/radeon/r100_track.h | 124 ++++++ drivers/gpu/drm/radeon/r200.c | 456 ++++++++++++++++++++ drivers/gpu/drm/radeon/r300.c | 363 ++-------------- drivers/gpu/drm/radeon/radeon.h | 6 + drivers/gpu/drm/radeon/radeon_asic.h | 1 + drivers/gpu/drm/radeon/radeon_reg.h | 57 ++- drivers/gpu/drm/radeon/reg_srcs/r100 | 105 +++++ drivers/gpu/drm/radeon/reg_srcs/r200 | 184 ++++++++ drivers/gpu/drm/radeon/reg_srcs/rn50 | 30 ++ 11 files changed, 1712 insertions(+), 440 deletions(-) create mode 100644 drivers/gpu/drm/radeon/r100_track.h create mode 100644 drivers/gpu/drm/radeon/r200.c create mode 100644 drivers/gpu/drm/radeon/reg_srcs/r100 create mode 100644 drivers/gpu/drm/radeon/reg_srcs/r200 create mode 100644 drivers/gpu/drm/radeon/reg_srcs/rn50 (limited to 'drivers/gpu/drm/radeon/r300.c') diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index b2213a576a8..6fb84296249 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -11,6 +11,15 @@ hostprogs-y := mkregtable quiet_cmd_mkregtable = MKREGTABLE $@ cmd_mkregtable = $(obj)/mkregtable $< > $@ +$(obj)/rn50_reg_safe.h: $(src)/reg_srcs/rn50 $(obj)/mkregtable + $(call if_changed,mkregtable) + +$(obj)/r100_reg_safe.h: $(src)/reg_srcs/r100 $(obj)/mkregtable + $(call if_changed,mkregtable) + +$(obj)/r200_reg_safe.h: $(src)/reg_srcs/r200 $(obj)/mkregtable + $(call if_changed,mkregtable) + $(obj)/rv515_reg_safe.h: $(src)/reg_srcs/rv515 $(obj)/mkregtable $(call if_changed,mkregtable) @@ -20,6 +29,10 @@ $(obj)/r300_reg_safe.h: $(src)/reg_srcs/r300 $(obj)/mkregtable $(obj)/rs600_reg_safe.h: $(src)/reg_srcs/rs600 $(obj)/mkregtable $(call if_changed,mkregtable) +$(obj)/r100.o: $(obj)/r100_reg_safe.h $(obj)/rn50_reg_safe.h + +$(obj)/r200.o: $(obj)/r200_reg_safe.h + $(obj)/rv515.o: $(obj)/rv515_reg_safe.h $(obj)/r300.o: 
$(obj)/r300_reg_safe.h @@ -34,7 +47,7 @@ radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \ radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \ radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \ rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o \ - radeon_test.o + radeon_test.o r200.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 44f34f8e2b3..ee3ab62417e 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -34,6 +34,9 @@ #include #include +#include "r100_reg_safe.h" +#include "rn50_reg_safe.h" + /* Firmware Names */ #define FIRMWARE_R100 "radeon/R100_cp.bin" #define FIRMWARE_R200 "radeon/R200_cp.bin" @@ -51,11 +54,14 @@ MODULE_FIRMWARE(FIRMWARE_RS690); MODULE_FIRMWARE(FIRMWARE_RS600); MODULE_FIRMWARE(FIRMWARE_R520); +#include "r100_track.h" + /* This files gather functions specifics to: * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 * * Some of these functions might be used by newer ASICs. 
*/ +int r200_init(struct radeon_device *rdev); void r100_hdp_reset(struct radeon_device *rdev); void r100_gpu_init(struct radeon_device *rdev); int r100_gui_wait_for_idle(struct radeon_device *rdev); @@ -1017,147 +1023,356 @@ int r100_cs_packet_next_reloc(struct radeon_cs_parser *p, return 0; } +static int r100_get_vtx_size(uint32_t vtx_fmt) +{ + int vtx_size; + vtx_size = 2; + /* ordered according to bits in spec */ + if (vtx_fmt & RADEON_SE_VTX_FMT_W0) + vtx_size++; + if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR) + vtx_size += 3; + if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA) + vtx_size++; + if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR) + vtx_size++; + if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC) + vtx_size += 3; + if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG) + vtx_size++; + if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC) + vtx_size++; + if (vtx_fmt & RADEON_SE_VTX_FMT_ST0) + vtx_size += 2; + if (vtx_fmt & RADEON_SE_VTX_FMT_ST1) + vtx_size += 2; + if (vtx_fmt & RADEON_SE_VTX_FMT_Q1) + vtx_size++; + if (vtx_fmt & RADEON_SE_VTX_FMT_ST2) + vtx_size += 2; + if (vtx_fmt & RADEON_SE_VTX_FMT_Q2) + vtx_size++; + if (vtx_fmt & RADEON_SE_VTX_FMT_ST3) + vtx_size += 2; + if (vtx_fmt & RADEON_SE_VTX_FMT_Q3) + vtx_size++; + if (vtx_fmt & RADEON_SE_VTX_FMT_Q0) + vtx_size++; + /* blend weight */ + if (vtx_fmt & (0x7 << 15)) + vtx_size += (vtx_fmt >> 15) & 0x7; + if (vtx_fmt & RADEON_SE_VTX_FMT_N0) + vtx_size += 3; + if (vtx_fmt & RADEON_SE_VTX_FMT_XY1) + vtx_size += 2; + if (vtx_fmt & RADEON_SE_VTX_FMT_Z1) + vtx_size++; + if (vtx_fmt & RADEON_SE_VTX_FMT_W1) + vtx_size++; + if (vtx_fmt & RADEON_SE_VTX_FMT_N1) + vtx_size++; + if (vtx_fmt & RADEON_SE_VTX_FMT_Z) + vtx_size++; + return vtx_size; +} + static int r100_packet0_check(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt) + struct radeon_cs_packet *pkt, + unsigned idx, unsigned reg) { struct radeon_cs_chunk *ib_chunk; struct radeon_cs_reloc *reloc; + struct r100_cs_track *track; volatile uint32_t *ib; uint32_t tmp; - unsigned reg; - unsigned i; - 
unsigned idx; - bool onereg; int r; + int i, face; u32 tile_flags = 0; ib = p->ib->ptr; ib_chunk = &p->chunks[p->chunk_ib_idx]; - idx = pkt->idx + 1; - reg = pkt->reg; - onereg = false; - if (CP_PACKET0_GET_ONE_REG_WR(ib_chunk->kdata[pkt->idx])) { - onereg = true; - } - for (i = 0; i <= pkt->count; i++, idx++, reg += 4) { - switch (reg) { - case RADEON_CRTC_GUI_TRIG_VLINE: - r = r100_cs_packet_parse_vline(p); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - r100_cs_dump_packet(p, pkt); - return r; - } - break; + track = (struct r100_cs_track *)p->track; + + switch (reg) { + case RADEON_CRTC_GUI_TRIG_VLINE: + r = r100_cs_packet_parse_vline(p); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + break; /* FIXME: only allow PACKET3 blit? easier to check for out of * range access */ - case RADEON_DST_PITCH_OFFSET: - case RADEON_SRC_PITCH_OFFSET: - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - r100_cs_dump_packet(p, pkt); - return r; - } - tmp = ib_chunk->kdata[idx] & 0x003fffff; - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) - tile_flags |= RADEON_DST_TILE_MACRO; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { - if (reg == RADEON_SRC_PITCH_OFFSET) { - DRM_ERROR("Cannot src blit from microtiled surface\n"); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - tile_flags |= RADEON_DST_TILE_MICRO; - } + case RADEON_DST_PITCH_OFFSET: + case RADEON_SRC_PITCH_OFFSET: + r = r100_reloc_pitch_offset(p, pkt, idx, reg); + if (r) + return r; + break; + case RADEON_RB3D_DEPTHOFFSET: + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + track->zb.robj = reloc->robj; + track->zb.offset = ib_chunk->kdata[idx]; + ib[idx] = ib_chunk->kdata[idx] + 
((u32)reloc->lobj.gpu_offset); + break; + case RADEON_RB3D_COLOROFFSET: + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + track->cb[0].robj = reloc->robj; + track->cb[0].offset = ib_chunk->kdata[idx]; + ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + break; + case RADEON_PP_TXOFFSET_0: + case RADEON_PP_TXOFFSET_1: + case RADEON_PP_TXOFFSET_2: + i = (reg - RADEON_PP_TXOFFSET_0) / 24; + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->textures[i].robj = reloc->robj; + break; + case RADEON_PP_CUBIC_OFFSET_T0_0: + case RADEON_PP_CUBIC_OFFSET_T0_1: + case RADEON_PP_CUBIC_OFFSET_T0_2: + case RADEON_PP_CUBIC_OFFSET_T0_3: + case RADEON_PP_CUBIC_OFFSET_T0_4: + i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4; + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + track->textures[0].cube_info[i].offset = ib_chunk->kdata[idx]; + ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->textures[0].cube_info[i].robj = reloc->robj; + break; + case RADEON_PP_CUBIC_OFFSET_T1_0: + case RADEON_PP_CUBIC_OFFSET_T1_1: + case RADEON_PP_CUBIC_OFFSET_T1_2: + case RADEON_PP_CUBIC_OFFSET_T1_3: + case RADEON_PP_CUBIC_OFFSET_T1_4: + i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4; + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + track->textures[1].cube_info[i].offset = ib_chunk->kdata[idx]; + ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->textures[1].cube_info[i].robj = reloc->robj; + break; + case RADEON_PP_CUBIC_OFFSET_T2_0: + case 
RADEON_PP_CUBIC_OFFSET_T2_1: + case RADEON_PP_CUBIC_OFFSET_T2_2: + case RADEON_PP_CUBIC_OFFSET_T2_3: + case RADEON_PP_CUBIC_OFFSET_T2_4: + i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4; + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + track->textures[2].cube_info[i].offset = ib_chunk->kdata[idx]; + ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->textures[2].cube_info[i].robj = reloc->robj; + break; + case RADEON_RE_WIDTH_HEIGHT: + track->maxy = ((ib_chunk->kdata[idx] >> 16) & 0x7FF); + break; + case RADEON_RB3D_COLORPITCH: + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } - tmp |= tile_flags; - ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp; - break; - case RADEON_RB3D_DEPTHOFFSET: - case RADEON_RB3D_COLOROFFSET: - case R300_RB3D_COLOROFFSET0: - case R300_ZB_DEPTHOFFSET: - case R200_PP_TXOFFSET_0: - case R200_PP_TXOFFSET_1: - case R200_PP_TXOFFSET_2: - case R200_PP_TXOFFSET_3: - case R200_PP_TXOFFSET_4: - case R200_PP_TXOFFSET_5: - case RADEON_PP_TXOFFSET_0: - case RADEON_PP_TXOFFSET_1: - case RADEON_PP_TXOFFSET_2: - case R300_TX_OFFSET_0: - case R300_TX_OFFSET_0+4: - case R300_TX_OFFSET_0+8: - case R300_TX_OFFSET_0+12: - case R300_TX_OFFSET_0+16: - case R300_TX_OFFSET_0+20: - case R300_TX_OFFSET_0+24: - case R300_TX_OFFSET_0+28: - case R300_TX_OFFSET_0+32: - case R300_TX_OFFSET_0+36: - case R300_TX_OFFSET_0+40: - case R300_TX_OFFSET_0+44: - case R300_TX_OFFSET_0+48: - case R300_TX_OFFSET_0+52: - case R300_TX_OFFSET_0+56: - case R300_TX_OFFSET_0+60: - /* rn50 has no 3D engine so fail on any 3d setup */ - if (ASIC_IS_RN50(p->rdev)) { - DRM_ERROR("attempt to use RN50 3D engine failed\n"); - return -EINVAL; - } - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - 
r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); - break; - case R300_RB3D_COLORPITCH0: - case RADEON_RB3D_COLORPITCH: - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - r100_cs_dump_packet(p, pkt); - return r; - } + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_COLOR_TILE_ENABLE; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) - tile_flags |= RADEON_COLOR_TILE_ENABLE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) - tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; + tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); + tmp |= tile_flags; + ib[idx] = tmp; - tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); - tmp |= tile_flags; - ib[idx] = tmp; + track->cb[0].pitch = ib_chunk->kdata[idx] & RADEON_COLORPITCH_MASK; + break; + case RADEON_RB3D_DEPTHPITCH: + track->zb.pitch = ib_chunk->kdata[idx] & RADEON_DEPTHPITCH_MASK; + break; + case RADEON_RB3D_CNTL: + switch ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { + case 7: + case 8: + case 9: + case 11: + case 12: + track->cb[0].cpp = 1; break; - case RADEON_RB3D_ZPASS_ADDR: - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + case 3: + case 4: + case 15: + track->cb[0].cpp = 2; + break; + case 6: + track->cb[0].cpp = 4; + break; + default: + DRM_ERROR("Invalid color buffer format (%d) !\n", + ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); + return -EINVAL; + } + track->z_enabled = !!(ib_chunk->kdata[idx] & RADEON_Z_ENABLE); + break; + case RADEON_RB3D_ZSTENCILCNTL: + switch (ib_chunk->kdata[idx] & 0xf) { + case 0: + track->zb.cpp = 2; + break; + case 2: + case 3: + 
case 4: + case 5: + case 9: + case 11: + track->zb.cpp = 4; break; default: - /* FIXME: we don't want to allow anyothers packet */ break; } - if (onereg) { - /* FIXME: forbid onereg write to register on relocate */ + break; + case RADEON_RB3D_ZPASS_ADDR: + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + break; + case RADEON_PP_CNTL: + { + uint32_t temp = ib_chunk->kdata[idx] >> 4; + for (i = 0; i < track->num_texture; i++) + track->textures[i].enabled = !!(temp & (1 << i)); + } + break; + case RADEON_SE_VF_CNTL: + track->vap_vf_cntl = ib_chunk->kdata[idx]; + break; + case RADEON_SE_VTX_FMT: + track->vtx_size = r100_get_vtx_size(ib_chunk->kdata[idx]); + break; + case RADEON_PP_TEX_SIZE_0: + case RADEON_PP_TEX_SIZE_1: + case RADEON_PP_TEX_SIZE_2: + i = (reg - RADEON_PP_TEX_SIZE_0) / 8; + track->textures[i].width = (ib_chunk->kdata[idx] & RADEON_TEX_USIZE_MASK) + 1; + track->textures[i].height = ((ib_chunk->kdata[idx] & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; + break; + case RADEON_PP_TEX_PITCH_0: + case RADEON_PP_TEX_PITCH_1: + case RADEON_PP_TEX_PITCH_2: + i = (reg - RADEON_PP_TEX_PITCH_0) / 8; + track->textures[i].pitch = ib_chunk->kdata[idx] + 32; + break; + case RADEON_PP_TXFILTER_0: + case RADEON_PP_TXFILTER_1: + case RADEON_PP_TXFILTER_2: + i = (reg - RADEON_PP_TXFILTER_0) / 24; + track->textures[i].num_levels = ((ib_chunk->kdata[idx] & RADEON_MAX_MIP_LEVEL_MASK) + >> RADEON_MAX_MIP_LEVEL_SHIFT); + tmp = (ib_chunk->kdata[idx] >> 23) & 0x7; + if (tmp == 2 || tmp == 6) + track->textures[i].roundup_w = false; + tmp = (ib_chunk->kdata[idx] >> 27) & 0x7; + if (tmp == 2 || tmp == 6) + track->textures[i].roundup_h = false; + break; + case RADEON_PP_TXFORMAT_0: + case RADEON_PP_TXFORMAT_1: + case RADEON_PP_TXFORMAT_2: + i = (reg - RADEON_PP_TXFORMAT_0) / 24; + if 
(ib_chunk->kdata[idx] & RADEON_TXFORMAT_NON_POWER2) { + track->textures[i].use_pitch = 1; + } else { + track->textures[i].use_pitch = 0; + track->textures[i].width = 1 << ((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); + track->textures[i].height = 1 << ((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + } + if (ib_chunk->kdata[idx] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) + track->textures[i].tex_coord_type = 2; + switch ((ib_chunk->kdata[idx] & RADEON_TXFORMAT_FORMAT_MASK)) { + case RADEON_TXFORMAT_I8: + case RADEON_TXFORMAT_RGB332: + case RADEON_TXFORMAT_Y8: + track->textures[i].cpp = 1; + break; + case RADEON_TXFORMAT_AI88: + case RADEON_TXFORMAT_ARGB1555: + case RADEON_TXFORMAT_RGB565: + case RADEON_TXFORMAT_ARGB4444: + case RADEON_TXFORMAT_VYUY422: + case RADEON_TXFORMAT_YVYU422: + case RADEON_TXFORMAT_DXT1: + case RADEON_TXFORMAT_SHADOW16: + case RADEON_TXFORMAT_LDUDV655: + case RADEON_TXFORMAT_DUDV88: + track->textures[i].cpp = 2; break; + case RADEON_TXFORMAT_ARGB8888: + case RADEON_TXFORMAT_RGBA8888: + case RADEON_TXFORMAT_DXT23: + case RADEON_TXFORMAT_DXT45: + case RADEON_TXFORMAT_SHADOW32: + case RADEON_TXFORMAT_LDUDUV8888: + track->textures[i].cpp = 4; + break; + } + track->textures[i].cube_info[4].width = 1 << ((ib_chunk->kdata[idx] >> 16) & 0xf); + track->textures[i].cube_info[4].height = 1 << ((ib_chunk->kdata[idx] >> 20) & 0xf); + break; + case RADEON_PP_CUBIC_FACES_0: + case RADEON_PP_CUBIC_FACES_1: + case RADEON_PP_CUBIC_FACES_2: + tmp = ib_chunk->kdata[idx]; + i = (reg - RADEON_PP_CUBIC_FACES_0) / 4; + for (face = 0; face < 4; face++) { + track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); + track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf); } + break; + default: + printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", + reg, idx); + return -EINVAL; } return 0; } @@ -1186,6 +1401,7 @@ static int 
r100_packet3_check(struct radeon_cs_parser *p, { struct radeon_cs_chunk *ib_chunk; struct radeon_cs_reloc *reloc; + struct r100_cs_track *track; unsigned idx; unsigned i, c; volatile uint32_t *ib; @@ -1194,9 +1410,17 @@ static int r100_packet3_check(struct radeon_cs_parser *p, ib = p->ib->ptr; ib_chunk = &p->chunks[p->chunk_ib_idx]; idx = pkt->idx + 1; + track = (struct r100_cs_track *)p->track; switch (pkt->opcode) { case PACKET3_3D_LOAD_VBPNTR: c = ib_chunk->kdata[idx++]; + /* c is read from the userspace IB: bound it by the tracker array so the arrays[] stores below stay in range (0 would also wrap the c - 1 loop bound) */ + if (c == 0 || c > ARRAY_SIZE(track->arrays)) { + DRM_ERROR("Bad vertex array count %u\n", c); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + track->num_arrays = c; for (i = 0; i < (c - 1); i += 2, idx += 3) { r = r100_cs_packet_next_reloc(p, &reloc); if (r) { @@ -1206,6 +1424,9 @@ static int r100_packet3_check(struct radeon_cs_parser *p, return r; } ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8; + track->arrays[i + 0].esize &= 0x7F; r = r100_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("No reloc for packet3 %d\n", @@ -1214,6 +1435,9 @@ static int r100_packet3_check(struct radeon_cs_parser *p, return r; } ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 1].robj = reloc->robj; + track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24; + track->arrays[i + 1].esize &= 0x7F; } if (c & 1) { r = r100_cs_packet_next_reloc(p, &reloc); @@ -1224,6 +1448,9 @@ static int r100_packet3_check(struct radeon_cs_parser *p, return r; } ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8; + track->arrays[i + 0].esize &= 0x7F; } break; case PACKET3_INDX_BUFFER: @@ -1240,7 +1467,6 @@ static int r100_packet3_check(struct radeon_cs_parser *p, } break; case 0x23: - /* FIXME: cleanup */ /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */ r = r100_cs_packet_next_reloc(p, &reloc); if (r) { @@ -1249,18 +1475,71 @@ static int r100_packet3_check(struct radeon_cs_parser *p, return r; } ib[idx] =
ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->num_arrays = 1; + track->vtx_size = r100_get_vtx_size(ib_chunk->kdata[idx+2]); + + track->arrays[0].robj = reloc->robj; + track->arrays[0].esize = track->vtx_size; + + track->max_indx = ib_chunk->kdata[idx+1]; + + track->vap_vf_cntl = ib_chunk->kdata[idx+3]; + track->immd_dwords = pkt->count - 1; + r = r100_cs_track_check(p->rdev, track); + if (r) + return r; break; case PACKET3_3D_DRAW_IMMD: + if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) { + DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); + return -EINVAL; + } + track->vap_vf_cntl = ib_chunk->kdata[idx+1]; + track->immd_dwords = pkt->count - 1; + r = r100_cs_track_check(p->rdev, track); + if (r) + return r; + break; /* triggers drawing using in-packet vertex data */ case PACKET3_3D_DRAW_IMMD_2: + if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) { + DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); + return -EINVAL; + } + track->vap_vf_cntl = ib_chunk->kdata[idx]; + track->immd_dwords = pkt->count; + r = r100_cs_track_check(p->rdev, track); + if (r) + return r; + break; /* triggers drawing using in-packet vertex data */ case PACKET3_3D_DRAW_VBUF_2: + track->vap_vf_cntl = ib_chunk->kdata[idx]; + r = r100_cs_track_check(p->rdev, track); + if (r) + return r; + break; /* triggers drawing of vertex buffers setup elsewhere */ case PACKET3_3D_DRAW_INDX_2: + track->vap_vf_cntl = ib_chunk->kdata[idx]; + r = r100_cs_track_check(p->rdev, track); + if (r) + return r; + break; /* triggers drawing using indices to vertex buffer */ case PACKET3_3D_DRAW_VBUF: + track->vap_vf_cntl = ib_chunk->kdata[idx + 1]; + r = r100_cs_track_check(p->rdev, track); + if (r) + return r; + break; /* triggers drawing of vertex buffers setup elsewhere */ case PACKET3_3D_DRAW_INDX: + track->vap_vf_cntl = ib_chunk->kdata[idx + 1]; + r = r100_cs_track_check(p->rdev, track); + if (r) + return r; + break; /* triggers drawing using indices to vertex buffer */ case PACKET3_NOP: break; @@ 
-1274,8 +1553,11 @@ static int r100_packet3_check(struct radeon_cs_parser *p, int r100_cs_parse(struct radeon_cs_parser *p) { struct radeon_cs_packet pkt; + struct r100_cs_track track; int r; + r100_cs_track_clear(p->rdev, &track); + p->track = &track; do { r = r100_cs_packet_parse(p, &pkt, p->idx); if (r) { @@ -1284,7 +1566,16 @@ int r100_cs_parse(struct radeon_cs_parser *p) p->idx += pkt.count + 2; switch (pkt.type) { case PACKET_TYPE0: - r = r100_packet0_check(p, &pkt); + if (p->rdev->family >= CHIP_R200) + r = r100_cs_parse_packet0(p, &pkt, + p->rdev->config.r100.reg_safe_bm, + p->rdev->config.r100.reg_safe_bm_size, + &r200_packet0_check); + else + r = r100_cs_parse_packet0(p, &pkt, + p->rdev->config.r100.reg_safe_bm, + p->rdev->config.r100.reg_safe_bm_size, + &r100_packet0_check); break; case PACKET_TYPE2: break; @@ -1683,6 +1974,15 @@ void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) int r100_init(struct radeon_device *rdev) { + if (ASIC_IS_RN50(rdev)) { + rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm; + rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm); + } else if (rdev->family < CHIP_R200) { + rdev->config.r100.reg_safe_bm = r100_reg_safe_bm; + rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm); + } else { + return r200_init(rdev); + } return 0; } @@ -2383,3 +2683,274 @@ void r100_bandwidth_update(struct radeon_device *rdev) (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL)); } } + +static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t) +{ + DRM_ERROR("pitch %d\n", t->pitch); + DRM_ERROR("width %d\n", t->width); + DRM_ERROR("height %d\n", t->height); + DRM_ERROR("num levels %d\n", t->num_levels); + DRM_ERROR("depth %d\n", t->txdepth); + DRM_ERROR("bpp %d\n", t->cpp); + DRM_ERROR("coordinate type %d\n", t->tex_coord_type); + DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); + DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); +} + +static int 
r100_cs_track_cube(struct radeon_device *rdev, + struct r100_cs_track *track, unsigned idx) +{ + unsigned face, w, h; + struct radeon_object *cube_robj; + unsigned long size; + + for (face = 0; face < 5; face++) { /* the 5 non-primary cube faces of texture unit idx */ + cube_robj = track->textures[idx].cube_info[face].robj; /* NOTE(review): stays NULL when no CUBIC_OFFSET reloc was seen for this face - confirm radeon_object_size() tolerates NULL */ + w = track->textures[idx].cube_info[face].width; + h = track->textures[idx].cube_info[face].height; + + size = w * h; + size *= track->textures[idx].cpp; + + size += track->textures[idx].cube_info[face].offset; + + if (size > radeon_object_size(cube_robj)) { + DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", + size, radeon_object_size(cube_robj)); + r100_cs_track_texture_print(&track->textures[idx]); + return -EINVAL; /* same errno as every other tracker check; the caller propagates it */ + } + } + return 0; +} + +static int r100_cs_track_texture_check(struct radeon_device *rdev, + struct r100_cs_track *track) +{ + struct radeon_object *robj; + unsigned long size; + unsigned u, i, w, h; + int ret; + + for (u = 0; u < track->num_texture; u++) { + if (!track->textures[u].enabled) + continue; + robj = track->textures[u].robj; + if (robj == NULL) { + DRM_ERROR("No texture bound to unit %u\n", u); + return -EINVAL; + } + size = 0; + for (i = 0; i <= track->textures[u].num_levels; i++) { + if (track->textures[u].use_pitch) { + if (rdev->family < CHIP_R300) + w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); + else + w = track->textures[u].pitch / (1 << i); + } else { + w = track->textures[u].width / (1 << i); + if (rdev->family >= CHIP_RV515) + w |= track->textures[u].width_11; + if (track->textures[u].roundup_w) + w = roundup_pow_of_two(w); + } + h = track->textures[u].height / (1 << i); + if (rdev->family >= CHIP_RV515) + h |= track->textures[u].height_11; + if (track->textures[u].roundup_h) + h = roundup_pow_of_two(h); + size += w * h; + } + size *= track->textures[u].cpp; + switch (track->textures[u].tex_coord_type) { + case 0: + break; + case 1: + size *= (1 << track->textures[u].txdepth); + break; + case 2: + if (track->separate_cube) {
+ ret = r100_cs_track_cube(rdev, track, u); + if (ret) + return ret; + } else + size *= 6; + break; + default: + DRM_ERROR("Invalid texture coordinate type %u for unit " + "%u\n", track->textures[u].tex_coord_type, u); + return -EINVAL; + } + if (size > radeon_object_size(robj)) { + DRM_ERROR("Texture of unit %u needs %lu bytes but is " + "%lu\n", u, size, radeon_object_size(robj)); + r100_cs_track_texture_print(&track->textures[u]); + return -EINVAL; + } + } + return 0; +} + +int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) +{ + unsigned i; + unsigned long size; + unsigned prim_walk; + unsigned nverts; + + for (i = 0; i < track->num_cb; i++) { + if (track->cb[i].robj == NULL) { + DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); + return -EINVAL; + } + size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; + size += track->cb[i].offset; + if (size > radeon_object_size(track->cb[i].robj)) { + DRM_ERROR("[drm] Buffer too small for color buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_object_size(track->cb[i].robj)); + DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", + i, track->cb[i].pitch, track->cb[i].cpp, + track->cb[i].offset, track->maxy); + return -EINVAL; + } + } + if (track->z_enabled) { + if (track->zb.robj == NULL) { + DRM_ERROR("[drm] No buffer for z buffer !\n"); + return -EINVAL; + } + size = track->zb.pitch * track->zb.cpp * track->maxy; + size += track->zb.offset; + if (size > radeon_object_size(track->zb.robj)) { + DRM_ERROR("[drm] Buffer too small for z buffer " + "(need %lu have %lu) !\n", size, + radeon_object_size(track->zb.robj)); + DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", + track->zb.pitch, track->zb.cpp, + track->zb.offset, track->maxy); + return -EINVAL; + } + } + prim_walk = (track->vap_vf_cntl >> 4) & 0x3; + nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; + switch (prim_walk) { + case 1: + for (i = 0; i < track->num_arrays; i++) { + size = track->arrays[i].esize * track->max_indx * 4; 
+ if (track->arrays[i].robj == NULL) { + DRM_ERROR("(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); + return -EINVAL; + } + if (size > radeon_object_size(track->arrays[i].robj)) { + DRM_ERROR("(PW %u) Vertex array %u need %lu dwords " + "have %lu dwords\n", prim_walk, i, + size >> 2, + radeon_object_size(track->arrays[i].robj) >> 2); + DRM_ERROR("Max indices %u\n", track->max_indx); + return -EINVAL; + } + } + break; + case 2: + for (i = 0; i < track->num_arrays; i++) { + size = track->arrays[i].esize * (nverts - 1) * 4; + if (track->arrays[i].robj == NULL) { + DRM_ERROR("(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); + return -EINVAL; + } + if (size > radeon_object_size(track->arrays[i].robj)) { + DRM_ERROR("(PW %u) Vertex array %u need %lu dwords " + "have %lu dwords\n", prim_walk, i, size >> 2, + radeon_object_size(track->arrays[i].robj) >> 2); + return -EINVAL; + } + } + break; + case 3: /* immediate draw: the packet's dword count must match vtx_size * nverts exactly */ + size = track->vtx_size * nverts; + if (size != track->immd_dwords) { + DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n", + track->immd_dwords, size); + DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", + nverts, track->vtx_size); + return -EINVAL; + } + break; + default: + DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", + prim_walk); + return -EINVAL; + } + return r100_cs_track_texture_check(rdev, track); +} + +void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) +{ + unsigned i, face; + + if (rdev->family < CHIP_R300) { + track->num_cb = 1; + if (rdev->family <= CHIP_RS200) + track->num_texture = 3; + else + track->num_texture = 6; + track->maxy = 2048; + track->separate_cube = 1; + } else { + track->num_cb = 4; + track->num_texture = 16; + track->maxy = 4096; + track->separate_cube = 0; + } + + for (i = 0; i < track->num_cb; i++) { + track->cb[i].robj = NULL; + track->cb[i].pitch = 8192; + track->cb[i].cpp = 16; + track->cb[i].offset = 0; + } + track->z_enabled = true; + track->zb.robj =
NULL; + track->zb.pitch = 8192; + track->zb.cpp = 4; + track->zb.offset = 0; + track->vtx_size = 0x7F; + track->immd_dwords = 0xFFFFFFFFUL; + track->num_arrays = 11; + track->max_indx = 0x00FFFFFFUL; + for (i = 0; i < track->num_arrays; i++) { + track->arrays[i].robj = NULL; + track->arrays[i].esize = 0x7F; + } + for (i = 0; i < track->num_texture; i++) { + track->textures[i].pitch = 16536; + track->textures[i].width = 16536; + track->textures[i].height = 16536; + track->textures[i].width_11 = 1 << 11; + track->textures[i].height_11 = 1 << 11; + track->textures[i].num_levels = 12; + if (rdev->family <= CHIP_RS200) { + track->textures[i].tex_coord_type = 0; + track->textures[i].txdepth = 0; + } else { + track->textures[i].txdepth = 16; + track->textures[i].tex_coord_type = 1; + } + track->textures[i].cpp = 64; + track->textures[i].robj = NULL; + /* CS IB emission code makes sure texture unit are disabled */ + track->textures[i].enabled = false; + track->textures[i].roundup_w = true; + track->textures[i].roundup_h = true; + if (track->separate_cube) + for (face = 0; face < 5; face++) { + track->textures[i].cube_info[face].robj = NULL; + track->textures[i].cube_info[face].width = 16536; + track->textures[i].cube_info[face].height = 16536; + track->textures[i].cube_info[face].offset = 0; + } + } +} diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h new file mode 100644 index 00000000000..70a82eda394 --- /dev/null +++ b/drivers/gpu/drm/radeon/r100_track.h @@ -0,0 +1,124 @@ + +#define R100_TRACK_MAX_TEXTURE 3 +#define R200_TRACK_MAX_TEXTURE 6 +#define R300_TRACK_MAX_TEXTURE 16 + +#define R100_MAX_CB 1 +#define R300_MAX_CB 4 + +/* + * CS functions + */ +struct r100_cs_track_cb { + struct radeon_object *robj; + unsigned pitch; + unsigned cpp; + unsigned offset; +}; + +struct r100_cs_track_array { + struct radeon_object *robj; + unsigned esize; +}; + +struct r100_cs_cube_info { + struct radeon_object *robj; + unsigned offset; + unsigned 
width; + unsigned height; +}; + +struct r100_cs_track_texture { /* tracked state of one texture unit */ + struct radeon_object *robj; /* bound texture object, NULL while unbound */ + struct r100_cs_cube_info cube_info[5]; /* info for 5 non-primary faces */ + unsigned pitch; + unsigned width; + unsigned height; + unsigned num_levels; + unsigned cpp; /* presumably bytes per texel - confirm */ + unsigned tex_coord_type; /* 0, 1 or 2; the r200 checker labels the choices 2D, 3D, CUBE */ + unsigned txdepth; + unsigned width_11; + unsigned height_11; + bool use_pitch; /* set by the r200 checker when the format has the NON_POWER2 bit */ + bool enabled; /* unit is enabled; cleared by r100_cs_track_clear() */ + bool roundup_w; + bool roundup_h; +}; + +struct r100_cs_track_limits { /* NOTE(review): not referenced by any code visible in this patch */ + unsigned num_cb; + unsigned num_texture; + unsigned max_levels; +}; + +struct r100_cs_track { /* complete tracker state for one command-stream parse */ + struct radeon_device *rdev; /* NOTE(review): r100_cs_track_clear() does not assign this field */ + unsigned num_cb; /* number of cb[] entries in use */ + unsigned num_texture; /* number of textures[] entries in use */ + unsigned maxy; + unsigned vtx_size; + unsigned vap_vf_cntl; /* last VF_CNTL value seen; holds the primitive walk and vertex count */ + unsigned immd_dwords; + unsigned num_arrays; /* number of arrays[] entries in use */ + unsigned max_indx; + struct r100_cs_track_array arrays[11]; + struct r100_cs_track_cb cb[R300_MAX_CB]; + struct r100_cs_track_cb zb; /* depth buffer */ + struct r100_cs_track_texture textures[R300_TRACK_MAX_TEXTURE]; + bool z_enabled; /* depth buffer takes part in the checks */ + bool separate_cube; /* cube_info[] is initialised by r100_cs_track_clear() when set */ + +}; + +int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track); +void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track); +int r100_cs_packet_next_reloc(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc); +void r100_cs_dump_packet(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt); + +int r100_cs_packet_parse_vline(struct radeon_cs_parser *p); + +int r200_packet0_check(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, unsigned reg); + /* Relocate a DST/SRC_PITCH_OFFSET dword: add the buffer GPU offset (shifted right by 10) to the low 22 bits, OR in the tiling flags and store the result in the IB. Returns 0, the error from the reloc lookup, or -EINVAL for a micro-tiled source. */ +static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, + unsigned reg) +{ + int r; + u32 tile_flags = 0; + u32 tmp; + struct radeon_cs_reloc *reloc; + struct radeon_cs_chunk *ib_chunk; + + ib_chunk = &p->chunks[p->chunk_ib_idx]; + + r = r100_cs_packet_next_reloc(p, &reloc); /* this register must be followed by a relocation */ + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + tmp =
ib_chunk->kdata[idx] & 0x003fffff; /* low 22 bits of the submitted dword */ + tmp += (((u32)reloc->lobj.gpu_offset) >> 10); /* NOTE(review): the sum is not masked back to 22 bits before it is merged below - confirm it cannot carry */ + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_DST_TILE_MACRO; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reg == RADEON_SRC_PITCH_OFFSET) { /* micro tiling is rejected for the source register */ + DRM_ERROR("Cannot src blit from microtiled surface\n"); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + tile_flags |= RADEON_DST_TILE_MICRO; + } + + tmp |= tile_flags; + p->ib->ptr[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp; /* keep bits 22..29 of the submitted dword */ + return 0; +} diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c new file mode 100644 index 00000000000..568c74bfba3 --- /dev/null +++ b/drivers/gpu/drm/radeon/r200.c @@ -0,0 +1,456 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE.
+ * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_reg.h" +#include "radeon.h" + +#include "r200_reg_safe.h" + +#include "r100_track.h" + /* Vertex size implied by an R200_SE_VTX_FMT_0 value: a base of 2 plus the contribution of every component whose bit or field is set. The unit is presumably dwords - confirm. */ +static int r200_get_vtx_size_0(uint32_t vtx_fmt_0) +{ + int vtx_size, i; + vtx_size = 2; /* base size counted before any optional component */ + + if (vtx_fmt_0 & R200_VTX_Z0) + vtx_size++; + if (vtx_fmt_0 & R200_VTX_W0) + vtx_size++; + /* blend weight */ + if (vtx_fmt_0 & (0x7 << R200_VTX_WEIGHT_COUNT_SHIFT)) + vtx_size += (vtx_fmt_0 >> R200_VTX_WEIGHT_COUNT_SHIFT) & 0x7; + if (vtx_fmt_0 & R200_VTX_PV_MATRIX_SEL) + vtx_size++; + if (vtx_fmt_0 & R200_VTX_N0) + vtx_size += 3; + if (vtx_fmt_0 & R200_VTX_POINT_SIZE) + vtx_size++; + if (vtx_fmt_0 & R200_VTX_DISCRETE_FOG) + vtx_size++; + if (vtx_fmt_0 & R200_VTX_SHININESS_0) + vtx_size++; + if (vtx_fmt_0 & R200_VTX_SHININESS_1) + vtx_size++; + for (i = 0; i < 8; i++) { /* eight 2-bit colour fields starting at bit 11 */ + int color_size = (vtx_fmt_0 >> (11 + 2*i)) & 0x3; + switch (color_size) { /* field value 0, 1, 2, 3 adds 0, 1, 3, 4 */ + case 0: break; + case 1: vtx_size++; break; + case 2: vtx_size += 3; break; + case 3: vtx_size += 4; break; + } + } + if (vtx_fmt_0 & R200_VTX_XY1) + vtx_size += 2; + if (vtx_fmt_0 & R200_VTX_Z1) + vtx_size++; + if (vtx_fmt_0 & R200_VTX_W1) + vtx_size++; + if (vtx_fmt_0 & R200_VTX_N1) + vtx_size += 3; + return vtx_size; +} + /* Vertex size contributed by an R200_SE_VTX_FMT_1 value: the sum of six 3-bit fields, skipping any field greater than 4. */ +static int r200_get_vtx_size_1(uint32_t vtx_fmt_1) +{ + int vtx_size, i, tex_size; + vtx_size = 0; + for (i = 0; i < 6; i++) { /* field i occupies bits 3*i .. 3*i+2 */ + tex_size = (vtx_fmt_1 >> (i * 3)) & 0x7; + if (tex_size > 4) /* values 5..7 contribute nothing */ + continue; + vtx_size += tex_size; + } + return vtx_size; +} + /* Check one PACKET0 register write for the R200 CS checker: relocate buffer addresses into the IB, record state in the tracker taken from p->track, and return -EINVAL for a register that is not handled. idx is the dword index in the IB, reg the register offset. */ +int r200_packet0_check(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, unsigned reg) +{ + struct radeon_cs_chunk *ib_chunk; + struct radeon_cs_reloc *reloc; + struct r100_cs_track *track; + volatile uint32_t *ib; + uint32_t tmp; + int r; + int i; + int face; + u32 tile_flags = 0; + + ib = p->ib->ptr; /* IB dwords that are patched in place */ + ib_chunk = &p->chunks[p->chunk_ib_idx]; /* kdata[] holds the dwords as submitted */ + track = (struct r100_cs_track *)p->track; + + switch
(reg) { + case RADEON_CRTC_GUI_TRIG_VLINE: + r = r100_cs_packet_parse_vline(p); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + break; + /* FIXME: only allow PACKET3 blit? easier to check for out of + * range access */ + case RADEON_DST_PITCH_OFFSET: + case RADEON_SRC_PITCH_OFFSET: + r = r100_reloc_pitch_offset(p, pkt, idx, reg); + if (r) + return r; + break; + case RADEON_RB3D_DEPTHOFFSET: + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + track->zb.robj = reloc->robj; + track->zb.offset = ib_chunk->kdata[idx]; /* offset as submitted, before relocation */ + ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + break; + case RADEON_RB3D_COLOROFFSET: + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + track->cb[0].robj = reloc->robj; /* only colour buffer 0 is tracked here */ + track->cb[0].offset = ib_chunk->kdata[idx]; + ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + break; + case R200_PP_TXOFFSET_0: + case R200_PP_TXOFFSET_1: + case R200_PP_TXOFFSET_2: + case R200_PP_TXOFFSET_3: + case R200_PP_TXOFFSET_4: + case R200_PP_TXOFFSET_5: + i = (reg - R200_PP_TXOFFSET_0) / 24; /* texture unit index; the TXOFFSET_n registers are 0x18 (24) bytes apart */ + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->textures[i].robj = reloc->robj; + break; + case R200_PP_CUBIC_OFFSET_F1_0: + case R200_PP_CUBIC_OFFSET_F2_0: + case R200_PP_CUBIC_OFFSET_F3_0: + case R200_PP_CUBIC_OFFSET_F4_0: + case R200_PP_CUBIC_OFFSET_F5_0: + case R200_PP_CUBIC_OFFSET_F1_1: + case R200_PP_CUBIC_OFFSET_F2_1: + case R200_PP_CUBIC_OFFSET_F3_1: + case R200_PP_CUBIC_OFFSET_F4_1: + case R200_PP_CUBIC_OFFSET_F5_1: + case R200_PP_CUBIC_OFFSET_F1_2: + case
R200_PP_CUBIC_OFFSET_F2_2: + case R200_PP_CUBIC_OFFSET_F3_2: + case R200_PP_CUBIC_OFFSET_F4_2: + case R200_PP_CUBIC_OFFSET_F5_2: + case R200_PP_CUBIC_OFFSET_F1_3: + case R200_PP_CUBIC_OFFSET_F2_3: + case R200_PP_CUBIC_OFFSET_F3_3: + case R200_PP_CUBIC_OFFSET_F4_3: + case R200_PP_CUBIC_OFFSET_F5_3: + case R200_PP_CUBIC_OFFSET_F1_4: + case R200_PP_CUBIC_OFFSET_F2_4: + case R200_PP_CUBIC_OFFSET_F3_4: + case R200_PP_CUBIC_OFFSET_F4_4: + case R200_PP_CUBIC_OFFSET_F5_4: + case R200_PP_CUBIC_OFFSET_F1_5: + case R200_PP_CUBIC_OFFSET_F2_5: + case R200_PP_CUBIC_OFFSET_F3_5: + case R200_PP_CUBIC_OFFSET_F4_5: + case R200_PP_CUBIC_OFFSET_F5_5: + i = (reg - R200_PP_TXOFFSET_0) / 24; /* texture unit index, same 24-byte spacing as TXOFFSET */ + face = (reg - ((i * 24) + R200_PP_TXOFFSET_0)) / 4; /* 1..5 for F1..F5; stored at cube_info[face - 1] */ + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + track->textures[i].cube_info[face - 1].offset = ib_chunk->kdata[idx]; + ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->textures[i].cube_info[face - 1].robj = reloc->robj; + break; + case RADEON_RE_WIDTH_HEIGHT: + track->maxy = ((ib_chunk->kdata[idx] >> 16) & 0x7FF); /* bits 16..26 of the submitted value */ + break; + case RADEON_RB3D_COLORPITCH: + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_COLOR_TILE_ENABLE; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; + + tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); /* drop bits 16..18 of the submitted value; the flags come from the buffer object instead */ + tmp |= tile_flags; + ib[idx] = tmp; + + track->cb[0].pitch = ib_chunk->kdata[idx] & RADEON_COLORPITCH_MASK; + break; + case RADEON_RB3D_DEPTHPITCH: + track->zb.pitch = ib_chunk->kdata[idx] & RADEON_DEPTHPITCH_MASK; + break; + case RADEON_RB3D_CNTL: + switch ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { /* colour format field selects cb[0].cpp */ + case 7: + case 8: + case 9: + case 11:
+ case 12: + track->cb[0].cpp = 1; + break; + case 3: + case 4: + case 15: + track->cb[0].cpp = 2; + break; + case 6: + track->cb[0].cpp = 4; + break; + default: + DRM_ERROR("Invalid color buffer format (%d) !\n", + ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); + return -EINVAL; + } + if (ib_chunk->kdata[idx] & RADEON_DEPTHXY_OFFSET_ENABLE) { + DRM_ERROR("No support for depth xy offset in kms\n"); + return -EINVAL; + } + + track->z_enabled = !!(ib_chunk->kdata[idx] & RADEON_Z_ENABLE); + break; + case RADEON_RB3D_ZSTENCILCNTL: + switch (ib_chunk->kdata[idx] & 0xf) { /* low 4 bits select zb.cpp */ + case 0: + track->zb.cpp = 2; + break; + case 2: + case 3: + case 4: + case 5: + case 9: + case 11: + track->zb.cpp = 4; + break; + default: /* any other value leaves zb.cpp unchanged */ + break; + } + break; + case RADEON_RB3D_ZPASS_ADDR: + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + break; + case RADEON_PP_CNTL: + { + uint32_t temp = ib_chunk->kdata[idx] >> 4; /* bit (4 + i) of the register enables texture unit i */ + for (i = 0; i < track->num_texture; i++) + track->textures[i].enabled = !!(temp & (1 << i)); + } + break; + case RADEON_SE_VF_CNTL: + track->vap_vf_cntl = ib_chunk->kdata[idx]; + break; + case 0x210c: + /* VAP_VF_MAX_VTX_INDX */ + track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL; + break; + case R200_SE_VTX_FMT_0: + track->vtx_size = r200_get_vtx_size_0(ib_chunk->kdata[idx]); + break; + case R200_SE_VTX_FMT_1: + track->vtx_size += r200_get_vtx_size_1(ib_chunk->kdata[idx]); /* NOTE(review): accumulates onto the current value, so the result is only right if VTX_FMT_0 is written first each time - confirm */ + break; + case R200_PP_TXSIZE_0: + case R200_PP_TXSIZE_1: + case R200_PP_TXSIZE_2: + case R200_PP_TXSIZE_3: + case R200_PP_TXSIZE_4: + case R200_PP_TXSIZE_5: + i = (reg - R200_PP_TXSIZE_0) / 32; /* texture unit index; divisor 32 is used for all the per-unit registers below */ + track->textures[i].width = (ib_chunk->kdata[idx] & RADEON_TEX_USIZE_MASK) + 1; + track->textures[i].height = ((ib_chunk->kdata[idx] & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; + break; + case
R200_PP_TXPITCH_0: + case R200_PP_TXPITCH_1: + case R200_PP_TXPITCH_2: + case R200_PP_TXPITCH_3: + case R200_PP_TXPITCH_4: + case R200_PP_TXPITCH_5: + i = (reg - R200_PP_TXPITCH_0) / 32; + track->textures[i].pitch = ib_chunk->kdata[idx] + 32; + break; + case R200_PP_TXFILTER_0: + case R200_PP_TXFILTER_1: + case R200_PP_TXFILTER_2: + case R200_PP_TXFILTER_3: + case R200_PP_TXFILTER_4: + case R200_PP_TXFILTER_5: + i = (reg - R200_PP_TXFILTER_0) / 32; + track->textures[i].num_levels = ((ib_chunk->kdata[idx] & R200_MAX_MIP_LEVEL_MASK) + >> R200_MAX_MIP_LEVEL_SHIFT); + tmp = (ib_chunk->kdata[idx] >> 23) & 0x7; /* 3-bit field at bits 23..25 */ + if (tmp == 2 || tmp == 6) + track->textures[i].roundup_w = false; /* only ever cleared here; reset to true by r100_cs_track_clear() */ + tmp = (ib_chunk->kdata[idx] >> 27) & 0x7; /* 3-bit field at bits 27..29 */ + if (tmp == 2 || tmp == 6) + track->textures[i].roundup_h = false; + break; + case R200_PP_TXMULTI_CTL_0: + case R200_PP_TXMULTI_CTL_1: + case R200_PP_TXMULTI_CTL_2: + case R200_PP_TXMULTI_CTL_3: + case R200_PP_TXMULTI_CTL_4: + case R200_PP_TXMULTI_CTL_5: + i = (reg - R200_PP_TXMULTI_CTL_0) / 32; /* NOTE(review): i is computed but never used; the register is accepted without any tracking */ + break; + case R200_PP_TXFORMAT_X_0: + case R200_PP_TXFORMAT_X_1: + case R200_PP_TXFORMAT_X_2: + case R200_PP_TXFORMAT_X_3: + case R200_PP_TXFORMAT_X_4: + case R200_PP_TXFORMAT_X_5: + i = (reg - R200_PP_TXFORMAT_X_0) / 32; + track->textures[i].txdepth = ib_chunk->kdata[idx] & 0x7; + tmp = (ib_chunk->kdata[idx] >> 16) & 0x3; /* NOTE(review): a 2-bit mask limits tmp to 0..3, so cases 5, 6 and 7 below can never match and 3 leaves tex_coord_type unchanged - confirm the field width */ + /* 2D, 3D, CUBE */ + switch (tmp) { + case 0: + case 5: + case 6: + case 7: + track->textures[i].tex_coord_type = 0; + break; + case 1: + track->textures[i].tex_coord_type = 1; + break; + case 2: + track->textures[i].tex_coord_type = 2; + break; + } + break; + case R200_PP_TXFORMAT_0: + case R200_PP_TXFORMAT_1: + case R200_PP_TXFORMAT_2: + case R200_PP_TXFORMAT_3: + case R200_PP_TXFORMAT_4: + case R200_PP_TXFORMAT_5: + i = (reg - R200_PP_TXFORMAT_0) / 32; + if (ib_chunk->kdata[idx] & R200_TXFORMAT_NON_POWER2) { + track->textures[i].use_pitch = 1; + } else { + track->textures[i].use_pitch = 0; + track->textures[i].width = 1 <<
((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); + track->textures[i].height = 1 << ((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); /* NOTE(review): width and height shift first and mask second, which is only right if the *_MASK macros are defined unshifted - check radeon_reg.h */ + } + switch ((ib_chunk->kdata[idx] & RADEON_TXFORMAT_FORMAT_MASK)) { /* no default: a format not listed leaves cpp unchanged */ + case R200_TXFORMAT_I8: + case R200_TXFORMAT_RGB332: + case R200_TXFORMAT_Y8: + track->textures[i].cpp = 1; + break; + case R200_TXFORMAT_DXT1: + case R200_TXFORMAT_AI88: + case R200_TXFORMAT_ARGB1555: + case R200_TXFORMAT_RGB565: + case R200_TXFORMAT_ARGB4444: + case R200_TXFORMAT_VYUY422: + case R200_TXFORMAT_YVYU422: + case R200_TXFORMAT_LDVDU655: + case R200_TXFORMAT_DVDU88: + case R200_TXFORMAT_AVYU4444: + track->textures[i].cpp = 2; + break; + case R200_TXFORMAT_ARGB8888: + case R200_TXFORMAT_RGBA8888: + case R200_TXFORMAT_ABGR8888: + case R200_TXFORMAT_BGR111110: + case R200_TXFORMAT_LDVDU8888: + case R200_TXFORMAT_DXT23: + case R200_TXFORMAT_DXT45: + track->textures[i].cpp = 4; + break; + } + track->textures[i].cube_info[4].width = 1 << ((ib_chunk->kdata[idx] >> 16) & 0xf); /* last cube_info entry takes its size from bits 16..19 and 20..23 of this register */ + track->textures[i].cube_info[4].height = 1 << ((ib_chunk->kdata[idx] >> 20) & 0xf); + break; + case R200_PP_CUBIC_FACES_0: + case R200_PP_CUBIC_FACES_1: + case R200_PP_CUBIC_FACES_2: + case R200_PP_CUBIC_FACES_3: + case R200_PP_CUBIC_FACES_4: + case R200_PP_CUBIC_FACES_5: + tmp = ib_chunk->kdata[idx]; + i = (reg - R200_PP_CUBIC_FACES_0) / 32; + for (face = 0; face < 4; face++) { /* cube_info[0..3]: one byte per face, width exponent in the low nibble, height exponent in the high nibble */ + track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); + track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf); + } + break; + default: /* any register without a case above is rejected */ + printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", + reg, idx); + return -EINVAL; + } + return 0; +} + /* Publish the R200 safe-register bitmap and its entry count in rdev->config.r100; always returns 0. */ +int r200_init(struct radeon_device *rdev) +{ + rdev->config.r100.reg_safe_bm = r200_reg_safe_bm; + rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r200_reg_safe_bm); + return 0; +} diff --git a/drivers/gpu/drm/radeon/r300.c
b/drivers/gpu/drm/radeon/r300.c index 9f2460cf9db..33a2c557eac 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -32,6 +32,7 @@ #include "radeon.h" #include "radeon_drm.h" #include "radeon_share.h" +#include "r100_track.h" #include "r300_reg_safe.h" @@ -49,14 +50,10 @@ int r100_cs_packet_parse(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, unsigned idx); int r100_cs_packet_parse_vline(struct radeon_cs_parser *p); -int r100_cs_packet_next_reloc(struct radeon_cs_parser *p, - struct radeon_cs_reloc **cs_reloc); int r100_cs_parse_packet0(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, const unsigned *auth, unsigned n, radeon_packet0_check_t check); -void r100_cs_dump_packet(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt); int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, struct radeon_object *robj); @@ -706,264 +703,13 @@ int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev) /* * CS functions */ -struct r300_cs_track_cb { - struct radeon_object *robj; - unsigned pitch; - unsigned cpp; - unsigned offset; -}; - -struct r300_cs_track_array { - struct radeon_object *robj; - unsigned esize; -}; - -struct r300_cs_track_texture { - struct radeon_object *robj; - unsigned pitch; - unsigned width; - unsigned height; - unsigned num_levels; - unsigned cpp; - unsigned tex_coord_type; - unsigned txdepth; - unsigned width_11; - unsigned height_11; - bool use_pitch; - bool enabled; - bool roundup_w; - bool roundup_h; -}; - -struct r300_cs_track { - unsigned num_cb; - unsigned maxy; - unsigned vtx_size; - unsigned vap_vf_cntl; - unsigned immd_dwords; - unsigned num_arrays; - unsigned max_indx; - struct r300_cs_track_array arrays[11]; - struct r300_cs_track_cb cb[4]; - struct r300_cs_track_cb zb; - struct r300_cs_track_texture textures[16]; - bool z_enabled; -}; - -static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t) -{ - 
DRM_ERROR("pitch %d\n", t->pitch); - DRM_ERROR("width %d\n", t->width); - DRM_ERROR("height %d\n", t->height); - DRM_ERROR("num levels %d\n", t->num_levels); - DRM_ERROR("depth %d\n", t->txdepth); - DRM_ERROR("bpp %d\n", t->cpp); - DRM_ERROR("coordinate type %d\n", t->tex_coord_type); - DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); - DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); -} - -static inline int r300_cs_track_texture_check(struct radeon_device *rdev, - struct r300_cs_track *track) -{ - struct radeon_object *robj; - unsigned long size; - unsigned u, i, w, h; - - for (u = 0; u < 16; u++) { - if (!track->textures[u].enabled) - continue; - robj = track->textures[u].robj; - if (robj == NULL) { - DRM_ERROR("No texture bound to unit %u\n", u); - return -EINVAL; - } - size = 0; - for (i = 0; i <= track->textures[u].num_levels; i++) { - if (track->textures[u].use_pitch) { - w = track->textures[u].pitch / (1 << i); - } else { - w = track->textures[u].width / (1 << i); - if (rdev->family >= CHIP_RV515) - w |= track->textures[u].width_11; - if (track->textures[u].roundup_w) - w = roundup_pow_of_two(w); - } - h = track->textures[u].height / (1 << i); - if (rdev->family >= CHIP_RV515) - h |= track->textures[u].height_11; - if (track->textures[u].roundup_h) - h = roundup_pow_of_two(h); - size += w * h; - } - size *= track->textures[u].cpp; - switch (track->textures[u].tex_coord_type) { - case 0: - break; - case 1: - size *= (1 << track->textures[u].txdepth); - break; - case 2: - size *= 6; - break; - default: - DRM_ERROR("Invalid texture coordinate type %u for unit " - "%u\n", track->textures[u].tex_coord_type, u); - return -EINVAL; - } - if (size > radeon_object_size(robj)) { - DRM_ERROR("Texture of unit %u needs %lu bytes but is " - "%lu\n", u, size, radeon_object_size(robj)); - r300_cs_track_texture_print(&track->textures[u]); - return -EINVAL; - } - } - return 0; -} - -int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track 
*track) -{ - unsigned i; - unsigned long size; - unsigned prim_walk; - unsigned nverts; - - for (i = 0; i < track->num_cb; i++) { - if (track->cb[i].robj == NULL) { - DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); - return -EINVAL; - } - size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; - size += track->cb[i].offset; - if (size > radeon_object_size(track->cb[i].robj)) { - DRM_ERROR("[drm] Buffer too small for color buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_object_size(track->cb[i].robj)); - DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", - i, track->cb[i].pitch, track->cb[i].cpp, - track->cb[i].offset, track->maxy); - return -EINVAL; - } - } - if (track->z_enabled) { - if (track->zb.robj == NULL) { - DRM_ERROR("[drm] No buffer for z buffer !\n"); - return -EINVAL; - } - size = track->zb.pitch * track->zb.cpp * track->maxy; - size += track->zb.offset; - if (size > radeon_object_size(track->zb.robj)) { - DRM_ERROR("[drm] Buffer too small for z buffer " - "(need %lu have %lu) !\n", size, - radeon_object_size(track->zb.robj)); - return -EINVAL; - } - } - prim_walk = (track->vap_vf_cntl >> 4) & 0x3; - nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; - switch (prim_walk) { - case 1: - for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * track->max_indx * 4; - if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); - return -EINVAL; - } - if (size > radeon_object_size(track->arrays[i].robj)) { - DRM_ERROR("(PW %u) Vertex array %u need %lu dwords " - "have %lu dwords\n", prim_walk, i, - size >> 2, - radeon_object_size(track->arrays[i].robj) >> 2); - DRM_ERROR("Max indices %u\n", track->max_indx); - return -EINVAL; - } - } - break; - case 2: - for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * (nverts - 1) * 4; - if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); - return 
-EINVAL; - } - if (size > radeon_object_size(track->arrays[i].robj)) { - DRM_ERROR("(PW %u) Vertex array %u need %lu dwords " - "have %lu dwords\n", prim_walk, i, size >> 2, - radeon_object_size(track->arrays[i].robj) >> 2); - return -EINVAL; - } - } - break; - case 3: - size = track->vtx_size * nverts; - if (size != track->immd_dwords) { - DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", - track->immd_dwords, size); - DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", - nverts, track->vtx_size); - return -EINVAL; - } - break; - default: - DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", - prim_walk); - return -EINVAL; - } - return r300_cs_track_texture_check(rdev, track); -} - -static inline void r300_cs_track_clear(struct r300_cs_track *track) -{ - unsigned i; - - track->num_cb = 4; - track->maxy = 4096; - for (i = 0; i < track->num_cb; i++) { - track->cb[i].robj = NULL; - track->cb[i].pitch = 8192; - track->cb[i].cpp = 16; - track->cb[i].offset = 0; - } - track->z_enabled = true; - track->zb.robj = NULL; - track->zb.pitch = 8192; - track->zb.cpp = 4; - track->zb.offset = 0; - track->vtx_size = 0x7F; - track->immd_dwords = 0xFFFFFFFFUL; - track->num_arrays = 11; - track->max_indx = 0x00FFFFFFUL; - for (i = 0; i < track->num_arrays; i++) { - track->arrays[i].robj = NULL; - track->arrays[i].esize = 0x7F; - } - for (i = 0; i < 16; i++) { - track->textures[i].pitch = 16536; - track->textures[i].width = 16536; - track->textures[i].height = 16536; - track->textures[i].width_11 = 1 << 11; - track->textures[i].height_11 = 1 << 11; - track->textures[i].num_levels = 12; - track->textures[i].txdepth = 16; - track->textures[i].cpp = 64; - track->textures[i].tex_coord_type = 1; - track->textures[i].robj = NULL; - /* CS IB emission code makes sure texture unit are disabled */ - track->textures[i].enabled = false; - track->textures[i].roundup_w = true; - track->textures[i].roundup_h = true; - } -} - static int r300_packet0_check(struct radeon_cs_parser 
*p, struct radeon_cs_packet *pkt, unsigned idx, unsigned reg) { struct radeon_cs_chunk *ib_chunk; struct radeon_cs_reloc *reloc; - struct r300_cs_track *track; + struct r100_cs_track *track; volatile uint32_t *ib; uint32_t tmp, tile_flags = 0; unsigned i; @@ -971,7 +717,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, ib = p->ib->ptr; ib_chunk = &p->chunks[p->chunk_ib_idx]; - track = (struct r300_cs_track*)p->track; + track = (struct r100_cs_track *)p->track; switch(reg) { case AVIVO_D1MODE_VLINE_START_END: case RADEON_CRTC_GUI_TRIG_VLINE: @@ -985,28 +731,9 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; case RADEON_DST_PITCH_OFFSET: case RADEON_SRC_PITCH_OFFSET: - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - r100_cs_dump_packet(p, pkt); + r = r100_reloc_pitch_offset(p, pkt, idx, reg); + if (r) return r; - } - tmp = ib_chunk->kdata[idx] & 0x003fffff; - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) - tile_flags |= RADEON_DST_TILE_MACRO; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { - if (reg == RADEON_SRC_PITCH_OFFSET) { - DRM_ERROR("Cannot src blit from microtiled surface\n"); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - tile_flags |= RADEON_DST_TILE_MICRO; - } - tmp |= tile_flags; - ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp; break; case R300_RB3D_COLOROFFSET0: case R300_RB3D_COLOROFFSET1: @@ -1215,42 +942,41 @@ static int r300_packet0_check(struct radeon_cs_parser *p, tmp = (ib_chunk->kdata[idx] >> 25) & 0x3; track->textures[i].tex_coord_type = tmp; switch ((ib_chunk->kdata[idx] & 0x1F)) { - case 0: - case 2: - case 5: - case 18: - case 20: - case 21: + case R300_TX_FORMAT_X8: + case R300_TX_FORMAT_Y4X4: + case R300_TX_FORMAT_Z3Y3X2: track->textures[i].cpp = 1; break; - case 1: - case 3: - case 6: - case 7: - case 10: - case 11: - case 19: - case 22: - case 24: + case 
R300_TX_FORMAT_X16: + case R300_TX_FORMAT_Y8X8: + case R300_TX_FORMAT_Z5Y6X5: + case R300_TX_FORMAT_Z6Y5X5: + case R300_TX_FORMAT_W4Z4Y4X4: + case R300_TX_FORMAT_W1Z5Y5X5: + case R300_TX_FORMAT_DXT1: + case R300_TX_FORMAT_D3DMFT_CxV8U8: + case R300_TX_FORMAT_B8G8_B8G8: + case R300_TX_FORMAT_G8R8_G8B8: track->textures[i].cpp = 2; break; - case 4: - case 8: - case 9: - case 12: - case 13: - case 23: - case 25: - case 27: - case 30: + case R300_TX_FORMAT_Y16X16: + case R300_TX_FORMAT_Z11Y11X10: + case R300_TX_FORMAT_Z10Y11X11: + case R300_TX_FORMAT_W8Z8Y8X8: + case R300_TX_FORMAT_W2Z10Y10X10: + case 0x17: + case R300_TX_FORMAT_FL_I32: + case 0x1e: + case R300_TX_FORMAT_DXT3: + case R300_TX_FORMAT_DXT5: track->textures[i].cpp = 4; break; - case 14: - case 26: - case 28: + case R300_TX_FORMAT_W16Z16Y16X16: + case R300_TX_FORMAT_FL_R16G16B16A16: + case R300_TX_FORMAT_FL_I32A32: track->textures[i].cpp = 8; break; - case 29: + case R300_TX_FORMAT_FL_R32G32B32A32: track->textures[i].cpp = 16; break; default: @@ -1278,11 +1004,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case 0x443C: /* TX_FILTER0_[0-15] */ i = (reg - 0x4400) >> 2; - tmp = ib_chunk->kdata[idx] & 0x7;; + tmp = ib_chunk->kdata[idx] & 0x7; if (tmp == 2 || tmp == 4 || tmp == 6) { track->textures[i].roundup_w = false; } - tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;; + tmp = (ib_chunk->kdata[idx] >> 3) & 0x7; if (tmp == 2 || tmp == 4 || tmp == 6) { track->textures[i].roundup_h = false; } @@ -1370,8 +1096,9 @@ static int r300_packet3_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt) { struct radeon_cs_chunk *ib_chunk; + struct radeon_cs_reloc *reloc; - struct r300_cs_track *track; + struct r100_cs_track *track; volatile uint32_t *ib; unsigned idx; unsigned i, c; @@ -1380,7 +1107,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, ib = p->ib->ptr; ib_chunk = &p->chunks[p->chunk_ib_idx]; idx = pkt->idx + 1; - track = (struct r300_cs_track*)p->track; + track = (struct 
r100_cs_track *)p->track; switch(pkt->opcode) { case PACKET3_3D_LOAD_VBPNTR: c = ib_chunk->kdata[idx++] & 0x1F; @@ -1447,7 +1174,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, } track->vap_vf_cntl = ib_chunk->kdata[idx+1]; track->immd_dwords = pkt->count - 1; - r = r300_cs_track_check(p->rdev, track); + r = r100_cs_track_check(p->rdev, track); if (r) { return r; } @@ -1462,35 +1189,35 @@ static int r300_packet3_check(struct radeon_cs_parser *p, } track->vap_vf_cntl = ib_chunk->kdata[idx]; track->immd_dwords = pkt->count; - r = r300_cs_track_check(p->rdev, track); + r = r100_cs_track_check(p->rdev, track); if (r) { return r; } break; case PACKET3_3D_DRAW_VBUF: track->vap_vf_cntl = ib_chunk->kdata[idx + 1]; - r = r300_cs_track_check(p->rdev, track); + r = r100_cs_track_check(p->rdev, track); if (r) { return r; } break; case PACKET3_3D_DRAW_VBUF_2: track->vap_vf_cntl = ib_chunk->kdata[idx]; - r = r300_cs_track_check(p->rdev, track); + r = r100_cs_track_check(p->rdev, track); if (r) { return r; } break; case PACKET3_3D_DRAW_INDX: track->vap_vf_cntl = ib_chunk->kdata[idx + 1]; - r = r300_cs_track_check(p->rdev, track); + r = r100_cs_track_check(p->rdev, track); if (r) { return r; } break; case PACKET3_3D_DRAW_INDX_2: track->vap_vf_cntl = ib_chunk->kdata[idx]; - r = r300_cs_track_check(p->rdev, track); + r = r100_cs_track_check(p->rdev, track); if (r) { return r; } @@ -1507,10 +1234,10 @@ static int r300_packet3_check(struct radeon_cs_parser *p, int r300_cs_parse(struct radeon_cs_parser *p) { struct radeon_cs_packet pkt; - struct r300_cs_track track; + struct r100_cs_track track; int r; - r300_cs_track_clear(&track); + r100_cs_track_clear(p->rdev, &track); p->track = &track; do { r = r100_cs_packet_parse(p, &pkt, p->idx); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 149974d13aa..6c35c3c2919 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -604,8 +604,14 @@ struct radeon_asic { 
void (*bandwidth_update)(struct radeon_device *rdev); }; +struct r100_asic { + const unsigned *reg_safe_bm; + unsigned reg_safe_bm_size; +}; + union radeon_asic_config { struct r300_asic r300; + struct r100_asic r100; }; diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 8ace15156c4..c9cbd8ae1f9 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -42,6 +42,7 @@ void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable); * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */ int r100_init(struct radeon_device *rdev); +int r200_init(struct radeon_device *rdev); uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg); void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); void r100_errata(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h index 4df43f62c67..404b39bf343 100644 --- a/drivers/gpu/drm/radeon/radeon_reg.h +++ b/drivers/gpu/drm/radeon/radeon_reg.h @@ -1945,6 +1945,11 @@ # define RADEON_TXFORMAT_DXT1 (12 << 0) # define RADEON_TXFORMAT_DXT23 (14 << 0) # define RADEON_TXFORMAT_DXT45 (15 << 0) +# define RADEON_TXFORMAT_SHADOW16 (16 << 0) +# define RADEON_TXFORMAT_SHADOW32 (17 << 0) +# define RADEON_TXFORMAT_DUDV88 (18 << 0) +# define RADEON_TXFORMAT_LDUDV655 (19 << 0) +# define RADEON_TXFORMAT_LDUDUV8888 (20 << 0) # define RADEON_TXFORMAT_FORMAT_MASK (31 << 0) # define RADEON_TXFORMAT_FORMAT_SHIFT 0 # define RADEON_TXFORMAT_APPLE_YUV_MODE (1 << 5) @@ -2203,7 +2208,7 @@ # define RADEON_ROP_ENABLE (1 << 6) # define RADEON_STENCIL_ENABLE (1 << 7) # define RADEON_Z_ENABLE (1 << 8) -# define RADEON_DEPTH_XZ_OFFEST_ENABLE (1 << 9) +# define RADEON_DEPTHXY_OFFSET_ENABLE (1 << 9) # define RADEON_RB3D_COLOR_FORMAT_SHIFT 10 # define RADEON_COLOR_FORMAT_ARGB1555 3 @@ -2773,7 +2778,12 @@ # define R200_TXFORMAT_DXT1 (12 << 0) # define R200_TXFORMAT_DXT23 (14 << 0) # define 
R200_TXFORMAT_DXT45 (15 << 0) +# define R200_TXFORMAT_DVDU88 (18 << 0) +# define R200_TXFORMAT_LDVDU655 (19 << 0) +# define R200_TXFORMAT_LDVDU8888 (20 << 0) +# define R200_TXFORMAT_GR1616 (21 << 0) # define R200_TXFORMAT_ABGR8888 (22 << 0) +# define R200_TXFORMAT_BGR111110 (23 << 0) # define R200_TXFORMAT_FORMAT_MASK (31 << 0) # define R200_TXFORMAT_FORMAT_SHIFT 0 # define R200_TXFORMAT_ALPHA_IN_MAP (1 << 6) @@ -2818,6 +2828,13 @@ #define R200_PP_TXPITCH_4 0x2c90 /* NPOT only */ #define R200_PP_TXPITCH_5 0x2cb0 /* NPOT only */ +#define R200_PP_CUBIC_FACES_0 0x2c18 +#define R200_PP_CUBIC_FACES_1 0x2c38 +#define R200_PP_CUBIC_FACES_2 0x2c58 +#define R200_PP_CUBIC_FACES_3 0x2c78 +#define R200_PP_CUBIC_FACES_4 0x2c98 +#define R200_PP_CUBIC_FACES_5 0x2cb8 + #define R200_PP_TXOFFSET_0 0x2d00 # define R200_TXO_ENDIAN_NO_SWAP (0 << 0) # define R200_TXO_ENDIAN_BYTE_SWAP (1 << 0) @@ -2829,11 +2846,44 @@ # define R200_TXO_MICRO_TILE (1 << 3) # define R200_TXO_OFFSET_MASK 0xffffffe0 # define R200_TXO_OFFSET_SHIFT 5 +#define R200_PP_CUBIC_OFFSET_F1_0 0x2d04 +#define R200_PP_CUBIC_OFFSET_F2_0 0x2d08 +#define R200_PP_CUBIC_OFFSET_F3_0 0x2d0c +#define R200_PP_CUBIC_OFFSET_F4_0 0x2d10 +#define R200_PP_CUBIC_OFFSET_F5_0 0x2d14 + #define R200_PP_TXOFFSET_1 0x2d18 +#define R200_PP_CUBIC_OFFSET_F1_1 0x2d1c +#define R200_PP_CUBIC_OFFSET_F2_1 0x2d20 +#define R200_PP_CUBIC_OFFSET_F3_1 0x2d24 +#define R200_PP_CUBIC_OFFSET_F4_1 0x2d28 +#define R200_PP_CUBIC_OFFSET_F5_1 0x2d2c + #define R200_PP_TXOFFSET_2 0x2d30 +#define R200_PP_CUBIC_OFFSET_F1_2 0x2d34 +#define R200_PP_CUBIC_OFFSET_F2_2 0x2d38 +#define R200_PP_CUBIC_OFFSET_F3_2 0x2d3c +#define R200_PP_CUBIC_OFFSET_F4_2 0x2d40 +#define R200_PP_CUBIC_OFFSET_F5_2 0x2d44 + #define R200_PP_TXOFFSET_3 0x2d48 +#define R200_PP_CUBIC_OFFSET_F1_3 0x2d4c +#define R200_PP_CUBIC_OFFSET_F2_3 0x2d50 +#define R200_PP_CUBIC_OFFSET_F3_3 0x2d54 +#define R200_PP_CUBIC_OFFSET_F4_3 0x2d58 +#define R200_PP_CUBIC_OFFSET_F5_3 0x2d5c #define R200_PP_TXOFFSET_4 
0x2d60 +#define R200_PP_CUBIC_OFFSET_F1_4 0x2d64 +#define R200_PP_CUBIC_OFFSET_F2_4 0x2d68 +#define R200_PP_CUBIC_OFFSET_F3_4 0x2d6c +#define R200_PP_CUBIC_OFFSET_F4_4 0x2d70 +#define R200_PP_CUBIC_OFFSET_F5_4 0x2d74 #define R200_PP_TXOFFSET_5 0x2d78 +#define R200_PP_CUBIC_OFFSET_F1_5 0x2d7c +#define R200_PP_CUBIC_OFFSET_F2_5 0x2d80 +#define R200_PP_CUBIC_OFFSET_F3_5 0x2d84 +#define R200_PP_CUBIC_OFFSET_F4_5 0x2d88 +#define R200_PP_CUBIC_OFFSET_F5_5 0x2d8c #define R200_PP_TFACTOR_0 0x2ee0 #define R200_PP_TFACTOR_1 0x2ee4 @@ -3175,6 +3225,11 @@ # define R200_FORCE_INORDER_PROC (1<<31) #define R200_PP_CNTL_X 0x2cc4 #define R200_PP_TXMULTI_CTL_0 0x2c1c +#define R200_PP_TXMULTI_CTL_1 0x2c3c +#define R200_PP_TXMULTI_CTL_2 0x2c5c +#define R200_PP_TXMULTI_CTL_3 0x2c7c +#define R200_PP_TXMULTI_CTL_4 0x2c9c +#define R200_PP_TXMULTI_CTL_5 0x2cbc #define R200_SE_VTX_STATE_CNTL 0x2180 # define R200_UPDATE_USER_COLOR_0_ENA_MASK (1<<16) diff --git a/drivers/gpu/drm/radeon/reg_srcs/r100 b/drivers/gpu/drm/radeon/reg_srcs/r100 new file mode 100644 index 00000000000..f7ee062f118 --- /dev/null +++ b/drivers/gpu/drm/radeon/reg_srcs/r100 @@ -0,0 +1,105 @@ +r100 0x3294 +0x1434 SRC_Y_X +0x1438 DST_Y_X +0x143C DST_HEIGHT_WIDTH +0x146C DP_GUI_MASTER_CNTL +0x1474 BRUSH_Y_X +0x1478 DP_BRUSH_BKGD_CLR +0x147C DP_BRUSH_FRGD_CLR +0x1480 BRUSH_DATA0 +0x1484 BRUSH_DATA1 +0x1598 DST_WIDTH_HEIGHT +0x15C0 CLR_CMP_CNTL +0x15C4 CLR_CMP_CLR_SRC +0x15C8 CLR_CMP_CLR_DST +0x15CC CLR_CMP_MSK +0x15D8 DP_SRC_FRGD_CLR +0x15DC DP_SRC_BKGD_CLR +0x1600 DST_LINE_START +0x1604 DST_LINE_END +0x1608 DST_LINE_PATCOUNT +0x16C0 DP_CNTL +0x16CC DP_WRITE_MSK +0x16D0 DP_CNTL_XDIR_YDIR_YMAJOR +0x16E8 DEFAULT_SC_BOTTOM_RIGHT +0x16EC SC_TOP_LEFT +0x16F0 SC_BOTTOM_RIGHT +0x16F4 SRC_SC_BOTTOM_RIGHT +0x1714 DSTCACHE_CTLSTAT +0x1720 WAIT_UNTIL +0x172C RBBM_GUICNTL +0x1810 FOG_3D_TABLE_START +0x1814 FOG_3D_TABLE_END +0x1a14 FOG_TABLE_INDEX +0x1a18 FOG_TABLE_DATA +0x1c14 PP_MISC +0x1c18 PP_FOG_COLOR +0x1c1c RE_SOLID_COLOR +0x1c20 
RB3D_BLENDCNTL +0x1c4c SE_CNTL +0x1c50 SE_COORD_FMT +0x1c60 PP_TXCBLEND_0 +0x1c64 PP_TXABLEND_0 +0x1c68 PP_TFACTOR_0 +0x1c78 PP_TXCBLEND_1 +0x1c7c PP_TXABLEND_1 +0x1c80 PP_TFACTOR_1 +0x1c90 PP_TXCBLEND_2 +0x1c94 PP_TXABLEND_2 +0x1c98 PP_TFACTOR_2 +0x1cc8 RE_STIPPLE_ADDR +0x1ccc RE_STIPPLE_DATA +0x1cd0 RE_LINE_PATTERN +0x1cd4 RE_LINE_STATE +0x1d40 PP_BORDER_COLOR0 +0x1d44 PP_BORDER_COLOR1 +0x1d48 PP_BORDER_COLOR2 +0x1d7c RB3D_STENCILREFMASK +0x1d80 RB3D_ROPCNTL +0x1d84 RB3D_PLANEMASK +0x1d98 VAP_VPORT_XSCALE +0x1d9C VAP_VPORT_XOFFSET +0x1da0 VAP_VPORT_YSCALE +0x1da4 VAP_VPORT_YOFFSET +0x1da8 VAP_VPORT_ZSCALE +0x1dac VAP_VPORT_ZOFFSET +0x1db0 SE_ZBIAS_FACTOR +0x1db4 SE_ZBIAS_CONSTANT +0x1db8 SE_LINE_WIDTH +0x2140 SE_CNTL_STATUS +0x2200 SE_TCL_VECTOR_INDX_REG +0x2204 SE_TCL_VECTOR_DATA_REG +0x2208 SE_TCL_SCALAR_INDX_REG +0x220c SE_TCL_SCALAR_DATA_REG +0x2210 SE_TCL_MATERIAL_EMISSIVE_RED +0x2214 SE_TCL_MATERIAL_EMISSIVE_GREEN +0x2218 SE_TCL_MATERIAL_EMISSIVE_BLUE +0x221c SE_TCL_MATERIAL_EMISSIVE_ALPHA +0x2220 SE_TCL_MATERIAL_AMBIENT_RED +0x2224 SE_TCL_MATERIAL_AMBIENT_GREEN +0x2228 SE_TCL_MATERIAL_AMBIENT_BLUE +0x222c SE_TCL_MATERIAL_AMBIENT_ALPHA +0x2230 SE_TCL_MATERIAL_DIFFUSE_RED +0x2234 SE_TCL_MATERIAL_DIFFUSE_GREEN +0x2238 SE_TCL_MATERIAL_DIFFUSE_BLUE +0x223c SE_TCL_MATERIAL_DIFFUSE_ALPHA +0x2240 SE_TCL_MATERIAL_SPECULAR_RED +0x2244 SE_TCL_MATERIAL_SPECULAR_GREEN +0x2248 SE_TCL_MATERIAL_SPECULAR_BLUE +0x224c SE_TCL_MATERIAL_SPECULAR_ALPHA +0x2250 SE_TCL_SHININESS +0x2254 SE_TCL_OUTPUT_VTX_FMT +0x2258 SE_TCL_OUTPUT_VTX_SEL +0x225c SE_TCL_MATRIX_SELECT_0 +0x2260 SE_TCL_MATRIX_SELECT_1 +0x2264 SE_TCL_UCP_VERT_BLEND_CNTL +0x2268 SE_TCL_TEXTURE_PROC_CTL +0x226c SE_TCL_LIGHT_MODEL_CTL +0x2270 SE_TCL_PER_LIGHT_CTL_0 +0x2274 SE_TCL_PER_LIGHT_CTL_1 +0x2278 SE_TCL_PER_LIGHT_CTL_2 +0x227c SE_TCL_PER_LIGHT_CTL_3 +0x2284 SE_TCL_STATE_FLUSH +0x26c0 RE_TOP_LEFT +0x26c4 RE_MISC +0x3290 RB3D_ZPASS_DATA diff --git a/drivers/gpu/drm/radeon/reg_srcs/r200 
b/drivers/gpu/drm/radeon/reg_srcs/r200 new file mode 100644 index 00000000000..6021c8849a1 --- /dev/null +++ b/drivers/gpu/drm/radeon/reg_srcs/r200 @@ -0,0 +1,184 @@ +r200 0x3294 +0x1434 SRC_Y_X +0x1438 DST_Y_X +0x143C DST_HEIGHT_WIDTH +0x146C DP_GUI_MASTER_CNTL +0x1474 BRUSH_Y_X +0x1478 DP_BRUSH_BKGD_CLR +0x147C DP_BRUSH_FRGD_CLR +0x1480 BRUSH_DATA0 +0x1484 BRUSH_DATA1 +0x1598 DST_WIDTH_HEIGHT +0x15C0 CLR_CMP_CNTL +0x15C4 CLR_CMP_CLR_SRC +0x15C8 CLR_CMP_CLR_DST +0x15CC CLR_CMP_MSK +0x15D8 DP_SRC_FRGD_CLR +0x15DC DP_SRC_BKGD_CLR +0x1600 DST_LINE_START +0x1604 DST_LINE_END +0x1608 DST_LINE_PATCOUNT +0x16C0 DP_CNTL +0x16CC DP_WRITE_MSK +0x16D0 DP_CNTL_XDIR_YDIR_YMAJOR +0x16E8 DEFAULT_SC_BOTTOM_RIGHT +0x16EC SC_TOP_LEFT +0x16F0 SC_BOTTOM_RIGHT +0x16F4 SRC_SC_BOTTOM_RIGHT +0x1714 DSTCACHE_CTLSTAT +0x1720 WAIT_UNTIL +0x172C RBBM_GUICNTL +0x1c14 PP_MISC +0x1c18 PP_FOG_COLOR +0x1c1c RE_SOLID_COLOR +0x1c20 RB3D_BLENDCNTL +0x1c4c SE_CNTL +0x1c50 RE_CNTL +0x1cc8 RE_STIPPLE_ADDR +0x1ccc RE_STIPPLE_DATA +0x1cd0 RE_LINE_PATTERN +0x1cd4 RE_LINE_STATE +0x1cd8 RE_SCISSOR_TL_0 +0x1cdc RE_SCISSOR_BR_0 +0x1ce0 RE_SCISSOR_TL_1 +0x1ce4 RE_SCISSOR_BR_1 +0x1ce8 RE_SCISSOR_TL_2 +0x1cec RE_SCISSOR_BR_2 +0x1d60 RB3D_DEPTHXY_OFFSET +0x1d7c RB3D_STENCILREFMASK +0x1d80 RB3D_ROPCNTL +0x1d84 RB3D_PLANEMASK +0x1d98 VAP_VPORT_XSCALE +0x1d9c VAP_VPORT_XOFFSET +0x1da0 VAP_VPORT_YSCALE +0x1da4 VAP_VPORT_YOFFSET +0x1da8 VAP_VPORT_ZSCALE +0x1dac VAP_VPORT_ZOFFSET +0x1db0 SE_ZBIAS_FACTOR +0x1db4 SE_ZBIAS_CONSTANT +0x1db8 SE_LINE_WIDTH +0x2080 SE_VAP_CNTL +0x2090 SE_TCL_OUTPUT_VTX_FMT_0 +0x2094 SE_TCL_OUTPUT_VTX_FMT_1 +0x20b0 SE_VTE_CNTL +0x2140 SE_CNTL_STATUS +0x2180 SE_VTX_STATE_CNTL +0x2200 SE_TCL_VECTOR_INDX_REG +0x2204 SE_TCL_VECTOR_DATA_REG +0x2208 SE_TCL_SCALAR_INDX_REG +0x220c SE_TCL_SCALAR_DATA_REG +0x2230 SE_TCL_MATRIX_SEL_0 +0x2234 SE_TCL_MATRIX_SEL_1 +0x2238 SE_TCL_MATRIX_SEL_2 +0x223c SE_TCL_MATRIX_SEL_3 +0x2240 SE_TCL_MATRIX_SEL_4 +0x2250 SE_TCL_OUTPUT_VTX_COMP_SEL +0x2254 
SE_TCL_INPUT_VTX_VECTOR_ADDR_0 +0x2258 SE_TCL_INPUT_VTX_VECTOR_ADDR_1 +0x225c SE_TCL_INPUT_VTX_VECTOR_ADDR_2 +0x2260 SE_TCL_INPUT_VTX_VECTOR_ADDR_3 +0x2268 SE_TCL_LIGHT_MODEL_CTL_0 +0x226c SE_TCL_LIGHT_MODEL_CTL_1 +0x2270 SE_TCL_PER_LIGHT_CTL_0 +0x2274 SE_TCL_PER_LIGHT_CTL_1 +0x2278 SE_TCL_PER_LIGHT_CTL_2 +0x227c SE_TCL_PER_LIGHT_CTL_3 +0x2284 VAP_PVS_STATE_FLUSH_REG +0x22a8 SE_TCL_TEX_PROC_CTL_2 +0x22ac SE_TCL_TEX_PROC_CTL_3 +0x22b0 SE_TCL_TEX_PROC_CTL_0 +0x22b4 SE_TCL_TEX_PROC_CTL_1 +0x22b8 SE_TCL_TEX_CYL_WRAP_CTL +0x22c0 SE_TCL_UCP_VERT_BLEND_CNTL +0x22c4 SE_TCL_POINT_SPRITE_CNTL +0x2648 RE_POINTSIZE +0x26c0 RE_TOP_LEFT +0x26c4 RE_MISC +0x26f0 RE_AUX_SCISSOR_CNTL +0x2c14 PP_BORDER_COLOR_0 +0x2c34 PP_BORDER_COLOR_1 +0x2c54 PP_BORDER_COLOR_2 +0x2c74 PP_BORDER_COLOR_3 +0x2c94 PP_BORDER_COLOR_4 +0x2cb4 PP_BORDER_COLOR_5 +0x2cc4 PP_CNTL_X +0x2cf8 PP_TRI_PERF +0x2cfc PP_PERF_CNTL +0x2d9c PP_TAM_DEBUG3 +0x2ee0 PP_TFACTOR_0 +0x2ee4 PP_TFACTOR_1 +0x2ee8 PP_TFACTOR_2 +0x2eec PP_TFACTOR_3 +0x2ef0 PP_TFACTOR_4 +0x2ef4 PP_TFACTOR_5 +0x2ef8 PP_TFACTOR_6 +0x2efc PP_TFACTOR_7 +0x2f00 PP_TXCBLEND_0 +0x2f04 PP_TXCBLEND2_0 +0x2f08 PP_TXABLEND_0 +0x2f0c PP_TXABLEND2_0 +0x2f10 PP_TXCBLEND_1 +0x2f14 PP_TXCBLEND2_1 +0x2f18 PP_TXABLEND_1 +0x2f1c PP_TXABLEND2_1 +0x2f20 PP_TXCBLEND_2 +0x2f24 PP_TXCBLEND2_2 +0x2f28 PP_TXABLEND_2 +0x2f2c PP_TXABLEND2_2 +0x2f30 PP_TXCBLEND_3 +0x2f34 PP_TXCBLEND2_3 +0x2f38 PP_TXABLEND_3 +0x2f3c PP_TXABLEND2_3 +0x2f40 PP_TXCBLEND_4 +0x2f44 PP_TXCBLEND2_4 +0x2f48 PP_TXABLEND_4 +0x2f4c PP_TXABLEND2_4 +0x2f50 PP_TXCBLEND_5 +0x2f54 PP_TXCBLEND2_5 +0x2f58 PP_TXABLEND_5 +0x2f5c PP_TXABLEND2_5 +0x2f60 PP_TXCBLEND_6 +0x2f64 PP_TXCBLEND2_6 +0x2f68 PP_TXABLEND_6 +0x2f6c PP_TXABLEND2_6 +0x2f70 PP_TXCBLEND_7 +0x2f74 PP_TXCBLEND2_7 +0x2f78 PP_TXABLEND_7 +0x2f7c PP_TXABLEND2_7 +0x2f80 PP_TXCBLEND_8 +0x2f84 PP_TXCBLEND2_8 +0x2f88 PP_TXABLEND_8 +0x2f8c PP_TXABLEND2_8 +0x2f90 PP_TXCBLEND_9 +0x2f94 PP_TXCBLEND2_9 +0x2f98 PP_TXABLEND_9 +0x2f9c PP_TXABLEND2_9 +0x2fa0 
PP_TXCBLEND_10 +0x2fa4 PP_TXCBLEND2_10 +0x2fa8 PP_TXABLEND_10 +0x2fac PP_TXABLEND2_10 +0x2fb0 PP_TXCBLEND_11 +0x2fb4 PP_TXCBLEND2_11 +0x2fb8 PP_TXABLEND_11 +0x2fbc PP_TXABLEND2_11 +0x2fc0 PP_TXCBLEND_12 +0x2fc4 PP_TXCBLEND2_12 +0x2fc8 PP_TXABLEND_12 +0x2fcc PP_TXABLEND2_12 +0x2fd0 PP_TXCBLEND_13 +0x2fd4 PP_TXCBLEND2_13 +0x2fd8 PP_TXABLEND_13 +0x2fdc PP_TXABLEND2_13 +0x2fe0 PP_TXCBLEND_14 +0x2fe4 PP_TXCBLEND2_14 +0x2fe8 PP_TXABLEND_14 +0x2fec PP_TXABLEND2_14 +0x2ff0 PP_TXCBLEND_15 +0x2ff4 PP_TXCBLEND2_15 +0x2ff8 PP_TXABLEND_15 +0x2ffc PP_TXABLEND2_15 +0x3218 RB3D_BLENCOLOR +0x321c RB3D_ABLENDCNTL +0x3220 RB3D_CBLENDCNTL +0x3290 RB3D_ZPASS_DATA + diff --git a/drivers/gpu/drm/radeon/reg_srcs/rn50 b/drivers/gpu/drm/radeon/reg_srcs/rn50 new file mode 100644 index 00000000000..2687b630726 --- /dev/null +++ b/drivers/gpu/drm/radeon/reg_srcs/rn50 @@ -0,0 +1,30 @@ +rn50 0x3294 +0x1434 SRC_Y_X +0x1438 DST_Y_X +0x143C DST_HEIGHT_WIDTH +0x146C DP_GUI_MASTER_CNTL +0x1474 BRUSH_Y_X +0x1478 DP_BRUSH_BKGD_CLR +0x147C DP_BRUSH_FRGD_CLR +0x1480 BRUSH_DATA0 +0x1484 BRUSH_DATA1 +0x1598 DST_WIDTH_HEIGHT +0x15C0 CLR_CMP_CNTL +0x15C4 CLR_CMP_CLR_SRC +0x15C8 CLR_CMP_CLR_DST +0x15CC CLR_CMP_MSK +0x15D8 DP_SRC_FRGD_CLR +0x15DC DP_SRC_BKGD_CLR +0x1600 DST_LINE_START +0x1604 DST_LINE_END +0x1608 DST_LINE_PATCOUNT +0x16C0 DP_CNTL +0x16CC DP_WRITE_MSK +0x16D0 DP_CNTL_XDIR_YDIR_YMAJOR +0x16E8 DEFAULT_SC_BOTTOM_RIGHT +0x16EC SC_TOP_LEFT +0x16F0 SC_BOTTOM_RIGHT +0x16F4 SRC_SC_BOTTOM_RIGHT +0x1714 DSTCACHE_CTLSTAT +0x1720 WAIT_UNTIL +0x172C RBBM_GUICNTL -- cgit v1.2.3-70-g09d2 From 3ce0a23d2d253185df24e22e3d5f89800bb3dd1c Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 8 Sep 2009 10:10:24 +1000 Subject: drm/radeon/kms: add r600 KMS support This adds the r600 KMS + CS support to the Linux kernel. The r600 TTM support is quite basic and still needs more work esp around using interrupts, but the polled fencing should work okay for now. 
Also currently TTM is using memcpy to do VRAM moves, the code is here to use a 3D blit to do this, but isn't fully debugged yet. Authors: Alex Deucher Dave Airlie Jerome Glisse Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/Makefile | 5 +- drivers/gpu/drm/radeon/atombios_crtc.c | 1 + drivers/gpu/drm/radeon/avivod.h | 60 + drivers/gpu/drm/radeon/r100.c | 135 ++- drivers/gpu/drm/radeon/r100d.h | 76 ++ drivers/gpu/drm/radeon/r300.c | 3 +- drivers/gpu/drm/radeon/r300.h | 36 - drivers/gpu/drm/radeon/r300d.h | 76 ++ drivers/gpu/drm/radeon/r600.c | 1714 ++++++++++++++++++++++++++-- drivers/gpu/drm/radeon/r600_blit.c | 855 ++++++++++++++ drivers/gpu/drm/radeon/r600_blit_kms.c | 777 +++++++++++++ drivers/gpu/drm/radeon/r600_blit_shaders.c | 1072 +++++++++++++++++ drivers/gpu/drm/radeon/r600_blit_shaders.h | 14 + drivers/gpu/drm/radeon/r600_cp.c | 252 +++- drivers/gpu/drm/radeon/r600_cs.c | 658 +++++++++++ drivers/gpu/drm/radeon/r600d.h | 661 +++++++++++ drivers/gpu/drm/radeon/radeon.h | 120 +- drivers/gpu/drm/radeon/radeon_asic.h | 156 ++- drivers/gpu/drm/radeon/radeon_atombios.c | 4 - drivers/gpu/drm/radeon/radeon_clocks.c | 10 +- drivers/gpu/drm/radeon/radeon_device.c | 340 +++--- drivers/gpu/drm/radeon/radeon_drv.h | 141 ++- drivers/gpu/drm/radeon/radeon_fence.c | 54 +- drivers/gpu/drm/radeon/radeon_reg.h | 18 + drivers/gpu/drm/radeon/radeon_ring.c | 119 +- drivers/gpu/drm/radeon/radeon_share.h | 77 ++ drivers/gpu/drm/radeon/radeon_state.c | 18 +- drivers/gpu/drm/radeon/radeon_ttm.c | 7 +- drivers/gpu/drm/radeon/rs400.c | 2 +- drivers/gpu/drm/radeon/rs780.c | 102 -- drivers/gpu/drm/radeon/rv515.c | 2 +- drivers/gpu/drm/radeon/rv515d.h | 220 ++++ drivers/gpu/drm/radeon/rv515r.h | 170 --- drivers/gpu/drm/radeon/rv770.c | 987 +++++++++++++++- drivers/gpu/drm/radeon/rv770d.h | 341 ++++++ 35 files changed, 8483 insertions(+), 800 deletions(-) create mode 100644 drivers/gpu/drm/radeon/avivod.h create mode 100644 
drivers/gpu/drm/radeon/r100d.h delete mode 100644 drivers/gpu/drm/radeon/r300.h create mode 100644 drivers/gpu/drm/radeon/r300d.h create mode 100644 drivers/gpu/drm/radeon/r600_blit.c create mode 100644 drivers/gpu/drm/radeon/r600_blit_kms.c create mode 100644 drivers/gpu/drm/radeon/r600_blit_shaders.c create mode 100644 drivers/gpu/drm/radeon/r600_blit_shaders.h create mode 100644 drivers/gpu/drm/radeon/r600_cs.c create mode 100644 drivers/gpu/drm/radeon/r600d.h delete mode 100644 drivers/gpu/drm/radeon/rs780.c create mode 100644 drivers/gpu/drm/radeon/rv515d.h delete mode 100644 drivers/gpu/drm/radeon/rv515r.h create mode 100644 drivers/gpu/drm/radeon/rv770d.h (limited to 'drivers/gpu/drm/radeon/r300.c') diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index c5db0c4fe78..14c3fe69272 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -46,8 +46,9 @@ radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \ radeon_encoders.o radeon_display.o radeon_cursor.o radeon_i2c.o \ radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \ radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \ - rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o \ - radeon_test.o r200.o radeon_legacy_tv.o + rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ + r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \ + r600_blit_kms.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 8e31e992ec5..a7edd0f2ac3 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -389,6 +389,7 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) pll_flags |= RADEON_PLL_USE_REF_DIV; } radeon_encoder = to_radeon_encoder(encoder); + break; } } diff --git a/drivers/gpu/drm/radeon/avivod.h 
b/drivers/gpu/drm/radeon/avivod.h new file mode 100644 index 00000000000..d4e6e6e4a93 --- /dev/null +++ b/drivers/gpu/drm/radeon/avivod.h @@ -0,0 +1,60 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2009 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef AVIVOD_H +#define AVIVOD_H + + +#define D1CRTC_CONTROL 0x6080 +#define CRTC_EN (1 << 0) +#define D1CRTC_UPDATE_LOCK 0x60E8 +#define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110 +#define D1GRPH_SECONDARY_SURFACE_ADDRESS 0x6118 + +#define D2CRTC_CONTROL 0x6880 +#define D2CRTC_UPDATE_LOCK 0x68E8 +#define D2GRPH_PRIMARY_SURFACE_ADDRESS 0x6910 +#define D2GRPH_SECONDARY_SURFACE_ADDRESS 0x6918 + +#define D1VGA_CONTROL 0x0330 +#define DVGA_CONTROL_MODE_ENABLE (1 << 0) +#define DVGA_CONTROL_TIMING_SELECT (1 << 8) +#define DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9) +#define DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10) +#define DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16) +#define DVGA_CONTROL_ROTATE (1 << 24) +#define D2VGA_CONTROL 0x0338 + +#define VGA_HDP_CONTROL 0x328 +#define VGA_MEM_PAGE_SELECT_EN (1 << 0) +#define VGA_MEMORY_DISABLE (1 << 4) +#define VGA_RBBM_LOCK_DISABLE (1 << 8) +#define VGA_SOFT_RESET (1 << 16) +#define VGA_MEMORY_BASE_ADDRESS 0x0310 +#define VGA_RENDER_CONTROL 0x0300 +#define VGA_VSTATUS_CNTL_MASK 0x00030000 + +#endif diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index ee3ab62417e..5708c07ce73 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -31,6 +31,8 @@ #include "radeon_drm.h" #include "radeon_reg.h" #include "radeon.h" +#include "r100d.h" + #include #include @@ -391,9 +393,9 @@ int r100_wb_init(struct radeon_device *rdev) return r; } } - WREG32(0x774, rdev->wb.gpu_addr); - WREG32(0x70C, rdev->wb.gpu_addr + 1024); - WREG32(0x770, 0xff); + WREG32(RADEON_SCRATCH_ADDR, rdev->wb.gpu_addr); + WREG32(RADEON_CP_RB_RPTR_ADDR, rdev->wb.gpu_addr + 1024); + WREG32(RADEON_SCRATCH_UMSK, 0xff); return 0; } @@ -559,18 +561,18 @@ static int r100_cp_init_microcode(struct radeon_device *rdev) fw_name = FIRMWARE_R520; } - err = request_firmware(&rdev->fw, fw_name, &pdev->dev); + err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev); 
platform_device_unregister(pdev); if (err) { printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n", fw_name); - } else if (rdev->fw->size % 8) { + } else if (rdev->me_fw->size % 8) { printk(KERN_ERR "radeon_cp: Bogus length %zu in firmware \"%s\"\n", - rdev->fw->size, fw_name); + rdev->me_fw->size, fw_name); err = -EINVAL; - release_firmware(rdev->fw); - rdev->fw = NULL; + release_firmware(rdev->me_fw); + rdev->me_fw = NULL; } return err; } @@ -584,9 +586,9 @@ static void r100_cp_load_microcode(struct radeon_device *rdev) "programming pipes. Bad things might happen.\n"); } - if (rdev->fw) { - size = rdev->fw->size / 4; - fw_data = (const __be32 *)&rdev->fw->data[0]; + if (rdev->me_fw) { + size = rdev->me_fw->size / 4; + fw_data = (const __be32 *)&rdev->me_fw->data[0]; WREG32(RADEON_CP_ME_RAM_ADDR, 0); for (i = 0; i < size; i += 2) { WREG32(RADEON_CP_ME_RAM_DATAH, @@ -632,7 +634,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) DRM_INFO("radeon: cp idle (0x%08X)\n", tmp); } - if (!rdev->fw) { + if (!rdev->me_fw) { r = r100_cp_init_microcode(rdev); if (r) { DRM_ERROR("Failed to load firmware!\n"); @@ -765,6 +767,12 @@ int r100_cp_reset(struct radeon_device *rdev) return -1; } +void r100_cp_commit(struct radeon_device *rdev) +{ + WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); + (void)RREG32(RADEON_CP_RB_WPTR); +} + /* * CS functions @@ -2954,3 +2962,106 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track } } } + +int r100_ring_test(struct radeon_device *rdev) +{ + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = radeon_scratch_get(rdev, &scratch); + if (r) { + DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = radeon_ring_lock(rdev, 2); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + radeon_scratch_free(rdev, scratch); + return r; + } + radeon_ring_write(rdev, PACKET0(scratch, 0)); + radeon_ring_write(rdev, 
0xDEADBEEF); + radeon_ring_unlock_commit(rdev); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) { + break; + } + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ring test succeeded in %d usecs\n", i); + } else { + DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + radeon_scratch_free(rdev, scratch); + return r; +} + +void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1)); + radeon_ring_write(rdev, ib->gpu_addr); + radeon_ring_write(rdev, ib->length_dw); +} + +int r100_ib_test(struct radeon_device *rdev) +{ + struct radeon_ib *ib; + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = radeon_scratch_get(rdev, &scratch); + if (r) { + DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = radeon_ib_get(rdev, &ib); + if (r) { + return r; + } + ib->ptr[0] = PACKET0(scratch, 0); + ib->ptr[1] = 0xDEADBEEF; + ib->ptr[2] = PACKET2(0); + ib->ptr[3] = PACKET2(0); + ib->ptr[4] = PACKET2(0); + ib->ptr[5] = PACKET2(0); + ib->ptr[6] = PACKET2(0); + ib->ptr[7] = PACKET2(0); + ib->length_dw = 8; + r = radeon_ib_schedule(rdev, ib); + if (r) { + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); + return r; + } + r = radeon_fence_wait(ib->fence, false); + if (r) { + return r; + } + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) { + break; + } + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ib test succeeded in %u usecs\n", i); + } else { + DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); + return r; +} diff --git a/drivers/gpu/drm/radeon/r100d.h b/drivers/gpu/drm/radeon/r100d.h new file mode 100644 index 00000000000..6da7d92c321 --- 
/dev/null +++ b/drivers/gpu/drm/radeon/r100d.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef __R100D_H__ +#define __R100D_H__ + +#define CP_PACKET0 0x00000000 +#define PACKET0_BASE_INDEX_SHIFT 0 +#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) +#define PACKET0_COUNT_SHIFT 16 +#define PACKET0_COUNT_MASK (0x3fff << 16) +#define CP_PACKET1 0x40000000 +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) +#define CP_PACKET3 0xC0000000 +#define PACKET3_IT_OPCODE_SHIFT 8 +#define PACKET3_IT_OPCODE_MASK (0xff << 8) +#define PACKET3_COUNT_SHIFT 16 +#define PACKET3_COUNT_MASK (0x3fff << 16) +/* PACKET3 op code */ +#define PACKET3_NOP 0x10 +#define PACKET3_3D_DRAW_VBUF 0x28 +#define PACKET3_3D_DRAW_IMMD 0x29 +#define PACKET3_3D_DRAW_INDX 0x2A +#define PACKET3_3D_LOAD_VBPNTR 0x2F +#define PACKET3_INDX_BUFFER 0x33 +#define PACKET3_3D_DRAW_VBUF_2 0x34 +#define PACKET3_3D_DRAW_IMMD_2 0x35 +#define PACKET3_3D_DRAW_INDX_2 0x36 +#define PACKET3_BITBLT_MULTI 0x9B + +#define PACKET0(reg, n) (CP_PACKET0 | \ + REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ + REG_SET(PACKET0_COUNT, (n))) +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) +#define PACKET3(op, n) (CP_PACKET3 | \ + REG_SET(PACKET3_IT_OPCODE, (op)) | \ + REG_SET(PACKET3_COUNT, (n))) + +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) +#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) + +#endif diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 33a2c557eac..a5f82f7beed 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -33,6 +33,7 @@ #include "radeon_drm.h" #include "radeon_share.h" #include "r100_track.h" +#include "r300d.h" #include "r300_reg_safe.h" @@ -127,7 
+128,7 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev) WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp); rv370_pcie_gart_tlb_flush(rdev); DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n", - rdev->mc.gtt_size >> 20, table_addr); + (unsigned)(rdev->mc.gtt_size >> 20), table_addr); rdev->gart.ready = true; return 0; } diff --git a/drivers/gpu/drm/radeon/r300.h b/drivers/gpu/drm/radeon/r300.h deleted file mode 100644 index 8486b4da9d6..00000000000 --- a/drivers/gpu/drm/radeon/r300.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2008 Advanced Micro Devices, Inc. - * Copyright 2008 Red Hat Inc. - * Copyright 2009 Jerome Glisse. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: Dave Airlie - * Alex Deucher - * Jerome Glisse - */ -#ifndef R300_H -#define R300_H - -struct r300_asic { - const unsigned *reg_safe_bm; - unsigned reg_safe_bm_size; -}; - -#endif diff --git a/drivers/gpu/drm/radeon/r300d.h b/drivers/gpu/drm/radeon/r300d.h new file mode 100644 index 00000000000..63ec076f2cd --- /dev/null +++ b/drivers/gpu/drm/radeon/r300d.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef __R300D_H__ +#define __R300D_H__ + +#define CP_PACKET0 0x00000000 +#define PACKET0_BASE_INDEX_SHIFT 0 +#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) +#define PACKET0_COUNT_SHIFT 16 +#define PACKET0_COUNT_MASK (0x3fff << 16) +#define CP_PACKET1 0x40000000 +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) +#define CP_PACKET3 0xC0000000 +#define PACKET3_IT_OPCODE_SHIFT 8 +#define PACKET3_IT_OPCODE_MASK (0xff << 8) +#define PACKET3_COUNT_SHIFT 16 +#define PACKET3_COUNT_MASK (0x3fff << 16) +/* PACKET3 op code */ +#define PACKET3_NOP 0x10 +#define PACKET3_3D_DRAW_VBUF 0x28 +#define PACKET3_3D_DRAW_IMMD 0x29 +#define PACKET3_3D_DRAW_INDX 0x2A +#define PACKET3_3D_LOAD_VBPNTR 0x2F +#define PACKET3_INDX_BUFFER 0x33 +#define PACKET3_3D_DRAW_VBUF_2 0x34 +#define PACKET3_3D_DRAW_IMMD_2 0x35 +#define PACKET3_3D_DRAW_INDX_2 0x36 +#define PACKET3_BITBLT_MULTI 0x9B + +#define PACKET0(reg, n) (CP_PACKET0 | \ + REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ + REG_SET(PACKET0_COUNT, (n))) +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) +#define PACKET3(op, n) (CP_PACKET3 | \ + REG_SET(PACKET3_IT_OPCODE, (op)) | \ + REG_SET(PACKET3_COUNT, (n))) + +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) +#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) + +#endif diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 538cd907df6..d8fcef44a69 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -25,12 +25,46 @@ * Alex Deucher * Jerome Glisse */ +#include +#include +#include #include "drmP.h" -#include "radeon_reg.h" +#include "radeon_drm.h" 
#include "radeon.h" +#include "radeon_mode.h" +#include "radeon_share.h" +#include "r600d.h" +#include "avivod.h" +#include "atom.h" -/* r600,rv610,rv630,rv620,rv635,rv670 depends on : */ -void rs600_mc_disable_clients(struct radeon_device *rdev); +#define PFP_UCODE_SIZE 576 +#define PM4_UCODE_SIZE 1792 +#define R700_PFP_UCODE_SIZE 848 +#define R700_PM4_UCODE_SIZE 1360 + +/* Firmware Names */ +MODULE_FIRMWARE("radeon/R600_pfp.bin"); +MODULE_FIRMWARE("radeon/R600_me.bin"); +MODULE_FIRMWARE("radeon/RV610_pfp.bin"); +MODULE_FIRMWARE("radeon/RV610_me.bin"); +MODULE_FIRMWARE("radeon/RV630_pfp.bin"); +MODULE_FIRMWARE("radeon/RV630_me.bin"); +MODULE_FIRMWARE("radeon/RV620_pfp.bin"); +MODULE_FIRMWARE("radeon/RV620_me.bin"); +MODULE_FIRMWARE("radeon/RV635_pfp.bin"); +MODULE_FIRMWARE("radeon/RV635_me.bin"); +MODULE_FIRMWARE("radeon/RV670_pfp.bin"); +MODULE_FIRMWARE("radeon/RV670_me.bin"); +MODULE_FIRMWARE("radeon/RS780_pfp.bin"); +MODULE_FIRMWARE("radeon/RS780_me.bin"); +MODULE_FIRMWARE("radeon/RV770_pfp.bin"); +MODULE_FIRMWARE("radeon/RV770_me.bin"); +MODULE_FIRMWARE("radeon/RV730_pfp.bin"); +MODULE_FIRMWARE("radeon/RV730_me.bin"); +MODULE_FIRMWARE("radeon/RV710_pfp.bin"); +MODULE_FIRMWARE("radeon/RV710_me.bin"); + +int r600_debugfs_mc_info_init(struct radeon_device *rdev); /* This files gather functions specifics to: * r600,rv610,rv630,rv620,rv635,rv670 @@ -39,87 +73,270 @@ void rs600_mc_disable_clients(struct radeon_device *rdev); */ int r600_mc_wait_for_idle(struct radeon_device *rdev); void r600_gpu_init(struct radeon_device *rdev); +void r600_fini(struct radeon_device *rdev); /* - * MC + * R600 PCIE GART */ -int r600_mc_init(struct radeon_device *rdev) +int r600_gart_clear_page(struct radeon_device *rdev, int i) { - uint32_t tmp; + void __iomem *ptr = (void *)rdev->gart.table.vram.ptr; + u64 pte; - r600_gpu_init(rdev); + if (i < 0 || i > rdev->gart.num_gpu_pages) + return -EINVAL; + pte = 0; + writeq(pte, ((void __iomem *)ptr) + (i * 8)); + return 0; +} - /* setup the 
gart before changing location so we can ask to - * discard unmapped mc request - */ - /* FIXME: disable out of gart access */ - tmp = rdev->mc.gtt_location / 4096; - tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp); - tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096; - tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp); - - rs600_mc_disable_clients(rdev); - if (r600_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. Bad things might happen.\n"); +void r600_pcie_gart_tlb_flush(struct radeon_device *rdev) +{ + unsigned i; + u32 tmp; + + WREG32(VM_CONTEXT0_INVALIDATION_LOW_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (rdev->mc.gtt_end - 1) >> 12); + WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1)); + for (i = 0; i < rdev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(VM_CONTEXT0_REQUEST_RESPONSE); + tmp = (tmp & RESPONSE_TYPE_MASK) >> RESPONSE_TYPE_SHIFT; + if (tmp == 2) { + printk(KERN_WARNING "[drm] r600 flush TLB failed\n"); + return; + } + if (tmp) { + return; + } + udelay(1); + } +} + +int r600_pcie_gart_enable(struct radeon_device *rdev) +{ + u32 tmp; + int r, i; + + /* Initialize common gart structure */ + r = radeon_gart_init(rdev); + if (r) { + return r; + } + rdev->gart.table_size = rdev->gart.num_gpu_pages * 8; + r = radeon_gart_table_vram_alloc(rdev); + if (r) { + return r; } + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, i); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1)); + /* Setup TLB control */ + tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + EFFECTIVE_L1_TLB_SIZE(5) | 
EFFECTIVE_L1_QUEUE_SIZE(5) | + ENABLE_WAIT_L2_QUERY; + WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp | ENABLE_L1_STRICT_ORDERING); + WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); + WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); + WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); + WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); + for (i = 1; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); - tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1; - tmp = REG_SET(R600_MC_FB_TOP, tmp >> 24); - tmp |= REG_SET(R600_MC_FB_BASE, rdev->mc.vram_location >> 24); - WREG32(R600_MC_VM_FB_LOCATION, tmp); - tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; - tmp = REG_SET(R600_MC_AGP_TOP, tmp >> 22); - WREG32(R600_MC_VM_AGP_TOP, tmp); - tmp = REG_SET(R600_MC_AGP_BOT, rdev->mc.gtt_location >> 22); - WREG32(R600_MC_VM_AGP_BOT, tmp); + r600_pcie_gart_tlb_flush(rdev); + rdev->gart.ready = true; return 0; } -void r600_mc_fini(struct radeon_device *rdev) +void r600_pcie_gart_disable(struct radeon_device *rdev) { - /* FIXME: implement */ -} + u32 tmp; + int i; + /* Clear ptes*/ + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, 
i); + r600_pcie_gart_tlb_flush(rdev); + /* Disable all tables */ + for (i = 0; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); -/* - * Global GPU functions - */ -void r600_errata(struct radeon_device *rdev) -{ - rdev->pll_errata = 0; + /* Disable L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1)); + /* Setup L1 TLB control */ + tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) | + ENABLE_WAIT_L2_QUERY; + WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp); } int r600_mc_wait_for_idle(struct radeon_device *rdev) { - /* FIXME: implement */ - return 0; + unsigned i; + u32 tmp; + + for (i = 0; i < rdev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(R_000E50_SRBM_STATUS) & 0x3F00; + if (!tmp) + return 0; + udelay(1); + } + return -1; } -void r600_gpu_init(struct radeon_device *rdev) +static void r600_mc_resume(struct radeon_device *rdev) { - /* FIXME: implement */ -} + u32 d1vga_control, d2vga_control; + u32 vga_render_control, vga_hdp_control; + u32 d1crtc_control, d2crtc_control; + u32 new_d1grph_primary, new_d1grph_secondary; + u32 new_d2grph_primary, new_d2grph_secondary; + u64 old_vram_start; + u32 tmp; + int i, j; + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 
0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); -/* - * VRAM info - */ -void r600_vram_get_type(struct radeon_device *rdev) + d1vga_control = RREG32(D1VGA_CONTROL); + d2vga_control = RREG32(D2VGA_CONTROL); + vga_render_control = RREG32(VGA_RENDER_CONTROL); + vga_hdp_control = RREG32(VGA_HDP_CONTROL); + d1crtc_control = RREG32(D1CRTC_CONTROL); + d2crtc_control = RREG32(D2CRTC_CONTROL); + old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24; + new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS); + new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS); + new_d1grph_primary += rdev->mc.vram_start - old_vram_start; + new_d1grph_secondary += rdev->mc.vram_start - old_vram_start; + new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS); + new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS); + new_d2grph_primary += rdev->mc.vram_start - old_vram_start; + new_d2grph_secondary += rdev->mc.vram_start - old_vram_start; + + /* Stop all video */ + WREG32(D1VGA_CONTROL, 0); + WREG32(D2VGA_CONTROL, 0); + WREG32(VGA_RENDER_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, 0); + WREG32(D2CRTC_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC not idle !\n"); + } + + /* Lockout access through VGA aperture*/ + WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); + + /* Update configuration */ + WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); + tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16; + tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); + WREG32(MC_VM_FB_LOCATION, tmp); + WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); + 
WREG32(HDP_NONSURFACE_INFO, (2 << 7)); + WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF); + if (rdev->flags & RADEON_IS_AGP) { + WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16); + WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16); + WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22); + } else { + WREG32(MC_VM_AGP_BASE, 0); + WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); + WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); + } + WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary); + WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary); + WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary); + WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary); + WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start); + + /* Unlock host access */ + WREG32(VGA_HDP_CONTROL, vga_hdp_control); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC not idle !\n"); + } + + /* Restore video state */ + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, d1crtc_control); + WREG32(D2CRTC_CONTROL, d2crtc_control); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + WREG32(D1VGA_CONTROL, d1vga_control); + WREG32(D2VGA_CONTROL, d2vga_control); + WREG32(VGA_RENDER_CONTROL, vga_render_control); +} + +int r600_mc_init(struct radeon_device *rdev) { - uint32_t tmp; + fixed20_12 a; + u32 tmp; int chansize; + int r; + /* Get VRAM informations */ rdev->mc.vram_width = 128; rdev->mc.vram_is_ddr = true; - - tmp = RREG32(R600_RAMCFG); - if (tmp & R600_CHANSIZE_OVERRIDE) { + tmp = RREG32(RAMCFG); + if (tmp & CHANSIZE_OVERRIDE) { chansize = 16; - } else if (tmp & R600_CHANSIZE) { + } else if (tmp & CHANSIZE_MASK) { chansize = 64; } else { chansize = 32; @@ -135,36 +352,1391 @@ void r600_vram_get_type(struct radeon_device *rdev) (rdev->family == CHIP_RV635)) { rdev->mc.vram_width = 2 * chansize; } + /* Could aper size report 0 ? 
*/ + rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); + rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); + /* Setup GPU memory space */ + rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); + rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); + if (rdev->flags & RADEON_IS_AGP) { + r = radeon_agp_init(rdev); + if (r) + return r; + /* gtt_size is setup by radeon_agp_init */ + rdev->mc.gtt_location = rdev->mc.agp_base; + tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size; + /* Try to put vram before or after AGP because we + * we want SYSTEM_APERTURE to cover both VRAM and + * AGP so that GPU can catch out of VRAM/AGP access + */ + if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) { + /* Enought place before */ + rdev->mc.vram_location = rdev->mc.gtt_location - + rdev->mc.mc_vram_size; + } else if (tmp > rdev->mc.mc_vram_size) { + /* Enought place after */ + rdev->mc.vram_location = rdev->mc.gtt_location + + rdev->mc.gtt_size; + } else { + /* Try to setup VRAM then AGP might not + * not work on some card + */ + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + } + } else { + if (rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) { + rdev->mc.vram_location = (RREG32(MC_VM_FB_LOCATION) & + 0xFFFF) << 24; + rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size; + if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) { + /* Enough place after vram */ + rdev->mc.gtt_location = tmp; + } else if (rdev->mc.vram_location >= rdev->mc.gtt_size) { + /* Enough place before vram */ + rdev->mc.gtt_location = 0; + } else { + /* Not enough place after or before shrink + * gart size + */ + if (rdev->mc.vram_location > (0xFFFFFFFFUL - tmp)) { + rdev->mc.gtt_location = 0; + rdev->mc.gtt_size = rdev->mc.vram_location; + } else { + rdev->mc.gtt_location = tmp; + rdev->mc.gtt_size = 0xFFFFFFFFUL - tmp; + } + } + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + } else { + 
rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + } + } + rdev->mc.vram_start = rdev->mc.vram_location; + rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size; + rdev->mc.gtt_start = rdev->mc.gtt_location; + rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size; + /* FIXME: we should enforce default clock in case GPU is not in + * default setup + */ + a.full = rfixed_const(100); + rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk); + rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a); + return 0; } -void r600_vram_info(struct radeon_device *rdev) +/* We doesn't check that the GPU really needs a reset we simply do the + * reset, it's up to the caller to determine if the GPU needs one. We + * might add an helper function to check that. + */ +int r600_gpu_soft_reset(struct radeon_device *rdev) { - r600_vram_get_type(rdev); - rdev->mc.real_vram_size = RREG32(R600_CONFIG_MEMSIZE); - rdev->mc.mc_vram_size = rdev->mc.real_vram_size; + u32 grbm_busy_mask = S_008010_VC_BUSY(1) | S_008010_VGT_BUSY_NO_DMA(1) | + S_008010_VGT_BUSY(1) | S_008010_TA03_BUSY(1) | + S_008010_TC_BUSY(1) | S_008010_SX_BUSY(1) | + S_008010_SH_BUSY(1) | S_008010_SPI03_BUSY(1) | + S_008010_SMX_BUSY(1) | S_008010_SC_BUSY(1) | + S_008010_PA_BUSY(1) | S_008010_DB03_BUSY(1) | + S_008010_CR_BUSY(1) | S_008010_CB03_BUSY(1) | + S_008010_GUI_ACTIVE(1); + u32 grbm2_busy_mask = S_008014_SPI0_BUSY(1) | S_008014_SPI1_BUSY(1) | + S_008014_SPI2_BUSY(1) | S_008014_SPI3_BUSY(1) | + S_008014_TA0_BUSY(1) | S_008014_TA1_BUSY(1) | + S_008014_TA2_BUSY(1) | S_008014_TA3_BUSY(1) | + S_008014_DB0_BUSY(1) | S_008014_DB1_BUSY(1) | + S_008014_DB2_BUSY(1) | S_008014_DB3_BUSY(1) | + S_008014_CB0_BUSY(1) | S_008014_CB1_BUSY(1) | + S_008014_CB2_BUSY(1) | S_008014_CB3_BUSY(1); + u32 srbm_reset = 0; - /* Could aper size report 0 ? 
*/ - rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); - rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); + /* Disable CP parsing/prefetching */ + WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(0xff)); + /* Check if any of the rendering block is busy and reset it */ + if ((RREG32(R_008010_GRBM_STATUS) & grbm_busy_mask) || + (RREG32(R_008014_GRBM_STATUS2) & grbm2_busy_mask)) { + WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CR(1) | + S_008020_SOFT_RESET_DB(1) | + S_008020_SOFT_RESET_CB(1) | + S_008020_SOFT_RESET_PA(1) | + S_008020_SOFT_RESET_SC(1) | + S_008020_SOFT_RESET_SMX(1) | + S_008020_SOFT_RESET_SPI(1) | + S_008020_SOFT_RESET_SX(1) | + S_008020_SOFT_RESET_SH(1) | + S_008020_SOFT_RESET_TC(1) | + S_008020_SOFT_RESET_TA(1) | + S_008020_SOFT_RESET_VC(1) | + S_008020_SOFT_RESET_VGT(1)); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + udelay(50); + WREG32(R_008020_GRBM_SOFT_RESET, 0); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + } + /* Reset CP (we always reset CP) */ + WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CP(1)); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + udelay(50); + WREG32(R_008020_GRBM_SOFT_RESET, 0); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + /* Reset others GPU block if necessary */ + if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_RLC(1); + if (G_000E50_GRBM_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_GRBM(1); + if (G_000E50_HI_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_IH(1); + if (G_000E50_VMC_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_VMC(1); + if (G_000E50_MCB_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_MCDZ_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_MCDY_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if 
(G_000E50_MCDX_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_MCDW_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_RLC(1); + if (G_000E50_SEM_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_SEM(1); + WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset); + (void)RREG32(R_000E60_SRBM_SOFT_RESET); + udelay(50); + WREG32(R_000E60_SRBM_SOFT_RESET, 0); + (void)RREG32(R_000E60_SRBM_SOFT_RESET); + /* Wait a little for things to settle down */ + udelay(50); + return 0; +} + +int r600_gpu_reset(struct radeon_device *rdev) +{ + return r600_gpu_soft_reset(rdev); +} + +static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes, + u32 num_backends, + u32 backend_disable_mask) +{ + u32 backend_map = 0; + u32 enabled_backends_mask; + u32 enabled_backends_count; + u32 cur_pipe; + u32 swizzle_pipe[R6XX_MAX_PIPES]; + u32 cur_backend; + u32 i; + + if (num_tile_pipes > R6XX_MAX_PIPES) + num_tile_pipes = R6XX_MAX_PIPES; + if (num_tile_pipes < 1) + num_tile_pipes = 1; + if (num_backends > R6XX_MAX_BACKENDS) + num_backends = R6XX_MAX_BACKENDS; + if (num_backends < 1) + num_backends = 1; + + enabled_backends_mask = 0; + enabled_backends_count = 0; + for (i = 0; i < R6XX_MAX_BACKENDS; ++i) { + if (((backend_disable_mask >> i) & 1) == 0) { + enabled_backends_mask |= (1 << i); + ++enabled_backends_count; + } + if (enabled_backends_count == num_backends) + break; + } + + if (enabled_backends_count == 0) { + enabled_backends_mask = 1; + enabled_backends_count = 1; + } + + if (enabled_backends_count != num_backends) + num_backends = enabled_backends_count; + + memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES); + switch (num_tile_pipes) { + case 1: + swizzle_pipe[0] = 0; + break; + case 2: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + break; + case 3: + swizzle_pipe[0] = 0; + 
swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + break; + case 4: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + break; + case 5: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + swizzle_pipe[4] = 4; + break; + case 6: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 5; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + break; + case 7: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + swizzle_pipe[6] = 5; + break; + case 8: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + swizzle_pipe[6] = 5; + swizzle_pipe[7] = 7; + break; + } + + cur_backend = 0; + for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { + while (((1 << cur_backend) & enabled_backends_mask) == 0) + cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; + + backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); + + cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; + } + + return backend_map; +} + +int r600_count_pipe_bits(uint32_t val) +{ + int i, ret = 0; + + for (i = 0; i < 32; i++) { + ret += val & 1; + val >>= 1; + } + return ret; } +void r600_gpu_init(struct radeon_device *rdev) +{ + u32 tiling_config; + u32 ramcfg; + u32 tmp; + int i, j; + u32 sq_config; + u32 sq_gpr_resource_mgmt_1 = 0; + u32 sq_gpr_resource_mgmt_2 = 0; + u32 sq_thread_resource_mgmt = 0; + u32 sq_stack_resource_mgmt_1 = 0; + u32 sq_stack_resource_mgmt_2 = 0; + + /* FIXME: implement */ + switch (rdev->family) { + case CHIP_R600: + rdev->config.r600.max_pipes = 4; + rdev->config.r600.max_tile_pipes = 8; + rdev->config.r600.max_simds = 4; + rdev->config.r600.max_backends = 4; + rdev->config.r600.max_gprs = 256; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 256; + 
rdev->config.r600.max_hw_contexts = 8; + rdev->config.r600.max_gs_threads = 16; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + rdev->config.r600.sq_num_cf_insts = 2; + break; + case CHIP_RV630: + case CHIP_RV635: + rdev->config.r600.max_pipes = 2; + rdev->config.r600.max_tile_pipes = 2; + rdev->config.r600.max_simds = 3; + rdev->config.r600.max_backends = 1; + rdev->config.r600.max_gprs = 128; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 128; + rdev->config.r600.max_hw_contexts = 8; + rdev->config.r600.max_gs_threads = 4; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + rdev->config.r600.sq_num_cf_insts = 2; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + rdev->config.r600.max_pipes = 1; + rdev->config.r600.max_tile_pipes = 1; + rdev->config.r600.max_simds = 2; + rdev->config.r600.max_backends = 1; + rdev->config.r600.max_gprs = 128; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 128; + rdev->config.r600.max_hw_contexts = 4; + rdev->config.r600.max_gs_threads = 4; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + rdev->config.r600.sq_num_cf_insts = 1; + break; + case CHIP_RV670: + rdev->config.r600.max_pipes = 4; + rdev->config.r600.max_tile_pipes = 4; + rdev->config.r600.max_simds = 4; + rdev->config.r600.max_backends = 4; + rdev->config.r600.max_gprs = 192; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 256; + rdev->config.r600.max_hw_contexts = 8; + rdev->config.r600.max_gs_threads = 16; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + 
rdev->config.r600.sq_num_cf_insts = 2; + break; + default: + break; + } + + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + + WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); + + /* Setup tiling */ + tiling_config = 0; + ramcfg = RREG32(RAMCFG); + switch (rdev->config.r600.max_tile_pipes) { + case 1: + tiling_config |= PIPE_TILING(0); + break; + case 2: + tiling_config |= PIPE_TILING(1); + break; + case 4: + tiling_config |= PIPE_TILING(2); + break; + case 8: + tiling_config |= PIPE_TILING(3); + break; + default: + break; + } + tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); + tiling_config |= GROUP_SIZE(0); + tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT; + if (tmp > 3) { + tiling_config |= ROW_TILING(3); + tiling_config |= SAMPLE_SPLIT(3); + } else { + tiling_config |= ROW_TILING(tmp); + tiling_config |= SAMPLE_SPLIT(tmp); + } + tiling_config |= BANK_SWAPS(1); + tmp = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes, + rdev->config.r600.max_backends, + (0xff << rdev->config.r600.max_backends) & 0xff); + tiling_config |= BACKEND_MAP(tmp); + WREG32(GB_TILING_CONFIG, tiling_config); + WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); + WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); + + tmp = BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK); + WREG32(CC_RB_BACKEND_DISABLE, tmp); + + /* Setup pipes */ + tmp = INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK); + tmp |= INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK); + WREG32(CC_GC_SHADER_PIPE_CONFIG, tmp); + WREG32(GC_USER_SHADER_PIPE_CONFIG, tmp); + + tmp = R6XX_MAX_BACKENDS - r600_count_pipe_bits(tmp & INACTIVE_QD_PIPES_MASK); + 
WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); + WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK); + + /* Setup some CP states */ + WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | ROQ_IB2_START(0x2b))); + WREG32(CP_MEQ_THRESHOLDS, (MEQ_END(0x40) | ROQ_END(0x40))); + + WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | SYNC_GRADIENT | + SYNC_WALKER | SYNC_ALIGNER)); + /* Setup various GPU states */ + if (rdev->family == CHIP_RV670) + WREG32(ARB_GDEC_RD_CNTL, 0x00000021); + + tmp = RREG32(SX_DEBUG_1); + tmp |= SMX_EVENT_RELEASE; + if ((rdev->family > CHIP_R600)) + tmp |= ENABLE_NEW_SMX_ADDRESS; + WREG32(SX_DEBUG_1, tmp); + + if (((rdev->family) == CHIP_R600) || + ((rdev->family) == CHIP_RV630) || + ((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + WREG32(DB_DEBUG, PREZ_MUST_WAIT_FOR_POSTZ_DONE); + } else { + WREG32(DB_DEBUG, 0); + } + WREG32(DB_WATERMARKS, (DEPTH_FREE(4) | DEPTH_CACHELINE_FREE(16) | + DEPTH_FLUSH(16) | DEPTH_PENDING_FREE(4))); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + WREG32(VGT_NUM_INSTANCES, 0); + + WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0)); + WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(0)); + + tmp = RREG32(SQ_MS_FIFO_SIZES); + if (((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + tmp = (CACHE_FIFO_SIZE(0xa) | + FETCH_FIFO_HIWATER(0xa) | + DONE_FIFO_HIWATER(0xe0) | + ALU_UPDATE_FIFO_HIWATER(0x8)); + } else if (((rdev->family) == CHIP_R600) || + ((rdev->family) == CHIP_RV630)) { + tmp &= ~DONE_FIFO_HIWATER(0xff); + tmp |= DONE_FIFO_HIWATER(0x4); + } + WREG32(SQ_MS_FIFO_SIZES, tmp); + + /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT + * should be adjusted as needed by the 2D/3D drivers. 
This just sets default values + */ + sq_config = RREG32(SQ_CONFIG); + sq_config &= ~(PS_PRIO(3) | + VS_PRIO(3) | + GS_PRIO(3) | + ES_PRIO(3)); + sq_config |= (DX9_CONSTS | + VC_ENABLE | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + + if ((rdev->family) == CHIP_R600) { + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(124) | + NUM_VS_GPRS(124) | + NUM_CLAUSE_TEMP_GPRS(4)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(0) | + NUM_ES_GPRS(0)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(136) | + NUM_VS_THREADS(48) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(4)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(128) | + NUM_VS_STACK_ENTRIES(128)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(0) | + NUM_ES_STACK_ENTRIES(0)); + } else if (((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + /* no vertex cache */ + sq_config &= ~VC_ENABLE; + + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) | + NUM_VS_GPRS(44) | + NUM_CLAUSE_TEMP_GPRS(2)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) | + NUM_ES_GPRS(17)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(79) | + NUM_VS_THREADS(78) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(31)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) | + NUM_VS_STACK_ENTRIES(40)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) | + NUM_ES_STACK_ENTRIES(16)); + } else if (((rdev->family) == CHIP_RV630) || + ((rdev->family) == CHIP_RV635)) { + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) | + NUM_VS_GPRS(44) | + NUM_CLAUSE_TEMP_GPRS(2)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(18) | + NUM_ES_GPRS(18)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(79) | + NUM_VS_THREADS(78) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(31)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) | + NUM_VS_STACK_ENTRIES(40)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) | + NUM_ES_STACK_ENTRIES(16)); + } else if ((rdev->family) == CHIP_RV670) { + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) | + NUM_VS_GPRS(44) 
| + NUM_CLAUSE_TEMP_GPRS(2)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) | + NUM_ES_GPRS(17)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(79) | + NUM_VS_THREADS(78) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(31)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(64) | + NUM_VS_STACK_ENTRIES(64)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(64) | + NUM_ES_STACK_ENTRIES(64)); + } + + WREG32(SQ_CONFIG, sq_config); + WREG32(SQ_GPR_RESOURCE_MGMT_1, sq_gpr_resource_mgmt_1); + WREG32(SQ_GPR_RESOURCE_MGMT_2, sq_gpr_resource_mgmt_2); + WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); + WREG32(SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1); + WREG32(SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2); + + if (((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(TC_ONLY)); + } else { + WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC)); + } + + /* More default values. 2D/3D driver should adjust as needed */ + WREG32(PA_SC_AA_SAMPLE_LOCS_2S, (S0_X(0xc) | S0_Y(0x4) | + S1_X(0x4) | S1_Y(0xc))); + WREG32(PA_SC_AA_SAMPLE_LOCS_4S, (S0_X(0xe) | S0_Y(0xe) | + S1_X(0x2) | S1_Y(0x2) | + S2_X(0xa) | S2_Y(0x6) | + S3_X(0x6) | S3_Y(0xa))); + WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD0, (S0_X(0xe) | S0_Y(0xb) | + S1_X(0x4) | S1_Y(0xc) | + S2_X(0x1) | S2_Y(0x6) | + S3_X(0xa) | S3_Y(0xe))); + WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD1, (S4_X(0x6) | S4_Y(0x1) | + S5_X(0x0) | S5_Y(0x0) | + S6_X(0xb) | S6_Y(0x4) | + S7_X(0x7) | S7_Y(0x8))); + + WREG32(VGT_STRMOUT_EN, 0); + tmp = rdev->config.r600.max_pipes * 16; + switch (rdev->family) { + case CHIP_RV610: + case CHIP_RS780: + case CHIP_RV620: + tmp += 32; + break; + case CHIP_RV670: + tmp += 128; + break; + default: + break; + } + if (tmp > 256) { + tmp = 256; + } + WREG32(VGT_ES_PER_GS, 128); + WREG32(VGT_GS_PER_ES, tmp); + WREG32(VGT_GS_PER_VS, 2); + WREG32(VGT_GS_VERTEX_REUSE, 16); + + /* more default values. 
2D/3D driver should adjust as needed */ + WREG32(PA_SC_LINE_STIPPLE_STATE, 0); + WREG32(VGT_STRMOUT_EN, 0); + WREG32(SX_MISC, 0); + WREG32(PA_SC_MODE_CNTL, 0); + WREG32(PA_SC_AA_CONFIG, 0); + WREG32(PA_SC_LINE_STIPPLE, 0); + WREG32(SPI_INPUT_Z, 0); + WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2)); + WREG32(CB_COLOR7_FRAG, 0); + + /* Clear render buffer base addresses */ + WREG32(CB_COLOR0_BASE, 0); + WREG32(CB_COLOR1_BASE, 0); + WREG32(CB_COLOR2_BASE, 0); + WREG32(CB_COLOR3_BASE, 0); + WREG32(CB_COLOR4_BASE, 0); + WREG32(CB_COLOR5_BASE, 0); + WREG32(CB_COLOR6_BASE, 0); + WREG32(CB_COLOR7_BASE, 0); + WREG32(CB_COLOR7_FRAG, 0); + + switch (rdev->family) { + case CHIP_RV610: + case CHIP_RS780: + case CHIP_RV620: + tmp = TC_L2_SIZE(8); + break; + case CHIP_RV630: + case CHIP_RV635: + tmp = TC_L2_SIZE(4); + break; + case CHIP_R600: + tmp = TC_L2_SIZE(0) | L2_DISABLE_LATE_HIT; + break; + default: + tmp = TC_L2_SIZE(0); + break; + } + WREG32(TC_CNTL, tmp); + + tmp = RREG32(HDP_HOST_PATH_CNTL); + WREG32(HDP_HOST_PATH_CNTL, tmp); + + tmp = RREG32(ARB_POP); + tmp |= ENABLE_TC128; + WREG32(ARB_POP, tmp); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA | + NUM_CLIP_SEQ(3))); + WREG32(PA_SC_ENHANCE, FORCE_EOV_MAX_CLK_CNT(4095)); +} + + /* * Indirect registers accessor */ -uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg) +u32 r600_pciep_rreg(struct radeon_device *rdev, u32 reg) +{ + u32 r; + + WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); + (void)RREG32(PCIE_PORT_INDEX); + r = RREG32(PCIE_PORT_DATA); + return r; +} + +void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); + (void)RREG32(PCIE_PORT_INDEX); + WREG32(PCIE_PORT_DATA, (v)); + (void)RREG32(PCIE_PORT_DATA); +} + + +/* + * CP & Ring + */ +void r600_cp_stop(struct radeon_device *rdev) +{ + WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); +} + +int r600_cp_init_microcode(struct radeon_device *rdev) +{ + struct 
platform_device *pdev; + const char *chip_name; + size_t pfp_req_size, me_req_size; + char fw_name[30]; + int err; + + DRM_DEBUG("\n"); + + pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0); + err = IS_ERR(pdev); + if (err) { + printk(KERN_ERR "radeon_cp: Failed to register firmware\n"); + return -EINVAL; + } + + switch (rdev->family) { + case CHIP_R600: chip_name = "R600"; break; + case CHIP_RV610: chip_name = "RV610"; break; + case CHIP_RV630: chip_name = "RV630"; break; + case CHIP_RV620: chip_name = "RV620"; break; + case CHIP_RV635: chip_name = "RV635"; break; + case CHIP_RV670: chip_name = "RV670"; break; + case CHIP_RS780: + case CHIP_RS880: chip_name = "RS780"; break; + case CHIP_RV770: chip_name = "RV770"; break; + case CHIP_RV730: + case CHIP_RV740: chip_name = "RV730"; break; + case CHIP_RV710: chip_name = "RV710"; break; + default: BUG(); + } + + if (rdev->family >= CHIP_RV770) { + pfp_req_size = R700_PFP_UCODE_SIZE * 4; + me_req_size = R700_PM4_UCODE_SIZE * 4; + } else { + pfp_req_size = PFP_UCODE_SIZE * 4; + me_req_size = PM4_UCODE_SIZE * 12; + } + + DRM_INFO("Loading %s CP Microcode\n", chip_name); + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name); + err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->pfp_fw->size != pfp_req_size) { + printk(KERN_ERR + "r600_cp: Bogus length %zu in firmware \"%s\"\n", + rdev->pfp_fw->size, fw_name); + err = -EINVAL; + goto out; + } + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name); + err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->me_fw->size != me_req_size) { + printk(KERN_ERR + "r600_cp: Bogus length %zu in firmware \"%s\"\n", + rdev->me_fw->size, fw_name); + err = -EINVAL; + } +out: + platform_device_unregister(pdev); + + if (err) { + if (err != -EINVAL) + printk(KERN_ERR + "r600_cp: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(rdev->pfp_fw); + 
rdev->pfp_fw = NULL; + release_firmware(rdev->me_fw); + rdev->me_fw = NULL; + } + return err; +} + +static int r600_cp_load_microcode(struct radeon_device *rdev) +{ + const __be32 *fw_data; + int i; + + if (!rdev->me_fw || !rdev->pfp_fw) + return -EINVAL; + + r600_cp_stop(rdev); + + WREG32(CP_RB_CNTL, RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3)); + + /* Reset cp */ + WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); + RREG32(GRBM_SOFT_RESET); + mdelay(15); + WREG32(GRBM_SOFT_RESET, 0); + + WREG32(CP_ME_RAM_WADDR, 0); + + fw_data = (const __be32 *)rdev->me_fw->data; + WREG32(CP_ME_RAM_WADDR, 0); + for (i = 0; i < PM4_UCODE_SIZE * 3; i++) + WREG32(CP_ME_RAM_DATA, + be32_to_cpup(fw_data++)); + + fw_data = (const __be32 *)rdev->pfp_fw->data; + WREG32(CP_PFP_UCODE_ADDR, 0); + for (i = 0; i < PFP_UCODE_SIZE; i++) + WREG32(CP_PFP_UCODE_DATA, + be32_to_cpup(fw_data++)); + + WREG32(CP_PFP_UCODE_ADDR, 0); + WREG32(CP_ME_RAM_WADDR, 0); + WREG32(CP_ME_RAM_RADDR, 0); + return 0; +} + +int r600_cp_start(struct radeon_device *rdev) +{ + int r; + uint32_t cp_me; + + r = radeon_ring_lock(rdev, 7); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } + radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5)); + radeon_ring_write(rdev, 0x1); + if (rdev->family < CHIP_RV770) { + radeon_ring_write(rdev, 0x3); + radeon_ring_write(rdev, rdev->config.r600.max_hw_contexts - 1); + } else { + radeon_ring_write(rdev, 0x0); + radeon_ring_write(rdev, rdev->config.rv770.max_hw_contexts - 1); + } + radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_unlock_commit(rdev); + + cp_me = 0xff; + WREG32(R_0086D8_CP_ME_CNTL, cp_me); + return 0; +} + +int r600_cp_resume(struct radeon_device *rdev) +{ + u32 tmp; + u32 rb_bufsz; + int r; + + /* Reset cp */ + WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); + RREG32(GRBM_SOFT_RESET); + mdelay(15); + WREG32(GRBM_SOFT_RESET, 0); + + /* Set ring buffer size */ + 
rb_bufsz = drm_order(rdev->cp.ring_size / 8); +#ifdef __BIG_ENDIAN + WREG32(CP_RB_CNTL, BUF_SWAP_32BIT | RB_NO_UPDATE | + (drm_order(4096/8) << 8) | rb_bufsz); +#else + WREG32(CP_RB_CNTL, RB_NO_UPDATE | (drm_order(4096/8) << 8) | rb_bufsz); +#endif + WREG32(CP_SEM_WAIT_TIMER, 0x4); + + /* Set the write pointer delay */ + WREG32(CP_RB_WPTR_DELAY, 0); + + /* Initialize the ring buffer's read and write pointers */ + tmp = RREG32(CP_RB_CNTL); + WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA); + WREG32(CP_RB_RPTR_WR, 0); + WREG32(CP_RB_WPTR, 0); + WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFF); + WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->cp.gpu_addr)); + mdelay(1); + WREG32(CP_RB_CNTL, tmp); + + WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8); + WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); + + rdev->cp.rptr = RREG32(CP_RB_RPTR); + rdev->cp.wptr = RREG32(CP_RB_WPTR); + + r600_cp_start(rdev); + rdev->cp.ready = true; + r = radeon_ring_test(rdev); + if (r) { + rdev->cp.ready = false; + return r; + } + return 0; +} + +void r600_cp_commit(struct radeon_device *rdev) +{ + WREG32(CP_RB_WPTR, rdev->cp.wptr); + (void)RREG32(CP_RB_WPTR); +} + +void r600_ring_init(struct radeon_device *rdev, unsigned ring_size) +{ + u32 rb_bufsz; + + /* Align ring size */ + rb_bufsz = drm_order(ring_size / 8); + ring_size = (1 << (rb_bufsz + 1)) * 4; + rdev->cp.ring_size = ring_size; + rdev->cp.align_mask = 16 - 1; +} + + +/* + * GPU scratch registers helpers function. 
+ */
+void r600_scratch_init(struct radeon_device *rdev)
+{
+	int i;
+
+	rdev->scratch.num_reg = 7;
+	for (i = 0; i < rdev->scratch.num_reg; i++) {
+		rdev->scratch.free[i] = true;
+		rdev->scratch.reg[i] = SCRATCH_REG0 + (i * 4);
+	}
+}
+
+/*
+ * Check that the CP executes ring commands: seed a scratch register with
+ * 0xCAFEDEAD, queue a SET_CONFIG_REG packet that writes 0xDEADBEEF to it and
+ * poll the register for up to rdev->usec_timeout iterations.
+ * Returns 0 on success, -EINVAL if the value never shows up, or the error
+ * from radeon_scratch_get()/radeon_ring_lock().
+ */
+int r600_ring_test(struct radeon_device *rdev)
+{
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+	r = radeon_ring_lock(rdev, 3);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		radeon_scratch_free(rdev, scratch);
+		return r;
+	}
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+	radeon_ring_write(rdev, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		/* i is unsigned: print it with %u, as r600_ib_test() does */
+		DRM_INFO("ring test succeeded in %u usecs\n", i);
+	} else {
+		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	radeon_scratch_free(rdev, scratch);
+	return r;
+}
+
+/*
+ * Writeback
+ */
+/*
+ * On the first call (rdev->wb.wb_obj == NULL) create, pin and map a 4096 byte
+ * GTT object, then program SCRATCH_ADDR, CP_RB_RPTR_ADDR(_HI) and
+ * SCRATCH_UMSK from its GPU address.
+ * Returns 0 on success or the error of the failing radeon_object_*() call.
+ * On failure the partially set up object is released and rdev->wb.wb_obj is
+ * reset to NULL, so a later call starts from scratch instead of programming
+ * the registers from a half-initialised buffer.
+ */
+int r600_wb_init(struct radeon_device *rdev)
+{
+	int r;
+
+	if (rdev->wb.wb_obj == NULL) {
+		r = radeon_object_create(rdev, NULL, 4096,
+					 true,
+					 RADEON_GEM_DOMAIN_GTT,
+					 false, &rdev->wb.wb_obj);
+		if (r) {
+			DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
+			return r;
+		}
+		r = radeon_object_pin(rdev->wb.wb_obj,
+				      RADEON_GEM_DOMAIN_GTT,
+				      &rdev->wb.gpu_addr);
+		if (r) {
+			DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
+			goto err_unref;
+		}
+		r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
+		if (r) {
+			DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
+			goto err_unpin;
+		}
+	}
+	WREG32(SCRATCH_ADDR, (rdev->wb.gpu_addr >> 8) & 0xFFFFFFFF);
+	WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + 1024) & 0xFFFFFFFC);
+	WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + 1024) & 0xFF);
+	WREG32(SCRATCH_UMSK, 0xff);
+	return 0;
+
+	/* unwind in reverse order of acquisition, mirroring r600_wb_fini() */
+err_unpin:
+	radeon_object_unpin(rdev->wb.wb_obj);
+err_unref:
+	radeon_object_unref(&rdev->wb.wb_obj);
+	rdev->wb.wb = NULL;
+	rdev->wb.wb_obj = NULL;
+	return r;
+}
+
+void r600_wb_fini(struct radeon_device *rdev)
+{
+	if (rdev->wb.wb_obj) {
+		radeon_object_kunmap(rdev->wb.wb_obj);
+		radeon_object_unpin(rdev->wb.wb_obj);
+		radeon_object_unref(&rdev->wb.wb_obj);
+		rdev->wb.wb = NULL;
+		rdev->wb.wb_obj = NULL;
+	}
+}
+
+
+/*
+ * CS
+ */
+void r600_fence_ring_emit(struct radeon_device *rdev,
+			  struct radeon_fence *fence)
+{
+	/* Emit fence sequence & fire IRQ */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+	radeon_ring_write(rdev, fence->seq);
+}
+
+int r600_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset,
+		  uint64_t dst_offset,
+		  unsigned num_pages,
+		  struct radeon_fence *fence)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+int r600_copy_blit(struct radeon_device *rdev,
+		   uint64_t src_offset, uint64_t dst_offset,
+		   unsigned num_pages, struct radeon_fence *fence)
+{
+	r600_blit_prepare_copy(rdev, num_pages * 4096);
+	r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * 4096);
+	r600_blit_done_copy(rdev, fence);
+	return 0;
+}
+
+int r600_irq_process(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+int r600_irq_set(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+int r600_set_surface_reg(struct radeon_device *rdev, int reg,
+			 uint32_t tiling_flags, uint32_t pitch,
+			 uint32_t offset, uint32_t obj_size)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
+{
+	/* FIXME: implement */
+}
+
+
+bool r600_card_posted(struct radeon_device *rdev)
+{
+	uint32_t reg;
+
+	/* first check CRTCs */
+	reg = RREG32(D1CRTC_CONTROL) |
+	      RREG32(D2CRTC_CONTROL);
+	if (reg & CRTC_EN)
+		return true;
+
+	/* then check MEM_SIZE, in case the crtcs are off
*/ + if (RREG32(CONFIG_MEMSIZE)) + return true; + + return false; +} + +int r600_resume(struct radeon_device *rdev) +{ + int r; + + r600_gpu_reset(rdev); + r600_mc_resume(rdev); + r = r600_pcie_gart_enable(rdev); + if (r) + return r; + r600_gpu_init(rdev); + r = radeon_ring_init(rdev, rdev->cp.ring_size); + if (r) + return r; + r = r600_cp_load_microcode(rdev); + if (r) + return r; + r = r600_cp_resume(rdev); + if (r) + return r; + r = r600_wb_init(rdev); + if (r) + return r; + return 0; +} + +int r600_suspend(struct radeon_device *rdev) +{ + /* FIXME: we should wait for ring to be empty */ + r600_cp_stop(rdev); + return 0; +} + +/* Plan is to move initialization in that function and use + * helper function so that radeon_device_init pretty much + * do nothing more than calling asic specific function. This + * should also allow to remove a bunch of callback function + * like vram_info. + */ +int r600_init(struct radeon_device *rdev) { - uint32_t r; + int r; - WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff)); - (void)RREG32(R600_PCIE_PORT_INDEX); - r = RREG32(R600_PCIE_PORT_DATA); + rdev->new_init_path = true; + r = radeon_dummy_page_init(rdev); + if (r) + return r; + if (r600_debugfs_mc_info_init(rdev)) { + DRM_ERROR("Failed to register debugfs file for mc !\n"); + } + /* This don't do much */ + r = radeon_gem_init(rdev); + if (r) + return r; + /* Read BIOS */ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; + } + /* Must be an ATOMBIOS */ + if (!rdev->is_atom_bios) + return -EINVAL; + r = radeon_atombios_init(rdev); + if (r) + return r; + /* Post card if necessary */ + if (!r600_card_posted(rdev) && rdev->bios) { + DRM_INFO("GPU not posted. 
posting now...\n"); + atom_asic_init(rdev->mode_info.atom_context); + } + /* Initialize scratch registers */ + r600_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); + r = radeon_clocks_init(rdev); + if (r) + return r; + /* Fence driver */ + r = radeon_fence_driver_init(rdev); + if (r) + return r; + r = r600_mc_init(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + r600_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return r600_init(rdev); + } + return r; + } + /* Memory manager */ + r = radeon_object_init(rdev); + if (r) + return r; + rdev->cp.ring_obj = NULL; + r600_ring_init(rdev, 1024 * 1024); + + if (!rdev->me_fw || !rdev->pfp_fw) { + r = r600_cp_init_microcode(rdev); + if (r) { + DRM_ERROR("Failed to load firmware!\n"); + return r; + } + } + + r = r600_resume(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + r600_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return r600_init(rdev); + } + return r; + } + r = radeon_ib_pool_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); + return r; + } + r = r600_blit_init(rdev); + if (r) { + DRM_ERROR("radeon: failled blitter (%d).\n", r); + return r; + } + r = radeon_ib_test(rdev); + if (r) { + DRM_ERROR("radeon: failled testing IB (%d).\n", r); + return r; + } + return 0; +} + +void r600_fini(struct radeon_device *rdev) +{ + /* Suspend operations */ + r600_suspend(rdev); + + r600_blit_fini(rdev); + radeon_ring_fini(rdev); + r600_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); + radeon_gem_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_clocks_fini(rdev); +#if __OS_HAS_AGP + if (rdev->flags & RADEON_IS_AGP) + radeon_agp_fini(rdev); +#endif + radeon_object_fini(rdev); + if (rdev->is_atom_bios) + radeon_atombios_fini(rdev); + else + radeon_combios_fini(rdev); + kfree(rdev->bios); + rdev->bios = NULL; + 
radeon_dummy_page_fini(rdev);
+}
+
+
+/*
+ * CS stuff
+ */
+void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	/* FIXME: implement */
+	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	radeon_ring_write(rdev, ib->gpu_addr & 0xFFFFFFFC);
+	radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF);
+	radeon_ring_write(rdev, ib->length_dw);
+}
+
+/*
+ * Check that indirect buffers execute: seed a scratch register with
+ * 0xCAFEDEAD, schedule a 16 dword IB whose first packet writes 0xDEADBEEF to
+ * that register (the rest is PACKET2 padding), wait for the IB fence and then
+ * poll the register for up to rdev->usec_timeout iterations.
+ * Returns 0 on success, -EINVAL if the value never shows up, or the error of
+ * the failing helper. The scratch register and, once obtained, the IB are
+ * released on every exit path.
+ */
+int r600_ib_test(struct radeon_device *rdev)
+{
+	struct radeon_ib *ib;
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+	r = radeon_ib_get(rdev, &ib);
+	if (r) {
+		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+		/* no IB was handed out, only the scratch reg is held */
+		radeon_scratch_free(rdev, scratch);
+		return r;
+	}
+	ib->ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
+	ib->ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	ib->ptr[2] = 0xDEADBEEF;
+	ib->ptr[3] = PACKET2(0);
+	ib->ptr[4] = PACKET2(0);
+	ib->ptr[5] = PACKET2(0);
+	ib->ptr[6] = PACKET2(0);
+	ib->ptr[7] = PACKET2(0);
+	ib->ptr[8] = PACKET2(0);
+	ib->ptr[9] = PACKET2(0);
+	ib->ptr[10] = PACKET2(0);
+	ib->ptr[11] = PACKET2(0);
+	ib->ptr[12] = PACKET2(0);
+	ib->ptr[13] = PACKET2(0);
+	ib->ptr[14] = PACKET2(0);
+	ib->ptr[15] = PACKET2(0);
+	ib->length_dw = 16;
+	r = radeon_ib_schedule(rdev, ib);
+	if (r) {
+		radeon_scratch_free(rdev, scratch);
+		radeon_ib_free(rdev, &ib);
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		return r;
+	}
+	r = radeon_fence_wait(ib->fence, false);
+	if (r) {
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+		/* same cleanup as the schedule failure path above */
+		radeon_scratch_free(rdev, scratch);
+		radeon_ib_free(rdev, &ib);
+		return r;
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ib test succeeded in %u usecs\n", i);
+	} else {
+		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	radeon_scratch_free(rdev, scratch);
+	radeon_ib_free(rdev, &ib);
 	return r;
 }
-void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+
+
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static int r600_debugfs_cp_ring_info(struct seq_file *m, void *data)
 {
-	WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
-	(void)RREG32(R600_PCIE_PORT_INDEX);
-	WREG32(R600_PCIE_PORT_DATA, (v));
-	(void)RREG32(R600_PCIE_PORT_DATA);
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t rdp, wdp;
+	unsigned count, i, j;
+
+	radeon_ring_free_size(rdev);
+	rdp = RREG32(CP_RB_RPTR);
+	wdp = RREG32(CP_RB_WPTR);
+	count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
+	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(CP_STAT));
+	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
+	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
+	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
+	seq_printf(m, "%u dwords in ring\n", count);
+	for (j = 0; j <= count; j++) {
+		i = (rdp + j) & rdev->cp.ptr_mask;
+		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
+	}
+	return 0;
+}
+
+static int r600_debugfs_mc_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+
+	DREG32_SYS(m, rdev, R_000E50_SRBM_STATUS);
+	DREG32_SYS(m, rdev, VM_L2_STATUS);
+	return 0;
+}
+
+static struct drm_info_list r600_mc_info_list[] = {
+	{"r600_mc_info", r600_debugfs_mc_info, 0, NULL},
+	{"r600_ring_info", r600_debugfs_cp_ring_info, 0, NULL},
+};
+#endif
+
+int r600_debugfs_mc_info_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, r600_mc_info_list, ARRAY_SIZE(r600_mc_info_list));
+#else
+	return 0;
+#endif
 }
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
new file mode
100644 index 00000000000..c51402e9249 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit.c @@ -0,0 +1,855 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Alex Deucher + */ +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_drv.h" + +#include "r600_blit_shaders.h" + +#define DI_PT_RECTLIST 0x11 +#define DI_INDEX_SIZE_16_BIT 0x0 +#define DI_SRC_SEL_AUTO_INDEX 0x2 + +#define FMT_8 0x1 +#define FMT_5_6_5 0x8 +#define FMT_8_8_8_8 0x1a +#define COLOR_8 0x1 +#define COLOR_5_6_5 0x8 +#define COLOR_8_8_8_8 0x1a + +static inline void +set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr) +{ + u32 cb_color_info; + int pitch, slice; + RING_LOCALS; + DRM_DEBUG("\n"); + + h = (h + 7) & ~7; + if (h < 8) + h = 8; + + cb_color_info = ((format << 2) | (1 << 27)); + pitch = (w / 8) - 1; + slice = ((w * h) / 64) - 1; + + if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) && + ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) { + BEGIN_RING(21 + 2); + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(gpu_addr >> 8); + OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0)); + OUT_RING(2 << 0); + } else { + BEGIN_RING(21); + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(gpu_addr >> 8); + } + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((pitch << 0) | (slice << 10)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(cb_color_info); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_FRAG - 
R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + ADVANCE_RING(); +} + +static inline void +cp_set_surface_sync(drm_radeon_private_t *dev_priv, + u32 sync_type, u32 size, u64 mc_addr) +{ + u32 cp_coher_size; + RING_LOCALS; + DRM_DEBUG("\n"); + + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + BEGIN_RING(5); + OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3)); + OUT_RING(sync_type); + OUT_RING(cp_coher_size); + OUT_RING((mc_addr >> 8)); + OUT_RING(10); /* poll interval */ + ADVANCE_RING(); +} + +static inline void +set_shaders(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + u64 gpu_addr; + int shader_size, i; + u32 *vs, *ps; + uint32_t sq_pgm_resources; + RING_LOCALS; + DRM_DEBUG("\n"); + + /* load shaders */ + vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset); + ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256); + + shader_size = r6xx_vs_size; + for (i = 0; i < shader_size; i++) + vs[i] = r6xx_vs[i]; + shader_size = r6xx_ps_size; + for (i = 0; i < shader_size; i++) + ps[i] = r6xx_ps[i]; + + dev_priv->blit_vb->used = 512; + + gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset; + + /* setup shader regs */ + sq_pgm_resources = (1 << 0); + + BEGIN_RING(9 + 12); + /* VS */ + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(gpu_addr >> 8); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(sq_pgm_resources); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + /* PS */ + 
OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((gpu_addr + 256) >> 8); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(sq_pgm_resources | (1 << 28)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(2); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + ADVANCE_RING(); + + cp_set_surface_sync(dev_priv, + R600_SH_ACTION_ENA, 512, gpu_addr); +} + +static inline void +set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) +{ + uint32_t sq_vtx_constant_word2; + RING_LOCALS; + DRM_DEBUG("\n"); + + sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8)); + + BEGIN_RING(9); + OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + OUT_RING(0x460); + OUT_RING(gpu_addr & 0xffffffff); + OUT_RING(48 - 1); + OUT_RING(sq_vtx_constant_word2); + OUT_RING(1 << 0); + OUT_RING(0); + OUT_RING(0); + OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30); + ADVANCE_RING(); + + if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, 48, gpu_addr); + else + cp_set_surface_sync(dev_priv, + R600_VC_ACTION_ENA, 48, gpu_addr); +} + +static inline void +set_tex_resource(drm_radeon_private_t *dev_priv, + int format, int w, int h, int pitch, u64 gpu_addr) +{ + uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; + RING_LOCALS; + DRM_DEBUG("\n"); + + if (h < 1) + h = 1; + + sq_tex_resource_word0 = (1 << 0); + 
sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | + ((w - 1) << 19)); + + sq_tex_resource_word1 = (format << 26); + sq_tex_resource_word1 |= ((h - 1) << 0); + + sq_tex_resource_word4 = ((1 << 14) | + (0 << 16) | + (1 << 19) | + (2 << 22) | + (3 << 25)); + + BEGIN_RING(9); + OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + OUT_RING(0); + OUT_RING(sq_tex_resource_word0); + OUT_RING(sq_tex_resource_word1); + OUT_RING(gpu_addr >> 8); + OUT_RING(gpu_addr >> 8); + OUT_RING(sq_tex_resource_word4); + OUT_RING(0); + OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30); + ADVANCE_RING(); + +} + +static inline void +set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2) +{ + RING_LOCALS; + DRM_DEBUG("\n"); + + BEGIN_RING(12); + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); + OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((x1 << 0) | (y1 << 16)); + OUT_RING((x2 << 0) | (y2 << 16)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); + OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); + OUT_RING((x2 << 0) | (y2 << 16)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); + OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); + OUT_RING((x2 << 0) | (y2 << 16)); + ADVANCE_RING(); +} + +static inline void +draw_auto(drm_radeon_private_t *dev_priv) +{ + RING_LOCALS; + DRM_DEBUG("\n"); + + BEGIN_RING(10); + OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2); + OUT_RING(DI_PT_RECTLIST); + + OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + OUT_RING(DI_INDEX_SIZE_16_BIT); + + OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + OUT_RING(1); + + OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); + OUT_RING(3); + OUT_RING(DI_SRC_SEL_AUTO_INDEX); + + ADVANCE_RING(); + COMMIT_RING(); +} + +static inline void 
+set_default_state(drm_radeon_private_t *dev_priv) +{ + int default_state_dw, i; + u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; + u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; + int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; + int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; + int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; + RING_LOCALS; + + switch ((dev_priv->flags & RADEON_FAMILY_MASK)) { + case CHIP_R600: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV630: + case CHIP_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + default: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV770: + num_ps_gprs = 192; 
+ num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + } + + if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) + sq_config = 0; + else + sq_config = R600_VC_ENABLE; + + sq_config |= (R600_DX9_CONSTS | + R600_ALU_INST_PREFER_VECTOR | + R600_PS_PRIO(0) | + R600_VS_PRIO(1) | + R600_GS_PRIO(2) | + R600_ES_PRIO(3)); + + sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) | + R600_NUM_VS_GPRS(num_vs_gprs) | + R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); + sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) | + R600_NUM_ES_GPRS(num_es_gprs)); + sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) | + R600_NUM_VS_THREADS(num_vs_threads) | + R600_NUM_GS_THREADS(num_gs_threads) | + R600_NUM_ES_THREADS(num_es_threads)); + sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | + 
R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); + sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | + R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries)); + + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) { + default_state_dw = r7xx_default_size * 4; + BEGIN_RING(default_state_dw + 10); + for (i = 0; i < default_state_dw; i++) + OUT_RING(r7xx_default_state[i]); + } else { + default_state_dw = r6xx_default_size * 4; + BEGIN_RING(default_state_dw + 10); + for (i = 0; i < default_state_dw; i++) + OUT_RING(r6xx_default_state[i]); + } + OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); + OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); + /* SQ config */ + OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6)); + OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2); + OUT_RING(sq_config); + OUT_RING(sq_gpr_resource_mgmt_1); + OUT_RING(sq_gpr_resource_mgmt_2); + OUT_RING(sq_thread_resource_mgmt); + OUT_RING(sq_stack_resource_mgmt_1); + OUT_RING(sq_stack_resource_mgmt_2); + ADVANCE_RING(); +} + +static inline uint32_t i2f(uint32_t input) +{ + u32 result, i, exponent, fraction; + + if ((input & 0x3fff) == 0) + result = 0; /* 0 is a special case */ + else { + exponent = 140; /* exponent biased by 127; */ + fraction = (input & 0x3fff) << 10; /* cheat and only + handle numbers below 2^^15 */ + for (i = 0; i < 14; i++) { + if (fraction & 0x800000) + break; + else { + fraction = fraction << 1; /* keep + shifting left until top bit = 1 */ + exponent = exponent - 1; + } + } + result = exponent << 23 | (fraction & 0x7fffff); /* mask + off top bit; assumed 1 */ + } + return result; +} + + +int r600_nomm_get_vb(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + dev_priv->blit_vb = radeon_freelist_get(dev); + if (!dev_priv->blit_vb) { + DRM_ERROR("Unable to allocate vertex buffer for blit\n"); + return -EAGAIN; + } + return 0; +} + +void r600_nomm_put_vb(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = 
dev->dev_private; + + dev_priv->blit_vb->used = 0; + radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb); +} + +void *r600_nomm_get_vb_ptr(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + return (((char *)dev->agp_buffer_map->handle + + dev_priv->blit_vb->offset + dev_priv->blit_vb->used)); +} + +int +r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + DRM_DEBUG("\n"); + + r600_nomm_get_vb(dev); + + dev_priv->blit_vb->file_priv = file_priv; + + set_default_state(dev_priv); + set_shaders(dev); + + return 0; +} + + +void +r600_done_blit_copy(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + RING_LOCALS; + DRM_DEBUG("\n"); + + BEGIN_RING(5); + OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); + OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); + /* wait for 3D idle clean */ + OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2); + OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN); + + ADVANCE_RING(); + COMMIT_RING(); + + r600_nomm_put_vb(dev); +} + +void +r600_blit_copy(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int size_bytes) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + int max_bytes; + u64 vb_addr; + u32 *vb; + + vb = r600_nomm_get_vb_ptr(dev); + + if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { + max_bytes = 8192; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = src_gpu_addr & 255; + int dst_x = dst_gpu_addr & 255; + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = 
(max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + /* every rectangle consumes 48 bytes (12 dwords) of vertex data; when the buffer cannot hold another one, discard it and continue in a fresh one */ + if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { + + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } + /* three vertices of four dwords each: destination x, y followed by source x, y, stored as float bit patterns */ + vb[0] = i2f(dst_x); + vb[1] = 0; + vb[2] = i2f(src_x); + vb[3] = 0; + + vb[4] = i2f(dst_x); + vb[5] = i2f(h); + vb[6] = i2f(src_x); + vb[7] = i2f(h); + + vb[8] = i2f(dst_x + cur_size); + vb[9] = i2f(h); + vb[10] = i2f(src_x + cur_size); + vb[11] = i2f(h); + + /* src: the bytes are sampled as an 8-bit texture whose width and pitch both end at src_x + cur_size */ + set_tex_resource(dev_priv, FMT_8, + src_x + cur_size, h, src_x + cur_size, + src_gpu_addr); + + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst: 8-bit colour buffer of the same shape */ + set_render_target(dev_priv, COLOR_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors: exactly the destination rectangle */ + set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h); + + /* Vertex buffer setup: GPU address of the 12 dwords written above */ + vb_addr = dev_priv->gart_buffers_offset + + dev_priv->blit_vb->offset + + dev_priv->blit_vb->used; + set_vtx_resource(dev_priv, vb_addr); + + /* draw */ + draw_auto(dev_priv); + + cp_set_surface_sync(dev_priv, + R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + /* advance past this rectangle's vertices and the bytes it copied */ + vb += 12; + dev_priv->blit_vb->used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } else { + max_bytes = 8192 * 4; /* size and both addresses are multiples of 4: rows may be four times longer in this path */ + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = (src_gpu_addr & 255); + int dst_x = (dst_gpu_addr & 255); + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = 
(max_bytes - src_x); + } + /* every rectangle consumes 48 bytes (12 dwords) of vertex data; when the buffer cannot hold another one, discard it and continue in a fresh one */ + if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } + /* x coordinates are byte offsets divided by 4 because this path moves 32-bit texels */ + vb[0] = i2f(dst_x / 4); + vb[1] = 0; + vb[2] = i2f(src_x / 4); + vb[3] = 0; + + vb[4] = i2f(dst_x / 4); + vb[5] = i2f(h); + vb[6] = i2f(src_x / 4); + vb[7] = i2f(h); + + vb[8] = i2f((dst_x + cur_size) / 4); + vb[9] = i2f(h); + vb[10] = i2f((src_x + cur_size) / 4); + vb[11] = i2f(h); + + /* src */ + set_tex_resource(dev_priv, FMT_8_8_8_8, + (src_x + cur_size) / 4, + h, (src_x + cur_size) / 4, + src_gpu_addr); + + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst: the width is in texels (r600_blit_swap() passes dst_pitch / cpp), so the byte extent is divided by 4 to match the texture and the vertices */ + set_render_target(dev_priv, COLOR_8_8_8_8, + (dst_x + cur_size) / 4, h, + dst_gpu_addr); + + /* scissors: right edge is the same texel coordinate as vb[8] */ + set_scissors(dev_priv, (dst_x / 4), 0, ((dst_x + cur_size) / 4), h); + + /* Vertex buffer setup */ + vb_addr = dev_priv->gart_buffers_offset + + dev_priv->blit_vb->offset + + dev_priv->blit_vb->used; + set_vtx_resource(dev_priv, vb_addr); + + /* draw */ + draw_auto(dev_priv); + + cp_set_surface_sync(dev_priv, + R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + vb += 12; + dev_priv->blit_vb->used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } +} + /* Draw one textured rectangle that copies a w x h pixel area from (sx, sy) in the source surface to (dx, dy) in the destination surface. cpp is the pixel size in bytes and selects the formats; the pitches are given in bytes. */ +void +r600_blit_swap(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int sx, int sy, int dx, int dy, + int w, int h, int src_pitch, int dst_pitch, int cpp) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + int cb_format, tex_format; + u64 vb_addr; + u32 *vb; + + vb = (u32 *) ((char *)dev->agp_buffer_map->handle + + dev_priv->blit_vb->offset + dev_priv->blit_vb->used); + + if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { + + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + + 
set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } + /* pick matching colour-buffer and texture formats for the pixel size; anything other than 4 or 2 bytes falls back to the 8-bit formats */ + if (cpp == 4) { + cb_format = COLOR_8_8_8_8; + tex_format = FMT_8_8_8_8; + } else if (cpp == 2) { + cb_format = COLOR_5_6_5; + tex_format = FMT_5_6_5; + } else { + cb_format = COLOR_8; + tex_format = FMT_8; + } + /* three vertices of four dwords each: destination x, y followed by source x, y, stored as float bit patterns */ + vb[0] = i2f(dx); + vb[1] = i2f(dy); + vb[2] = i2f(sx); + vb[3] = i2f(sy); + + vb[4] = i2f(dx); + vb[5] = i2f(dy + h); + vb[6] = i2f(sx); + vb[7] = i2f(sy + h); + + vb[8] = i2f(dx + w); + vb[9] = i2f(dy + h); + vb[10] = i2f(sx + w); + vb[11] = i2f(sy + h); + + /* src: width and pitch in pixels, height down to the last source row read */ + set_tex_resource(dev_priv, tex_format, + src_pitch / cpp, + sy + h, src_pitch / cpp, + src_gpu_addr); + + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, (src_pitch * (sy + h)), src_gpu_addr); + + /* dst: width in pixels, height down to the last destination row written */ + set_render_target(dev_priv, cb_format, + dst_pitch / cpp, dy + h, + dst_gpu_addr); + + /* scissors: exactly the destination rectangle */ + set_scissors(dev_priv, dx, dy, dx + w, dy + h); + + /* Vertex buffer setup: GPU address of the 12 dwords written above */ + vb_addr = dev_priv->gart_buffers_offset + + dev_priv->blit_vb->offset + + dev_priv->blit_vb->used; + set_vtx_resource(dev_priv, vb_addr); + + /* draw */ + draw_auto(dev_priv); + + cp_set_surface_sync(dev_priv, + R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, + dst_pitch * (dy + h), dst_gpu_addr); + /* account for the 12 dwords of vertex data just used */ + dev_priv->blit_vb->used += 12 * 4; +} diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c new file mode 100644 index 00000000000..5755647e688 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -0,0 +1,777 @@ +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon.h" + +#include "r600d.h" +#include "r600_blit_shaders.h" + /* values written by draw_auto(): primitive type, index size and index source */ +#define DI_PT_RECTLIST 0x11 +#define DI_INDEX_SIZE_16_BIT 0x0 +#define DI_SRC_SEL_AUTO_INDEX 0x2 + /* texture (FMT_*) and colour buffer (COLOR_*) format codes used by the blit paths */ +#define FMT_8 0x1 +#define FMT_5_6_5 0x8 +#define FMT_8_8_8_8 0x1a +#define COLOR_8 0x1 +#define COLOR_5_6_5 0x8 +#define COLOR_8_8_8_8 0x1a + +/* Program colour buffer 0 (base, size, view, info, tile, frag, mask) for a w x h surface at gpu_addr. Emits 21 dwords, or 23 on families strictly between CHIP_R600 and CHIP_RV770, which get an extra SURFACE_BASE_UPDATE packet. */ +static void +set_render_target(struct radeon_device *rdev, int format, + 
int w, int h, u64 gpu_addr) +{ + u32 cb_color_info; + int pitch, slice; + /* round the height up to a multiple of 8, with a minimum of 8 */ + h = (h + 7) & ~7; + if (h < 8) + h = 8; + /* pitch and slice are written as (w / 8) - 1 and (w * h / 64) - 1; presumably units of 8 and 64 pixels, confirm against the register reference */ + cb_color_info = ((format << 2) | (1 << 27)); + pitch = (w / 8) - 1; + slice = ((w * h) / 64) - 1; + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + /* extra 2-dword packet, emitted only for families after CHIP_R600 and before CHIP_RV770 */ + if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) { + radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0)); + radeon_ring_write(rdev, 2 << 0); + } + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (pitch << 0) | (slice << 10)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, cb_color_info); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); +} + +/* Emit a SURFACE_SYNC packet of type sync_type over size bytes starting at mc_addr; emits 5dw */ +static void +cp_set_surface_sync(struct radeon_device *rdev, + u32 sync_type, u32 size, + u64 mc_addr) +{ + u32 cp_coher_size; + /* the size is written in 256-byte units, rounded up; 0xffffffff is passed through unchanged */ + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + radeon_ring_write(rdev, 
PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(rdev, sync_type); + radeon_ring_write(rdev, cp_coher_size); + radeon_ring_write(rdev, mc_addr >> 8); /* the base address is written shifted right by 8 */ + radeon_ring_write(rdev, 10); /* poll interval */ +} + +/* Point the vertex and pixel shader registers at the programs stored in the blit shader object, then emit a shader-action surface sync. Seven 3-dword register writes: emits 21dw + 1 surface sync = 26dw */ +static void +set_shaders(struct radeon_device *rdev) +{ + u64 gpu_addr; + u32 sq_pgm_resources; + + /* setup shader regs */ + sq_pgm_resources = (1 << 0); + + /* VS: program start (address >> 8), resources, CF offset */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, sq_pgm_resources); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + /* PS: program start (address >> 8), resources with bit 28 added, exports, CF offset */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, sq_pgm_resources | (1 << 28)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 2); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + /* gpu_addr still holds the pixel shader address here, so the 512-byte sync range starts at the PS */ + cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); +} + 
+/* Describe the vertex data at gpu_addr with a SET_RESOURCE packet, then emit a 48-byte surface sync over it (TC action on some families, VC action on the rest); emits 9 + 1 sync (5) = 14 */ +static void +set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) +{ + u32 sq_vtx_constant_word2; + /* low 8 bits: address bits 32..39; the 16 at bit 8 is presumably the per-vertex stride in bytes (four dwords), confirm */ + sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); + radeon_ring_write(rdev, 0x460); + radeon_ring_write(rdev, gpu_addr & 0xffffffff); + radeon_ring_write(rdev, 48 - 1); /* 48 matches the 12 dwords of vertex data the copy routines write per rectangle */ + radeon_ring_write(rdev, sq_vtx_constant_word2); + radeon_ring_write(rdev, 1 << 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30); + /* these five families sync with the TC action bit; all others use the VC action bit */ + if ((rdev->family == CHIP_RV610) || + (rdev->family == CHIP_RV620) || + (rdev->family == CHIP_RS780) || + (rdev->family == CHIP_RS880) || + (rdev->family == CHIP_RV710)) + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, 48, gpu_addr); + else + cp_set_surface_sync(rdev, + PACKET3_VC_ACTION_ENA, 48, gpu_addr); +} + +/* Describe the source surface at gpu_addr as texture resource 0 with a SET_RESOURCE packet; emits 9 */ +static void +set_tex_resource(struct radeon_device *rdev, + int format, int w, int h, int pitch, + u64 gpu_addr) +{ + uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; + /* a height below 1 is treated as 1 so that h - 1 never goes negative */ + if (h < 1) + h = 1; + /* word0 carries (pitch / 8) - 1 at bit 8 and w - 1 at bit 19 */ + sq_tex_resource_word0 = (1 << 0); + sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | + ((w - 1) << 19)); + /* word1 carries h - 1 in the low bits and the format at bit 26 */ + sq_tex_resource_word1 = (format << 26); + sq_tex_resource_word1 |= ((h - 1) << 0); + + sq_tex_resource_word4 = ((1 << 14) | + (0 << 16) | + (1 << 19) | + (2 << 22) | + (3 << 25)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, sq_tex_resource_word0); + radeon_ring_write(rdev, sq_tex_resource_word1); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, gpu_addr >> 8); /* the same shifted address is written into both address words */ + radeon_ring_write(rdev, sq_tex_resource_word4); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30); +} + +/* Write the rectangle (x1, y1)-(x2, y2) into the screen, generic and window scissor register pairs; emits 12 */ +static void +set_scissors(struct radeon_device *rdev, int x1, int y1, + int x2, int y2) +{ + 
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + /* the generic and window scissors take the same corners, with bit 31 additionally set in the top-left word */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); +} + +/* Draw one rectangle-list primitive from 3 auto-generated indices, single instance; emits 10 */ +static void +draw_auto(struct radeon_device *rdev) +{ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(rdev, DI_PT_RECTLIST); + + radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0)); + radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT); + + radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0)); + radeon_ring_write(rdev, 1); + + radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); + radeon_ring_write(rdev, 3); + radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX); + +} + +/* Emit an indirect buffer packet pointing at the default state stored in the blit shader object, a cache flush event, and the per-family SQ configuration registers; emits 14 (4 + 2 + 8) */ +static void +set_default_state(struct radeon_device *rdev) +{ + u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; + u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; + int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; + int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; + int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; + u64 gpu_addr; + /* per-family split of GPRs, threads and stack entries between the PS, VS, GS and ES stages */ + switch (rdev->family) { + case CHIP_R600: + num_ps_gprs = 192; + num_vs_gprs = 56; + 
num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV630: + case CHIP_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + default: /* any family without a case of its own uses these values too */ + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV770: /* from here on the cases assign no GS/ES threads or stack entries */ + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + 
num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + } + /* these five families run without VC_ENABLE; the list is the same one set_vtx_resource() uses to choose the TC sync, so RS880 must be in it */ + if ((rdev->family == CHIP_RV610) || + (rdev->family == CHIP_RV620) || + (rdev->family == CHIP_RS780) || + (rdev->family == CHIP_RS880) || + (rdev->family == CHIP_RV710)) + sq_config = 0; + else + sq_config = VC_ENABLE; + + sq_config |= (DX9_CONSTS | + ALU_INST_PREFER_VECTOR | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + /* pack the per-family counts chosen above into the five resource management register values */ + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | + NUM_VS_GPRS(num_vs_gprs) | + NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | + NUM_ES_GPRS(num_es_gprs)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | + NUM_VS_THREADS(num_vs_threads) | + NUM_GS_THREADS(num_gs_threads) | + NUM_ES_THREADS(num_es_threads)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | + NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | + NUM_ES_STACK_ENTRIES(num_es_stack_entries)); + + /* emit an IB pointing at default state: address low bits, address bits 32..39, then the length in dwords (state_len is in bytes) */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC); + radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF); + radeon_ring_write(rdev, (rdev->r600_blit.state_len / 4)); + + radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0)); + radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT); + /* SQ config: one packet writing six consecutive registers starting at SQ_CONFIG */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6)); + radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + 
radeon_ring_write(rdev, sq_config); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); + radeon_ring_write(rdev, sq_thread_resource_mgmt); + radeon_ring_write(rdev, sq_stack_resource_mgmt_1); + radeon_ring_write(rdev, sq_stack_resource_mgmt_2); +} + /* Return the IEEE-754 single-precision bit pattern of a small unsigned integer, using integer operations only. Only the low 14 bits of input are used; when they are all zero the result is 0. */ +static inline uint32_t i2f(uint32_t input) +{ + u32 result, i, exponent, fraction; + + if ((input & 0x3fff) == 0) + result = 0; /* 0 is a special case */ + else { + exponent = 140; /* 127 bias plus 13: bit 13 of the input starts out at the implicit-one position (bit 23) */ + fraction = (input & 0x3fff) << 10; /* cheat and only + handle numbers below 2^14 (the low 14 bits) */ + for (i = 0; i < 14; i++) { + if (fraction & 0x800000) + break; + else { + fraction = fraction << 1; /* keep + shifting left until top bit = 1, lowering the exponent once per shift */ + exponent = exponent - 1; + } + } + result = exponent << 23 | (fraction & 0x7fffff); /* mask + off top bit; assumed 1 */ + } + return result; +} + /* Create, pin and fill the VRAM object that holds the default state followed by the vertex and pixel shaders, each section starting on a 256-byte boundary. Returns 0 or the error from the failing radeon_object_* call. */ +int r600_blit_init(struct radeon_device *rdev) +{ + u32 obj_size; + int r; + void *ptr; + + rdev->r600_blit.state_offset = 0; + /* state_len is kept in bytes: the table size times 4 */ + if (rdev->family >= CHIP_RV770) + rdev->r600_blit.state_len = r7xx_default_size * 4; + else + rdev->r600_blit.state_len = r6xx_default_size * 4; + + obj_size = rdev->r600_blit.state_len; + obj_size = ALIGN(obj_size, 256); + /* the r6xx shaders are used for every family; only the default state differs */ + rdev->r600_blit.vs_offset = obj_size; + obj_size += r6xx_vs_size * 4; + obj_size = ALIGN(obj_size, 256); + + rdev->r600_blit.ps_offset = obj_size; + obj_size += r6xx_ps_size * 4; + obj_size = ALIGN(obj_size, 256); + + r = radeon_object_create(rdev, NULL, obj_size, + true, RADEON_GEM_DOMAIN_VRAM, + false, &rdev->r600_blit.shader_obj); + if (r) { + DRM_ERROR("r600 failed to allocate shader\n"); + return r; + } + + r = radeon_object_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_gpu_addr); + if (r) { + DRM_ERROR("failed to pin blit object %d\n", r); + return r; /* NOTE(review): the object created above is not released on this path; confirm whether the caller cleans up */ + } + + DRM_DEBUG("r6xx blit allocated bo @ 0x%16llx %08x vs %08x ps %08x\n", + rdev->r600_blit.shader_gpu_addr, obj_size, + 
rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset); + + r = radeon_object_kmap(rdev->r600_blit.shader_obj, &ptr); + if (r) { + DRM_ERROR("failed to map blit object %d\n", r); + return r; + } + + if (rdev->family >= CHIP_RV770) + memcpy_toio(ptr + rdev->r600_blit.state_offset, r7xx_default_state, rdev->r600_blit.state_len); + else + memcpy_toio(ptr + rdev->r600_blit.state_offset, r6xx_default_state, rdev->r600_blit.state_len); + + memcpy(ptr + rdev->r600_blit.vs_offset, r6xx_vs, r6xx_vs_size * 4); + memcpy(ptr + rdev->r600_blit.ps_offset, r6xx_ps, r6xx_ps_size * 4); + + radeon_object_kunmap(rdev->r600_blit.shader_obj); + return 0; +} + +void r600_blit_fini(struct radeon_device *rdev) +{ + radeon_object_unpin(rdev->r600_blit.shader_obj); + radeon_object_unref(&rdev->r600_blit.shader_obj); +} + +int r600_vb_ib_get(struct radeon_device *rdev) +{ + int r; + r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib); + if (r) { + DRM_ERROR("failed to get IB for vertex buffer\n"); + return r; + } + + rdev->r600_blit.vb_total = 64*1024; + rdev->r600_blit.vb_used = 0; + return 0; +} + +void r600_vb_ib_put(struct radeon_device *rdev) +{ + mutex_lock(&rdev->ib_pool.mutex); + radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence); + list_add_tail(&rdev->r600_blit.vb_ib->list, &rdev->ib_pool.scheduled_ibs); + mutex_unlock(&rdev->ib_pool.mutex); + radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); +} + +int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) +{ + int r; + int ring_size; + const int max_size = 8192*8192; + + r = r600_vb_ib_get(rdev); + WARN_ON(r); + + /* loops of emits 64 + fence emit possible */ + ring_size = ((size_bytes + max_size) / max_size) * 78; + /* set default + shaders */ + ring_size += 40; /* shaders + def state */ + ring_size += 3; /* fence emit for VB IB */ + ring_size += 5; /* done copy */ + ring_size += 3; /* fence emit for done copy */ + r = radeon_ring_lock(rdev, ring_size); + WARN_ON(r); + + set_default_state(rdev); /* 14 */ + 
set_shaders(rdev); /* 26 */ + return 0; +} + /* Finish a blit sequence: emit a cache flush event and a wait for 3D idle/idle-clean, schedule the vertex IB if one is held, emit the caller's fence if given, then unlock and commit the ring. */ +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) +{ + int r; /* NOTE(review): assigned below but never checked */ + + radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0)); + radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT); + /* wait for 3D idle clean */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit); + + if (rdev->r600_blit.vb_ib) + r600_vb_ib_put(rdev); + + if (fence) + r = radeon_fence_emit(rdev, fence); + + radeon_ring_unlock_commit(rdev); +} + /* Copy size_bytes bytes from src_gpu_addr to dst_gpu_addr by drawing textured rectangles into the already locked ring. If the size or either address is not a multiple of 4 the data is handled as 8-bit texels (FMT_8, at most 8192 bytes per row); otherwise as 32-bit texels. Each pass aligns both addresses down to 256 bytes and carries the remainder as an x offset. */ +void r600_kms_blit_copy(struct radeon_device *rdev, + u64 src_gpu_addr, u64 dst_gpu_addr, + int size_bytes) +{ + int max_bytes; + u64 vb_gpu_addr; + u32 *vb; + + DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, + size_bytes, rdev->r600_blit.vb_used); + vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); /* NOTE(review): vb_used counts bytes (it grows by 12 * 4 per rectangle); if vb_ib->ptr is not a byte pointer this offset gets scaled, confirm */ + if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { + max_bytes = 8192; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = src_gpu_addr & 255; + int dst_x = dst_gpu_addr & 255; + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { /* both 256-byte aligned: copy up to 8192 full rows of max_bytes in one rectangle */ + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { /* misaligned: a single row, shortened so that neither x offset plus the length exceeds max_bytes */ + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); /* NOTE(review): this only warns; the vertex writes below still go past vb_total. The disabled block is the non-KMS refill logic and refers to names that do not exist in this function */ + +#if 0 + r600_vb_ib_put(rdev); + + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); +#endif + } + /* three vertices of four dwords each: destination x, y followed by source x, y, stored as float bit patterns */ + vb[0] = i2f(dst_x); + vb[1] = 0; + vb[2] = i2f(src_x); + vb[3] = 0; + + vb[4] 
= i2f(dst_x); + vb[5] = i2f(h); + vb[6] = i2f(src_x); + vb[7] = i2f(h); + + vb[8] = i2f(dst_x + cur_size); + vb[9] = i2f(h); + vb[10] = i2f(src_x + cur_size); + vb[11] = i2f(h); + + /* src: 8-bit texture whose width and pitch both end at src_x + cur_size; 9 ring dwords */ + set_tex_resource(rdev, FMT_8, + src_x + cur_size, h, src_x + cur_size, + src_gpu_addr); + + /* texture cache sync over the source range; 5 ring dwords */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst: 8-bit colour buffer of the same shape; up to 23 ring dwords */ + set_render_target(rdev, COLOR_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors: exactly the destination rectangle; 12 ring dwords */ + set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); + + /* vertex buffer setup: GPU address of the 12 dwords written above; 14 ring dwords */ + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); + + /* draw 10 */ + draw_auto(rdev); + + /* colour buffer sync over the destination range; 5 ring dwords */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + /* advance past this rectangle's vertices and the bytes it copied */ + vb += 12; + rdev->r600_blit.vb_used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } else { + max_bytes = 8192 * 4; /* size and both addresses are multiples of 4: rows may be four times longer in this path */ + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = (src_gpu_addr & 255); + int dst_x = (dst_gpu_addr & 255); + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); /* NOTE(review): this only warns; the vertex writes below still go past vb_total */ + } +#if 0 + if ((rdev->blit_vb->used + 48) > rdev->blit_vb->total) { + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!rdev->blit_vb) + return; + + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } +#endif + /* x coordinates are byte offsets divided by 4 because this path moves 32-bit texels */ + vb[0] = i2f(dst_x / 4); + vb[1] = 0; + vb[2] = i2f(src_x / 4); + vb[3] = 0; + + vb[4] 
/ 4); + vb[5] = i2f(h); + vb[6] = i2f(src_x / 4); + vb[7] = i2f(h); + + vb[8] = i2f((dst_x + cur_size) / 4); + vb[9] = i2f(h); + vb[10] = i2f((src_x + cur_size) / 4); + vb[11] = i2f(h); + + /* src: 32-bit texture, width and pitch in texels; 9 ring dwords */ + set_tex_resource(rdev, FMT_8_8_8_8, + (src_x + cur_size) / 4, + h, (src_x + cur_size) / 4, + src_gpu_addr); + /* texture cache sync over the source range; 5 ring dwords */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst: set_render_target() derives pitch and slice from a width in texels, so the byte extent is divided by 4 to match the texture and the vertices; up to 23 ring dwords */ + set_render_target(rdev, COLOR_8_8_8_8, + (dst_x + cur_size) / 4, h, + dst_gpu_addr); + + /* scissors: right edge is the same texel coordinate as vb[8]; 12 ring dwords */ + set_scissors(rdev, (dst_x / 4), 0, ((dst_x + cur_size) / 4), h); + + /* Vertex buffer setup 14 */ + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); + + /* draw 10 */ + draw_auto(rdev); + + /* colour buffer sync over the destination range; 5 ring dwords */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + /* 78 ring dwords per loop */ + vb += 12; + rdev->r600_blit.vb_used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } +} + diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.c b/drivers/gpu/drm/radeon/r600_blit_shaders.c new file mode 100644 index 00000000000..d745e815c2e --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit_shaders.c @@ -0,0 +1,1072 @@ + +#include +#include + +const u32 r6xx_default_state[] = +{ + 0xc0002400, + 0x00000000, + 0xc0012800, + 0x80000000, + 0x80000000, + 0xc0004600, + 0x00000016, + 0xc0016800, + 0x00000010, + 0x00028000, + 0xc0016800, + 0x00000010, + 0x00008000, + 0xc0016800, + 0x00000542, + 0x07000003, + 0xc0016800, + 0x000005c5, + 0x00000000, + 0xc0016800, + 0x00000363, + 0x00000000, + 0xc0016800, + 0x0000060c, + 0x82000000, + 0xc0016800, + 0x0000060e, + 0x01020204, + 0xc0016f00, + 0x00000000, + 0x00000000, + 0xc0016f00, + 0x00000001, + 0x00000000, + 0xc0096900, + 0x0000022a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 
0x00000000, + 0x00000000, + 0xc0016900, + 0x00000004, + 0x00000000, + 0xc0016900, + 0x0000000a, + 0x00000000, + 0xc0016900, + 0x0000000b, + 0x00000000, + 0xc0016900, + 0x0000010c, + 0x00000000, + 0xc0016900, + 0x0000010d, + 0x00000000, + 0xc0016900, + 0x00000200, + 0x00000000, + 0xc0016900, + 0x00000343, + 0x00000060, + 0xc0016900, + 0x00000344, + 0x00000040, + 0xc0016900, + 0x00000351, + 0x0000aa00, + 0xc0016900, + 0x00000104, + 0x00000000, + 0xc0016900, + 0x0000010e, + 0x00000000, + 0xc0046900, + 0x00000105, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0036900, + 0x00000109, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0046900, + 0x0000030c, + 0x01000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0046900, + 0x00000048, + 0x3f800000, + 0x00000000, + 0x3f800000, + 0x3f800000, + 0xc0016900, + 0x0000008e, + 0x0000000f, + 0xc0016900, + 0x00000080, + 0x00000000, + 0xc0016900, + 0x00000083, + 0x0000ffff, + 0xc0016900, + 0x00000084, + 0x00000000, + 0xc0016900, + 0x00000085, + 0x20002000, + 0xc0016900, + 0x00000086, + 0x00000000, + 0xc0016900, + 0x00000087, + 0x20002000, + 0xc0016900, + 0x00000088, + 0x00000000, + 0xc0016900, + 0x00000089, + 0x20002000, + 0xc0016900, + 0x0000008a, + 0x00000000, + 0xc0016900, + 0x0000008b, + 0x20002000, + 0xc0016900, + 0x0000008c, + 0x00000000, + 0xc0016900, + 0x00000094, + 0x80000000, + 0xc0016900, + 0x00000095, + 0x20002000, + 0xc0026900, + 0x000000b4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000096, + 0x80000000, + 0xc0016900, + 0x00000097, + 0x20002000, + 0xc0026900, + 0x000000b6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000098, + 0x80000000, + 0xc0016900, + 0x00000099, + 0x20002000, + 0xc0026900, + 0x000000b8, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009a, + 0x80000000, + 0xc0016900, + 0x0000009b, + 0x20002000, + 0xc0026900, + 0x000000ba, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009c, + 0x80000000, + 0xc0016900, + 0x0000009d, + 0x20002000, + 0xc0026900, + 0x000000bc, + 0x00000000, 
+ 0x3f800000, + 0xc0016900, + 0x0000009e, + 0x80000000, + 0xc0016900, + 0x0000009f, + 0x20002000, + 0xc0026900, + 0x000000be, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a0, + 0x80000000, + 0xc0016900, + 0x000000a1, + 0x20002000, + 0xc0026900, + 0x000000c0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a2, + 0x80000000, + 0xc0016900, + 0x000000a3, + 0x20002000, + 0xc0026900, + 0x000000c2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a4, + 0x80000000, + 0xc0016900, + 0x000000a5, + 0x20002000, + 0xc0026900, + 0x000000c4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a6, + 0x80000000, + 0xc0016900, + 0x000000a7, + 0x20002000, + 0xc0026900, + 0x000000c6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a8, + 0x80000000, + 0xc0016900, + 0x000000a9, + 0x20002000, + 0xc0026900, + 0x000000c8, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000aa, + 0x80000000, + 0xc0016900, + 0x000000ab, + 0x20002000, + 0xc0026900, + 0x000000ca, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ac, + 0x80000000, + 0xc0016900, + 0x000000ad, + 0x20002000, + 0xc0026900, + 0x000000cc, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ae, + 0x80000000, + 0xc0016900, + 0x000000af, + 0x20002000, + 0xc0026900, + 0x000000ce, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b0, + 0x80000000, + 0xc0016900, + 0x000000b1, + 0x20002000, + 0xc0026900, + 0x000000d0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b2, + 0x80000000, + 0xc0016900, + 0x000000b3, + 0x20002000, + 0xc0026900, + 0x000000d2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000293, + 0x00004010, + 0xc0016900, + 0x00000300, + 0x00000000, + 0xc0016900, + 0x00000301, + 0x00000000, + 0xc0016900, + 0x00000312, + 0xffffffff, + 0xc0016900, + 0x00000307, + 0x00000000, + 0xc0016900, + 0x00000308, + 0x00000000, + 0xc0016900, + 0x00000283, + 0x00000000, + 0xc0016900, + 0x00000292, + 0x00000000, + 0xc0066900, + 0x0000010f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 
0x00000000, + 0xc0016900, + 0x00000206, + 0x00000000, + 0xc0016900, + 0x00000207, + 0x00000000, + 0xc0016900, + 0x00000208, + 0x00000000, + 0xc0046900, + 0x00000303, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0xc0016900, + 0x00000205, + 0x00000004, + 0xc0016900, + 0x00000280, + 0x00000000, + 0xc0016900, + 0x00000281, + 0x00000000, + 0xc0016900, + 0x0000037e, + 0x00000000, + 0xc0016900, + 0x00000382, + 0x00000000, + 0xc0016900, + 0x00000380, + 0x00000000, + 0xc0016900, + 0x00000383, + 0x00000000, + 0xc0016900, + 0x00000381, + 0x00000000, + 0xc0016900, + 0x00000282, + 0x00000008, + 0xc0016900, + 0x00000302, + 0x0000002d, + 0xc0016900, + 0x0000037f, + 0x00000000, + 0xc0016900, + 0x000001b2, + 0x00000000, + 0xc0016900, + 0x000001b6, + 0x00000000, + 0xc0016900, + 0x000001b7, + 0x00000000, + 0xc0016900, + 0x000001b8, + 0x00000000, + 0xc0016900, + 0x000001b9, + 0x00000000, + 0xc0016900, + 0x00000225, + 0x00000000, + 0xc0016900, + 0x00000229, + 0x00000000, + 0xc0016900, + 0x00000237, + 0x00000000, + 0xc0016900, + 0x00000100, + 0x00000800, + 0xc0016900, + 0x00000101, + 0x00000000, + 0xc0016900, + 0x00000102, + 0x00000000, + 0xc0016900, + 0x000002a8, + 0x00000000, + 0xc0016900, + 0x000002a9, + 0x00000000, + 0xc0016900, + 0x00000103, + 0x00000000, + 0xc0016900, + 0x00000284, + 0x00000000, + 0xc0016900, + 0x00000290, + 0x00000000, + 0xc0016900, + 0x00000285, + 0x00000000, + 0xc0016900, + 0x00000286, + 0x00000000, + 0xc0016900, + 0x00000287, + 0x00000000, + 0xc0016900, + 0x00000288, + 0x00000000, + 0xc0016900, + 0x00000289, + 0x00000000, + 0xc0016900, + 0x0000028a, + 0x00000000, + 0xc0016900, + 0x0000028b, + 0x00000000, + 0xc0016900, + 0x0000028c, + 0x00000000, + 0xc0016900, + 0x0000028d, + 0x00000000, + 0xc0016900, + 0x0000028e, + 0x00000000, + 0xc0016900, + 0x0000028f, + 0x00000000, + 0xc0016900, + 0x000002a1, + 0x00000000, + 0xc0016900, + 0x000002a5, + 0x00000000, + 0xc0016900, + 0x000002ac, + 0x00000000, + 0xc0016900, + 0x000002ad, + 0x00000000, + 0xc0016900, 
+ 0x000002ae, + 0x00000000, + 0xc0016900, + 0x000002c8, + 0x00000000, + 0xc0016900, + 0x00000206, + 0x00000100, + 0xc0016900, + 0x00000204, + 0x00010000, + 0xc0036e00, + 0x00000000, + 0x00000012, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x0000008f, + 0x0000000f, + 0xc0016900, + 0x000001e8, + 0x00000001, + 0xc0016900, + 0x00000202, + 0x00cc0000, + 0xc0016900, + 0x00000205, + 0x00000244, + 0xc0016900, + 0x00000203, + 0x00000210, + 0xc0016900, + 0x000001b1, + 0x00000000, + 0xc0016900, + 0x00000185, + 0x00000000, + 0xc0016900, + 0x000001b3, + 0x00000001, + 0xc0016900, + 0x000001b4, + 0x00000000, + 0xc0016900, + 0x00000191, + 0x00000b00, + 0xc0016900, + 0x000001b5, + 0x00000000, +}; + +const u32 r7xx_default_state[] = +{ + 0xc0012800, + 0x80000000, + 0x80000000, + 0xc0004600, + 0x00000016, + 0xc0016800, + 0x00000010, + 0x00028000, + 0xc0016800, + 0x00000010, + 0x00008000, + 0xc0016800, + 0x00000542, + 0x07000002, + 0xc0016800, + 0x000005c5, + 0x00000000, + 0xc0016800, + 0x00000363, + 0x00004000, + 0xc0016800, + 0x0000060c, + 0x00000000, + 0xc0016800, + 0x0000060e, + 0x00420204, + 0xc0016f00, + 0x00000000, + 0x00000000, + 0xc0016f00, + 0x00000001, + 0x00000000, + 0xc0096900, + 0x0000022a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x00000004, + 0x00000000, + 0xc0016900, + 0x0000000a, + 0x00000000, + 0xc0016900, + 0x0000000b, + 0x00000000, + 0xc0016900, + 0x0000010c, + 0x00000000, + 0xc0016900, + 0x0000010d, + 0x00000000, + 0xc0016900, + 0x00000200, + 0x00000000, + 0xc0016900, + 0x00000343, + 0x00000060, + 0xc0016900, + 0x00000344, + 0x00000000, + 0xc0016900, + 0x00000351, + 0x0000aa00, + 0xc0016900, + 0x00000104, + 0x00000000, + 0xc0016900, + 0x0000010e, + 0x00000000, + 0xc0046900, + 0x00000105, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0046900, + 0x0000030c, + 0x01000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x0000008e, + 
0x0000000f, + 0xc0016900, + 0x00000080, + 0x00000000, + 0xc0016900, + 0x00000083, + 0x0000ffff, + 0xc0016900, + 0x00000084, + 0x00000000, + 0xc0016900, + 0x00000085, + 0x20002000, + 0xc0016900, + 0x00000086, + 0x00000000, + 0xc0016900, + 0x00000087, + 0x20002000, + 0xc0016900, + 0x00000088, + 0x00000000, + 0xc0016900, + 0x00000089, + 0x20002000, + 0xc0016900, + 0x0000008a, + 0x00000000, + 0xc0016900, + 0x0000008b, + 0x20002000, + 0xc0016900, + 0x0000008c, + 0xaaaaaaaa, + 0xc0016900, + 0x00000094, + 0x80000000, + 0xc0016900, + 0x00000095, + 0x20002000, + 0xc0026900, + 0x000000b4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000096, + 0x80000000, + 0xc0016900, + 0x00000097, + 0x20002000, + 0xc0026900, + 0x000000b6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000098, + 0x80000000, + 0xc0016900, + 0x00000099, + 0x20002000, + 0xc0026900, + 0x000000b8, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009a, + 0x80000000, + 0xc0016900, + 0x0000009b, + 0x20002000, + 0xc0026900, + 0x000000ba, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009c, + 0x80000000, + 0xc0016900, + 0x0000009d, + 0x20002000, + 0xc0026900, + 0x000000bc, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009e, + 0x80000000, + 0xc0016900, + 0x0000009f, + 0x20002000, + 0xc0026900, + 0x000000be, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a0, + 0x80000000, + 0xc0016900, + 0x000000a1, + 0x20002000, + 0xc0026900, + 0x000000c0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a2, + 0x80000000, + 0xc0016900, + 0x000000a3, + 0x20002000, + 0xc0026900, + 0x000000c2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a4, + 0x80000000, + 0xc0016900, + 0x000000a5, + 0x20002000, + 0xc0026900, + 0x000000c4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a6, + 0x80000000, + 0xc0016900, + 0x000000a7, + 0x20002000, + 0xc0026900, + 0x000000c6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a8, + 0x80000000, + 0xc0016900, + 0x000000a9, + 0x20002000, + 0xc0026900, + 0x000000c8, + 0x00000000, 
+ 0x3f800000, + 0xc0016900, + 0x000000aa, + 0x80000000, + 0xc0016900, + 0x000000ab, + 0x20002000, + 0xc0026900, + 0x000000ca, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ac, + 0x80000000, + 0xc0016900, + 0x000000ad, + 0x20002000, + 0xc0026900, + 0x000000cc, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ae, + 0x80000000, + 0xc0016900, + 0x000000af, + 0x20002000, + 0xc0026900, + 0x000000ce, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b0, + 0x80000000, + 0xc0016900, + 0x000000b1, + 0x20002000, + 0xc0026900, + 0x000000d0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b2, + 0x80000000, + 0xc0016900, + 0x000000b3, + 0x20002000, + 0xc0026900, + 0x000000d2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000293, + 0x00514000, + 0xc0016900, + 0x00000300, + 0x00000000, + 0xc0016900, + 0x00000301, + 0x00000000, + 0xc0016900, + 0x00000312, + 0xffffffff, + 0xc0016900, + 0x00000307, + 0x00000000, + 0xc0016900, + 0x00000308, + 0x00000000, + 0xc0016900, + 0x00000283, + 0x00000000, + 0xc0016900, + 0x00000292, + 0x00000000, + 0xc0066900, + 0x0000010f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x00000206, + 0x00000000, + 0xc0016900, + 0x00000207, + 0x00000000, + 0xc0016900, + 0x00000208, + 0x00000000, + 0xc0046900, + 0x00000303, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0xc0016900, + 0x00000205, + 0x00000004, + 0xc0016900, + 0x00000280, + 0x00000000, + 0xc0016900, + 0x00000281, + 0x00000000, + 0xc0016900, + 0x0000037e, + 0x00000000, + 0xc0016900, + 0x00000382, + 0x00000000, + 0xc0016900, + 0x00000380, + 0x00000000, + 0xc0016900, + 0x00000383, + 0x00000000, + 0xc0016900, + 0x00000381, + 0x00000000, + 0xc0016900, + 0x00000282, + 0x00000008, + 0xc0016900, + 0x00000302, + 0x0000002d, + 0xc0016900, + 0x0000037f, + 0x00000000, + 0xc0016900, + 0x000001b2, + 0x00000001, + 0xc0016900, + 0x000001b6, + 0x00000000, + 0xc0016900, + 0x000001b7, + 0x00000000, + 0xc0016900, + 0x000001b8, + 
0x00000000, + 0xc0016900, + 0x000001b9, + 0x00000000, + 0xc0016900, + 0x00000225, + 0x00000000, + 0xc0016900, + 0x00000229, + 0x00000000, + 0xc0016900, + 0x00000237, + 0x00000000, + 0xc0016900, + 0x00000100, + 0x00000800, + 0xc0016900, + 0x00000101, + 0x00000000, + 0xc0016900, + 0x00000102, + 0x00000000, + 0xc0016900, + 0x000002a8, + 0x00000000, + 0xc0016900, + 0x000002a9, + 0x00000000, + 0xc0016900, + 0x00000103, + 0x00000000, + 0xc0016900, + 0x00000284, + 0x00000000, + 0xc0016900, + 0x00000290, + 0x00000000, + 0xc0016900, + 0x00000285, + 0x00000000, + 0xc0016900, + 0x00000286, + 0x00000000, + 0xc0016900, + 0x00000287, + 0x00000000, + 0xc0016900, + 0x00000288, + 0x00000000, + 0xc0016900, + 0x00000289, + 0x00000000, + 0xc0016900, + 0x0000028a, + 0x00000000, + 0xc0016900, + 0x0000028b, + 0x00000000, + 0xc0016900, + 0x0000028c, + 0x00000000, + 0xc0016900, + 0x0000028d, + 0x00000000, + 0xc0016900, + 0x0000028e, + 0x00000000, + 0xc0016900, + 0x0000028f, + 0x00000000, + 0xc0016900, + 0x000002a1, + 0x00000000, + 0xc0016900, + 0x000002a5, + 0x00000000, + 0xc0016900, + 0x000002ac, + 0x00000000, + 0xc0016900, + 0x000002ad, + 0x00000000, + 0xc0016900, + 0x000002ae, + 0x00000000, + 0xc0016900, + 0x000002c8, + 0x00000000, + 0xc0016900, + 0x00000206, + 0x00000100, + 0xc0016900, + 0x00000204, + 0x00010000, + 0xc0036e00, + 0x00000000, + 0x00000012, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x0000008f, + 0x0000000f, + 0xc0016900, + 0x000001e8, + 0x00000001, + 0xc0016900, + 0x00000202, + 0x00cc0000, + 0xc0016900, + 0x00000205, + 0x00000244, + 0xc0016900, + 0x00000203, + 0x00000210, + 0xc0016900, + 0x000001b1, + 0x00000000, + 0xc0016900, + 0x00000185, + 0x00000000, + 0xc0016900, + 0x000001b3, + 0x00000001, + 0xc0016900, + 0x000001b4, + 0x00000000, + 0xc0016900, + 0x00000191, + 0x00000b00, + 0xc0016900, + 0x000001b5, + 0x00000000, +}; + +/* same for r6xx/r7xx */ +const u32 r6xx_vs[] = +{ + 0x00000004, + 0x81000000, + 0x0000203c, + 0x94000b08, + 0x00004000, + 0x14200b1a, + 
0x00000000, + 0x00000000, + 0x3c000000, + 0x68cd1000, + 0x00080000, + 0x00000000, +}; + +const u32 r6xx_ps[] = +{ + 0x00000002, + 0x80800000, + 0x00000000, + 0x94200688, + 0x00000010, + 0x000d1000, + 0xb0800000, + 0x00000000, +}; + +const u32 r6xx_ps_size = ARRAY_SIZE(r6xx_ps); +const u32 r6xx_vs_size = ARRAY_SIZE(r6xx_vs); +const u32 r6xx_default_size = ARRAY_SIZE(r6xx_default_state); +const u32 r7xx_default_size = ARRAY_SIZE(r7xx_default_state); diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/radeon/r600_blit_shaders.h new file mode 100644 index 00000000000..fdc3b378cbb --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit_shaders.h @@ -0,0 +1,14 @@ + +#ifndef R600_BLIT_SHADERS_H +#define R600_BLIT_SHADERS_H + +extern const u32 r6xx_ps[]; +extern const u32 r6xx_vs[]; +extern const u32 r7xx_default_state[]; +extern const u32 r6xx_default_state[]; + + +extern const u32 r6xx_ps_size, r6xx_vs_size; +extern const u32 r6xx_default_size, r7xx_default_size; + +#endif diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 8327912de96..6d5a711c2e9 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -58,6 +58,12 @@ MODULE_FIRMWARE("radeon/RV730_me.bin"); MODULE_FIRMWARE("radeon/RV710_pfp.bin"); MODULE_FIRMWARE("radeon/RV710_me.bin"); + +int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp, + unsigned family, u32 *ib, int *l); +void r600_cs_legacy_init(void); + + # define ATI_PCIGART_PAGE_SIZE 4096 /**< PCI GART page size */ # define ATI_PCIGART_PAGE_MASK (~(ATI_PCIGART_PAGE_SIZE-1)) @@ -1857,6 +1863,8 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, DRM_DEBUG("\n"); + mutex_init(&dev_priv->cs_mutex); + r600_cs_legacy_init(); /* if we require new memory map but we don't have it fail */ if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) { DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX 
for 3D\n"); @@ -1888,7 +1896,7 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, /* Enable vblank on CRTC1 for older X servers */ dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1; - + dev_priv->do_boxes = 0; dev_priv->cp_mode = init->cp_mode; /* We don't support anything other than bus-mastering ring mode, @@ -1974,11 +1982,11 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, } else #endif { - dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset; + dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset; dev_priv->ring_rptr->handle = - (void *)dev_priv->ring_rptr->offset; + (void *)(unsigned long)dev_priv->ring_rptr->offset; dev->agp_buffer_map->handle = - (void *)dev->agp_buffer_map->offset; + (void *)(unsigned long)dev->agp_buffer_map->offset; DRM_DEBUG("dev_priv->cp_ring->handle %p\n", dev_priv->cp_ring->handle); @@ -2282,3 +2290,239 @@ int r600_cp_dispatch_indirect(struct drm_device *dev, return 0; } + +void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_master *master = file_priv->master; + struct drm_radeon_master_private *master_priv = master->driver_priv; + drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv; + int nbox = sarea_priv->nbox; + struct drm_clip_rect *pbox = sarea_priv->boxes; + int i, cpp, src_pitch, dst_pitch; + uint64_t src, dst; + RING_LOCALS; + DRM_DEBUG("\n"); + + if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888) + cpp = 4; + else + cpp = 2; + + if (sarea_priv->pfCurrentPage == 0) { + src_pitch = dev_priv->back_pitch; + dst_pitch = dev_priv->front_pitch; + src = dev_priv->back_offset + dev_priv->fb_location; + dst = dev_priv->front_offset + dev_priv->fb_location; + } else { + src_pitch = dev_priv->front_pitch; + dst_pitch = dev_priv->back_pitch; + src = dev_priv->front_offset + dev_priv->fb_location; + dst = dev_priv->back_offset + dev_priv->fb_location; + } + + 
if (r600_prepare_blit_copy(dev, file_priv)) { + DRM_ERROR("unable to allocate vertex buffer for swap buffer\n"); + return; + } + for (i = 0; i < nbox; i++) { + int x = pbox[i].x1; + int y = pbox[i].y1; + int w = pbox[i].x2 - x; + int h = pbox[i].y2 - y; + + DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h); + + r600_blit_swap(dev, + src, dst, + x, y, x, y, w, h, + src_pitch, dst_pitch, cpp); + } + r600_done_blit_copy(dev); + + /* Increment the frame counter. The client-side 3D driver must + * throttle the framerate by waiting for this value before + * performing the swapbuffer ioctl. + */ + sarea_priv->last_frame++; + + BEGIN_RING(3); + R600_FRAME_AGE(sarea_priv->last_frame); + ADVANCE_RING(); +} + +int r600_cp_dispatch_texture(struct drm_device *dev, + struct drm_file *file_priv, + drm_radeon_texture_t *tex, + drm_radeon_tex_image_t *image) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_buf *buf; + u32 *buffer; + const u8 __user *data; + int size, pass_size; + u64 src_offset, dst_offset; + + if (!radeon_check_offset(dev_priv, tex->offset)) { + DRM_ERROR("Invalid destination offset\n"); + return -EINVAL; + } + + /* this might fail for zero-sized uploads - are those illegal? */ + if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) { + DRM_ERROR("Invalid final destination offset\n"); + return -EINVAL; + } + + size = tex->height * tex->pitch; + + if (size == 0) + return 0; + + dst_offset = tex->offset; + + if (r600_prepare_blit_copy(dev, file_priv)) { + DRM_ERROR("unable to allocate vertex buffer for swap buffer\n"); + return -EAGAIN; + } + do { + data = (const u8 __user *)image->data; + pass_size = size; + + buf = radeon_freelist_get(dev); + if (!buf) { + DRM_DEBUG("EAGAIN\n"); + if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image))) + return -EFAULT; + return -EAGAIN; + } + + if (pass_size > buf->total) + pass_size = buf->total; + + /* Dispatch the indirect buffer. 
+ */ + buffer = + (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset); + + if (DRM_COPY_FROM_USER(buffer, data, pass_size)) { + DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size); + return -EFAULT; + } + + buf->file_priv = file_priv; + buf->used = pass_size; + src_offset = dev_priv->gart_buffers_offset + buf->offset; + + r600_blit_copy(dev, src_offset, dst_offset, pass_size); + + radeon_cp_discard_buffer(dev, file_priv->master, buf); + + /* Update the input parameters for next time */ + image->data = (const u8 __user *)image->data + pass_size; + dst_offset += pass_size; + size -= pass_size; + } while (size > 0); + r600_done_blit_copy(dev); + + return 0; +} + +/* + * Legacy cs ioctl + */ +static u32 radeon_cs_id_get(struct drm_radeon_private *radeon) +{ + /* FIXME: check if wrap affect last reported wrap & sequence */ + radeon->cs_id_scnt = (radeon->cs_id_scnt + 1) & 0x00FFFFFF; + if (!radeon->cs_id_scnt) { + /* increment wrap counter */ + radeon->cs_id_wcnt += 0x01000000; + /* valid sequence counter start at 1 */ + radeon->cs_id_scnt = 1; + } + return (radeon->cs_id_scnt | radeon->cs_id_wcnt); +} + +static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id) +{ + RING_LOCALS; + + *id = radeon_cs_id_get(dev_priv); + + /* SCRATCH 2 */ + BEGIN_RING(3); + R600_CLEAR_AGE(*id); + ADVANCE_RING(); + COMMIT_RING(); +} + +static int r600_ib_get(struct drm_device *dev, + struct drm_file *fpriv, + struct drm_buf **buffer) +{ + struct drm_buf *buf; + + *buffer = NULL; + buf = radeon_freelist_get(dev); + if (!buf) { + return -EBUSY; + } + buf->file_priv = fpriv; + *buffer = buf; + return 0; +} + +static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf, + struct drm_file *fpriv, int l, int r) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + + if (buf) { + if (!r) + r600_cp_dispatch_indirect(dev, buf, 0, l * 4); + radeon_cp_discard_buffer(dev, fpriv->master, buf); + COMMIT_RING(); + } +} + +int r600_cs_legacy_ioctl(struct drm_device *dev, void 
*data, struct drm_file *fpriv) +{ + struct drm_radeon_private *dev_priv = dev->dev_private; + struct drm_radeon_cs *cs = data; + struct drm_buf *buf; + unsigned family; + int l, r = 0; + u32 *ib, cs_id = 0; + + if (dev_priv == NULL) { + DRM_ERROR("called with no initialization\n"); + return -EINVAL; + } + family = dev_priv->flags & RADEON_FAMILY_MASK; + if (family < CHIP_R600) { + DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n"); + return -EINVAL; + } + mutex_lock(&dev_priv->cs_mutex); + /* get ib */ + r = r600_ib_get(dev, fpriv, &buf); + if (r) { + DRM_ERROR("ib_get failed\n"); + goto out; + } + ib = dev->agp_buffer_map->handle + buf->offset; + /* now parse command stream */ + r = r600_cs_legacy(dev, data, fpriv, family, ib, &l); + if (r) { + goto out; + } + +out: + r600_ib_free(dev, buf, fpriv, l, r); + /* emit cs id sequence */ + r600_cs_id_emit(dev_priv, &cs_id); + cs->cs_id = cs_id; + mutex_unlock(&dev_priv->cs_mutex); + return r; +} diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c new file mode 100644 index 00000000000..39bf6349351 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -0,0 +1,658 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#include "drmP.h" +#include "radeon.h" +#include "radeon_share.h" +#include "r600d.h" +#include "avivod.h" + +/* Two relocation lookup variants share one signature; both are defined further down. */ +static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc); +static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc); +typedef int (*next_reloc_t)(struct radeon_cs_parser*, struct radeon_cs_reloc**); +/* Relocation lookup called by the packet3 checker; initialised to the _mm variant. */ +static next_reloc_t r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_mm; + +/** + * r600_cs_packet_parse() - decode the CP packet header located at @idx + * @p: parser structure holding parsing context. + * @pkt: where to store the decoded packet information + * @idx: dword index of the packet header inside the IB chunk + * + * Assume that chunk_ib_idx is properly set. The parser index is not + * advanced here. Will return -EINVAL if the packet is bigger than the + * remaining IB size or if the packet type is unknown. 
+ **/
+int r600_cs_packet_parse(struct radeon_cs_parser *p,
+			 struct radeon_cs_packet *pkt,
+			 unsigned idx)
+{
+	struct radeon_cs_chunk *chunk = &p->chunks[p->chunk_ib_idx];
+	uint32_t hdr;
+
+	/* The header dword itself has to lie inside the IB chunk. */
+	if (idx >= chunk->length_dw) {
+		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+			  idx, chunk->length_dw);
+		return -EINVAL;
+	}
+
+	hdr = chunk->kdata[idx];
+	pkt->idx = idx;
+	pkt->type = CP_PACKET_GET_TYPE(hdr);
+	pkt->count = CP_PACKET_GET_COUNT(hdr);
+	pkt->one_reg_wr = 0;
+
+	/* Pick up the field that only exists for this packet type. */
+	if (pkt->type == PACKET_TYPE0) {
+		pkt->reg = CP_PACKET0_GET_REG(hdr);
+	} else if (pkt->type == PACKET_TYPE3) {
+		pkt->opcode = CP_PACKET3_GET_OPCODE(hdr);
+	} else if (pkt->type == PACKET_TYPE2) {
+		/* -1 turns the "count + 1" payload size used below into 0 */
+		pkt->count = -1;
+	} else {
+		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
+		return -EINVAL;
+	}
+
+	/* The last dword of the packet must still be inside the chunk. */
+	if ((pkt->count + 1 + pkt->idx) >= chunk->length_dw) {
+		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
+			  pkt->idx, pkt->type, pkt->count, chunk->length_dw);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * r600_cs_packet_next_reloc_mm() - parse next packet which should be reloc packet3
+ * @p:		parser structure holding parsing context.
+ * @cs_reloc:	where to store the pointer to the relocation entry
+ *
+ * Check that the next packet is a relocation packet3 (a NOP whose first
+ * payload dword is an index into the relocation chunk) and hand back the
+ * matching entry of p->relocs_ptr.
+ **/
+static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
+					struct radeon_cs_reloc **cs_reloc)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_chunk *relocs_chunk;
+	struct radeon_cs_packet p3reloc;
+	unsigned idx;
+	int r;
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk !\n");
+		return -EINVAL;
+	}
+	*cs_reloc = NULL;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	r = r600_cs_packet_parse(p, &p3reloc, p->idx);
+	if (r) {
+		return r;
+	}
+	/* consume the packet even if it turns out not to be a reloc NOP */
+	p->idx += p3reloc.count + 2;
+	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
+		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
+			  p3reloc.idx);
+		return -EINVAL;
+	}
+	/* first payload dword of the NOP is the dword index of the reloc */
+	idx = ib_chunk->kdata[p3reloc.idx + 1];
+	/*
+	 * FIXME: we assume reloc size is 4 dwords
+	 *
+	 * Compare record numbers rather than dword indices: with a chunk
+	 * length that is not a multiple of 4, "idx < length_dw" alone would
+	 * still let idx / 4 name a record past the last complete one.
+	 * NOTE(review): assumes relocs_ptr[] has length_dw / 4 entries;
+	 * confirm against the code that allocates it.
+	 */
+	if ((idx / 4) >= (relocs_chunk->length_dw / 4)) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+	*cs_reloc = p->relocs_ptr[(idx / 4)];
+	return 0;
+}
+
+/**
+ * r600_cs_packet_next_reloc_nomm() - parse next packet which should be reloc packet3
+ * @p:		parser structure holding parsing context.
+ * @cs_reloc:	where to store the pointer to the relocation entry
+ *
+ * Check that the next packet is a relocation packet3 and fill the single
+ * entry p->relocs[0] with the GPU offset stored in the relocation chunk.
+ **/ +static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc) +{ + struct radeon_cs_chunk *ib_chunk; + struct radeon_cs_chunk *relocs_chunk; + struct radeon_cs_packet p3reloc; + unsigned idx; + int r; + + if (p->chunk_relocs_idx == -1) { + DRM_ERROR("No relocation chunk !\n"); + return -EINVAL; + } + *cs_reloc = NULL; + ib_chunk = &p->chunks[p->chunk_ib_idx]; + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; + r = r600_cs_packet_parse(p, &p3reloc, p->idx); + if (r) { + return r; + } + p->idx += p3reloc.count + 2; + if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) { + DRM_ERROR("No packet3 for relocation for packet at %d.\n", + p3reloc.idx); + return -EINVAL; + } + idx = ib_chunk->kdata[p3reloc.idx + 1]; + if (idx >= relocs_chunk->length_dw) { + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", + idx, relocs_chunk->length_dw); + return -EINVAL; + } + *cs_reloc = &p->relocs[0]; + (*cs_reloc)->lobj.gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32; + (*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0]; + return 0; +} + +static int r600_packet0_check(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, unsigned reg) +{ + switch (reg) { + case AVIVO_D1MODE_VLINE_START_END: + case AVIVO_D2MODE_VLINE_START_END: + break; + default: + printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", + reg, idx); + return -EINVAL; + } + return 0; +} + +static int r600_cs_parse_packet0(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt) +{ + unsigned reg, i; + unsigned idx; + int r; + + idx = pkt->idx + 1; + reg = pkt->reg; + for (i = 0; i <= pkt->count; i++, idx++, reg += 4) { + r = r600_packet0_check(p, pkt, idx, reg); + if (r) { + return r; + } + } + return 0; +} + +static int r600_packet3_check(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt) +{ + struct radeon_cs_chunk *ib_chunk; + struct radeon_cs_reloc *reloc; + volatile u32 *ib; + unsigned 
idx; + unsigned i; + unsigned start_reg, end_reg, reg; + int r; + + ib = p->ib->ptr; + ib_chunk = &p->chunks[p->chunk_ib_idx]; + idx = pkt->idx + 1; + switch (pkt->opcode) { + case PACKET3_START_3D_CMDBUF: + if (p->family >= CHIP_RV770 || pkt->count) { + DRM_ERROR("bad START_3D\n"); + return -EINVAL; + } + break; + case PACKET3_CONTEXT_CONTROL: + if (pkt->count != 1) { + DRM_ERROR("bad CONTEXT_CONTROL\n"); + return -EINVAL; + } + break; + case PACKET3_INDEX_TYPE: + case PACKET3_NUM_INSTANCES: + if (pkt->count) { + DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES\n"); + return -EINVAL; + } + break; + case PACKET3_DRAW_INDEX: + if (pkt->count != 3) { + DRM_ERROR("bad DRAW_INDEX\n"); + return -EINVAL; + } + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad DRAW_INDEX\n"); + return -EINVAL; + } + ib[idx+0] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+1] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + break; + case PACKET3_DRAW_INDEX_AUTO: + if (pkt->count != 1) { + DRM_ERROR("bad DRAW_INDEX_AUTO\n"); + return -EINVAL; + } + break; + case PACKET3_DRAW_INDEX_IMMD_BE: + case PACKET3_DRAW_INDEX_IMMD: + if (pkt->count < 2) { + DRM_ERROR("bad DRAW_INDEX_IMMD\n"); + return -EINVAL; + } + break; + case PACKET3_WAIT_REG_MEM: + if (pkt->count != 5) { + DRM_ERROR("bad WAIT_REG_MEM\n"); + return -EINVAL; + } + /* bit 4 is reg (0) or mem (1) */ + if (ib_chunk->kdata[idx+0] & 0x10) { + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad WAIT_REG_MEM\n"); + return -EINVAL; + } + ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+2] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + } + break; + case PACKET3_SURFACE_SYNC: + if (pkt->count != 3) { + DRM_ERROR("bad SURFACE_SYNC\n"); + return -EINVAL; + } + /* 0xffffffff/0x0 is flush all cache flag */ + if (ib_chunk->kdata[idx+1] != 0xffffffff || + ib_chunk->kdata[idx+2] != 0) { + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SURFACE_SYNC\n"); + return 
-EINVAL; + } + ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + } + break; + case PACKET3_EVENT_WRITE: + if (pkt->count != 2 && pkt->count != 0) { + DRM_ERROR("bad EVENT_WRITE\n"); + return -EINVAL; + } + if (pkt->count) { + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad EVENT_WRITE\n"); + return -EINVAL; + } + ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + } + break; + case PACKET3_EVENT_WRITE_EOP: + if (pkt->count != 4) { + DRM_ERROR("bad EVENT_WRITE_EOP\n"); + return -EINVAL; + } + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad EVENT_WRITE\n"); + return -EINVAL; + } + ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + break; + case PACKET3_SET_CONFIG_REG: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONFIG_REG_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_CONFIG_REG_OFFSET) || + (start_reg >= PACKET3_SET_CONFIG_REG_END) || + (end_reg >= PACKET3_SET_CONFIG_REG_END)) { + DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + return -EINVAL; + } + for (i = 0; i < pkt->count; i++) { + reg = start_reg + (4 * i); + switch (reg) { + case CP_COHER_BASE: + /* use PACKET3_SURFACE_SYNC */ + return -EINVAL; + default: + break; + } + } + break; + case PACKET3_SET_CONTEXT_REG: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONTEXT_REG_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_CONTEXT_REG_OFFSET) || + (start_reg >= PACKET3_SET_CONTEXT_REG_END) || + (end_reg >= PACKET3_SET_CONTEXT_REG_END)) { + DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n"); + return -EINVAL; + } + for (i = 0; i < pkt->count; i++) { + reg = start_reg + (4 * i); + switch (reg) { + case DB_DEPTH_BASE: + case CB_COLOR0_BASE: + case CB_COLOR1_BASE: + case CB_COLOR2_BASE: + case CB_COLOR3_BASE: + case CB_COLOR4_BASE: + case CB_COLOR5_BASE: + case 
CB_COLOR6_BASE: + case CB_COLOR7_BASE: + case SQ_PGM_START_FS: + case SQ_PGM_START_ES: + case SQ_PGM_START_VS: + case SQ_PGM_START_GS: + case SQ_PGM_START_PS: + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + ib[idx+1+i] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + break; + case VGT_DMA_BASE: + case VGT_DMA_BASE_HI: + /* These should be handled by DRAW_INDEX packet 3 */ + case VGT_STRMOUT_BASE_OFFSET_0: + case VGT_STRMOUT_BASE_OFFSET_1: + case VGT_STRMOUT_BASE_OFFSET_2: + case VGT_STRMOUT_BASE_OFFSET_3: + case VGT_STRMOUT_BASE_OFFSET_HI_0: + case VGT_STRMOUT_BASE_OFFSET_HI_1: + case VGT_STRMOUT_BASE_OFFSET_HI_2: + case VGT_STRMOUT_BASE_OFFSET_HI_3: + case VGT_STRMOUT_BUFFER_BASE_0: + case VGT_STRMOUT_BUFFER_BASE_1: + case VGT_STRMOUT_BUFFER_BASE_2: + case VGT_STRMOUT_BUFFER_BASE_3: + case VGT_STRMOUT_BUFFER_OFFSET_0: + case VGT_STRMOUT_BUFFER_OFFSET_1: + case VGT_STRMOUT_BUFFER_OFFSET_2: + case VGT_STRMOUT_BUFFER_OFFSET_3: + /* These should be handled by STRMOUT_BUFFER packet 3 */ + DRM_ERROR("bad context reg: 0x%08x\n", reg); + return -EINVAL; + default: + break; + } + } + break; + case PACKET3_SET_RESOURCE: + if (pkt->count % 7) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + start_reg = (ib[idx+0] << 2) + PACKET3_SET_RESOURCE_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_RESOURCE_OFFSET) || + (start_reg >= PACKET3_SET_RESOURCE_END) || + (end_reg >= PACKET3_SET_RESOURCE_END)) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + for (i = 0; i < (pkt->count / 7); i++) { + switch (G__SQ_VTX_CONSTANT_TYPE(ib[idx+(i*7)+6+1])) { + case SQ_TEX_VTX_VALID_TEXTURE: + /* tex base */ + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + ib[idx+1+(i*7)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + /* tex mip base */ + r = r600_cs_packet_next_reloc(p, 
&reloc); + if (r) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + ib[idx+1+(i*7)+3] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + break; + case SQ_TEX_VTX_VALID_BUFFER: + /* vtx base */ + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + ib[idx+1+(i*7)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff); + ib[idx+1+(i*7)+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + break; + case SQ_TEX_VTX_INVALID_TEXTURE: + case SQ_TEX_VTX_INVALID_BUFFER: + default: + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + } + break; + case PACKET3_SET_ALU_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_ALU_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) || + (start_reg >= PACKET3_SET_ALU_CONST_END) || + (end_reg >= PACKET3_SET_ALU_CONST_END)) { + DRM_ERROR("bad SET_ALU_CONST\n"); + return -EINVAL; + } + break; + case PACKET3_SET_BOOL_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_BOOL_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_BOOL_CONST_OFFSET) || + (start_reg >= PACKET3_SET_BOOL_CONST_END) || + (end_reg >= PACKET3_SET_BOOL_CONST_END)) { + DRM_ERROR("bad SET_BOOL_CONST\n"); + return -EINVAL; + } + break; + case PACKET3_SET_LOOP_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_LOOP_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_LOOP_CONST_OFFSET) || + (start_reg >= PACKET3_SET_LOOP_CONST_END) || + (end_reg >= PACKET3_SET_LOOP_CONST_END)) { + DRM_ERROR("bad SET_LOOP_CONST\n"); + return -EINVAL; + } + break; + case PACKET3_SET_CTL_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_CTL_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_CTL_CONST_OFFSET) || + (start_reg >= PACKET3_SET_CTL_CONST_END) || + (end_reg >= PACKET3_SET_CTL_CONST_END)) { + DRM_ERROR("bad SET_CTL_CONST\n"); + return 
-EINVAL; + } + break; + case PACKET3_SET_SAMPLER: + if (pkt->count % 3) { + DRM_ERROR("bad SET_SAMPLER\n"); + return -EINVAL; + } + start_reg = (ib[idx+0] << 2) + PACKET3_SET_SAMPLER_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_SAMPLER_OFFSET) || + (start_reg >= PACKET3_SET_SAMPLER_END) || + (end_reg >= PACKET3_SET_SAMPLER_END)) { + DRM_ERROR("bad SET_SAMPLER\n"); + return -EINVAL; + } + break; + case PACKET3_SURFACE_BASE_UPDATE: + if (p->family >= CHIP_RV770 || p->family == CHIP_R600) { + DRM_ERROR("bad SURFACE_BASE_UPDATE\n"); + return -EINVAL; + } + if (pkt->count) { + DRM_ERROR("bad SURFACE_BASE_UPDATE\n"); + return -EINVAL; + } + break; + case PACKET3_NOP: + break; + default: + DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + return -EINVAL; + } + return 0; +} + +int r600_cs_parse(struct radeon_cs_parser *p) +{ + struct radeon_cs_packet pkt; + int r; + + do { + r = r600_cs_packet_parse(p, &pkt, p->idx); + if (r) { + return r; + } + p->idx += pkt.count + 2; + switch (pkt.type) { + case PACKET_TYPE0: + r = r600_cs_parse_packet0(p, &pkt); + break; + case PACKET_TYPE2: + break; + case PACKET_TYPE3: + r = r600_packet3_check(p, &pkt); + break; + default: + DRM_ERROR("Unknown packet type %d !\n", pkt.type); + return -EINVAL; + } + if (r) { + return r; + } + } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); +#if 0 + for (r = 0; r < p->ib->length_dw; r++) { + printk(KERN_INFO "%05d 0x%08X\n", r, p->ib->ptr[r]); + mdelay(1); + } +#endif + return 0; +} + +static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p) +{ + if (p->chunk_relocs_idx == -1) { + return 0; + } + p->relocs = kcalloc(1, sizeof(struct radeon_cs_reloc), GFP_KERNEL); + if (p->relocs == NULL) { + return -ENOMEM; + } + return 0; +} + +/** + * cs_parser_fini() - clean parser states + * @parser: parser structure holding parsing context. 
+ * @error: error number + * + * If error is set then unvalidate buffer, otherwise just free memory + * used by parsing context. + **/ +static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error) +{ + unsigned i; + + kfree(parser->relocs); + for (i = 0; i < parser->nchunks; i++) { + kfree(parser->chunks[i].kdata); + } + kfree(parser->chunks); + kfree(parser->chunks_array); +} + +int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp, + unsigned family, u32 *ib, int *l) +{ + struct radeon_cs_parser parser; + struct radeon_cs_chunk *ib_chunk; + struct radeon_ib fake_ib; + int r; + + /* initialize parser */ + memset(&parser, 0, sizeof(struct radeon_cs_parser)); + parser.filp = filp; + parser.rdev = NULL; + parser.family = family; + parser.ib = &fake_ib; + fake_ib.ptr = ib; + r = radeon_cs_parser_init(&parser, data); + if (r) { + DRM_ERROR("Failed to initialize parser !\n"); + r600_cs_parser_fini(&parser, r); + return r; + } + r = r600_cs_parser_relocs_legacy(&parser); + if (r) { + DRM_ERROR("Failed to parse relocation !\n"); + r600_cs_parser_fini(&parser, r); + return r; + } + /* Copy the packet into the IB, the parser will read from the + * input memory (cached) and write to the IB (which can be + * uncached). 
*/ + ib_chunk = &parser.chunks[parser.chunk_ib_idx]; + parser.ib->length_dw = ib_chunk->length_dw; + memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4); + *l = parser.ib->length_dw; + r = r600_cs_parse(&parser); + if (r) { + DRM_ERROR("Invalid command stream !\n"); + r600_cs_parser_fini(&parser, r); + return r; + } + r600_cs_parser_fini(&parser, r); + return r; +} + +void r600_cs_legacy_init(void) +{ + r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm; +} diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h new file mode 100644 index 00000000000..723295f5928 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600d.h @@ -0,0 +1,661 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2009 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef R600D_H +#define R600D_H + +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) + +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) + +#define R6XX_MAX_SH_GPRS 256 +#define R6XX_MAX_TEMP_GPRS 16 +#define R6XX_MAX_SH_THREADS 256 +#define R6XX_MAX_SH_STACK_ENTRIES 4096 +#define R6XX_MAX_BACKENDS 8 +#define R6XX_MAX_BACKENDS_MASK 0xff +#define R6XX_MAX_SIMDS 8 +#define R6XX_MAX_SIMDS_MASK 0xff +#define R6XX_MAX_PIPES 8 +#define R6XX_MAX_PIPES_MASK 0xff + +/* PTE flags */ +#define PTE_VALID (1 << 0) +#define PTE_SYSTEM (1 << 1) +#define PTE_SNOOPED (1 << 2) +#define PTE_READABLE (1 << 5) +#define PTE_WRITEABLE (1 << 6) + +/* Registers */ +#define ARB_POP 0x2418 +#define ENABLE_TC128 (1 << 30) +#define ARB_GDEC_RD_CNTL 0x246C + +#define CC_GC_SHADER_PIPE_CONFIG 0x8950 +#define CC_RB_BACKEND_DISABLE 0x98F4 +#define BACKEND_DISABLE(x) ((x) << 16) + +#define CB_COLOR0_BASE 0x28040 +#define CB_COLOR1_BASE 0x28044 +#define CB_COLOR2_BASE 0x28048 +#define CB_COLOR3_BASE 0x2804C +#define CB_COLOR4_BASE 0x28050 +#define CB_COLOR5_BASE 0x28054 +#define CB_COLOR6_BASE 0x28058 +#define CB_COLOR7_BASE 0x2805C +#define CB_COLOR7_FRAG 0x280FC + +#define CB_COLOR0_SIZE 0x28060 +#define CB_COLOR0_VIEW 0x28080 +#define CB_COLOR0_INFO 0x280a0 +#define CB_COLOR0_TILE 0x280c0 +#define CB_COLOR0_FRAG 0x280e0 +#define CB_COLOR0_MASK 0x28100 + +#define CONFIG_MEMSIZE 0x5428 +#define CP_STAT 0x8680 +#define CP_COHER_BASE 0x85F8 +#define CP_DEBUG 0xC1FC +#define R_0086D8_CP_ME_CNTL 0x86D8 +#define S_0086D8_CP_ME_HALT(x) (((x) & 1)<<28) +#define C_0086D8_CP_ME_HALT(x) ((x) & 0xEFFFFFFF) +#define CP_ME_RAM_DATA 0xC160 +#define CP_ME_RAM_RADDR 0xC158 +#define CP_ME_RAM_WADDR 0xC15C +#define CP_MEQ_THRESHOLDS 0x8764 +#define MEQ_END(x) ((x) << 16) +#define ROQ_END(x) ((x) << 24) +#define CP_PERFMON_CNTL 0x87FC +#define CP_PFP_UCODE_ADDR 0xC150 +#define 
CP_PFP_UCODE_DATA 0xC154 +#define CP_QUEUE_THRESHOLDS 0x8760 +#define ROQ_IB1_START(x) ((x) << 0) +#define ROQ_IB2_START(x) ((x) << 8) +#define CP_RB_BASE 0xC100 +#define CP_RB_CNTL 0xC104 +#define RB_BUFSZ(x) ((x)<<0) +#define RB_BLKSZ(x) ((x)<<8) +#define RB_NO_UPDATE (1<<27) +#define RB_RPTR_WR_ENA (1<<31) +#define BUF_SWAP_32BIT (2 << 16) +#define CP_RB_RPTR 0x8700 +#define CP_RB_RPTR_ADDR 0xC10C +#define CP_RB_RPTR_ADDR_HI 0xC110 +#define CP_RB_RPTR_WR 0xC108 +#define CP_RB_WPTR 0xC114 +#define CP_RB_WPTR_ADDR 0xC118 +#define CP_RB_WPTR_ADDR_HI 0xC11C +#define CP_RB_WPTR_DELAY 0x8704 +#define CP_ROQ_IB1_STAT 0x8784 +#define CP_ROQ_IB2_STAT 0x8788 +#define CP_SEM_WAIT_TIMER 0x85BC + +#define DB_DEBUG 0x9830 +#define PREZ_MUST_WAIT_FOR_POSTZ_DONE (1 << 31) +#define DB_DEPTH_BASE 0x2800C +#define DB_WATERMARKS 0x9838 +#define DEPTH_FREE(x) ((x) << 0) +#define DEPTH_FLUSH(x) ((x) << 5) +#define DEPTH_PENDING_FREE(x) ((x) << 15) +#define DEPTH_CACHELINE_FREE(x) ((x) << 20) + +#define DCP_TILING_CONFIG 0x6CA0 +#define PIPE_TILING(x) ((x) << 1) +#define BANK_TILING(x) ((x) << 4) +#define GROUP_SIZE(x) ((x) << 6) +#define ROW_TILING(x) ((x) << 8) +#define BANK_SWAPS(x) ((x) << 11) +#define SAMPLE_SPLIT(x) ((x) << 14) +#define BACKEND_MAP(x) ((x) << 16) + +#define GB_TILING_CONFIG 0x98F0 + +#define GC_USER_SHADER_PIPE_CONFIG 0x8954 +#define INACTIVE_QD_PIPES(x) ((x) << 8) +#define INACTIVE_QD_PIPES_MASK 0x0000FF00 +#define INACTIVE_SIMDS(x) ((x) << 16) +#define INACTIVE_SIMDS_MASK 0x00FF0000 + +#define SQ_CONFIG 0x8c00 +# define VC_ENABLE (1 << 0) +# define EXPORT_SRC_C (1 << 1) +# define DX9_CONSTS (1 << 2) +# define ALU_INST_PREFER_VECTOR (1 << 3) +# define DX10_CLAMP (1 << 4) +# define CLAUSE_SEQ_PRIO(x) ((x) << 8) +# define PS_PRIO(x) ((x) << 24) +# define VS_PRIO(x) ((x) << 26) +# define GS_PRIO(x) ((x) << 28) +# define ES_PRIO(x) ((x) << 30) +#define SQ_GPR_RESOURCE_MGMT_1 0x8c04 +# define NUM_PS_GPRS(x) ((x) << 0) +# define NUM_VS_GPRS(x) ((x) << 16) +# define 
NUM_CLAUSE_TEMP_GPRS(x) ((x) << 28) +#define SQ_GPR_RESOURCE_MGMT_2 0x8c08 +# define NUM_GS_GPRS(x) ((x) << 0) +# define NUM_ES_GPRS(x) ((x) << 16) +#define SQ_THREAD_RESOURCE_MGMT 0x8c0c +# define NUM_PS_THREADS(x) ((x) << 0) +# define NUM_VS_THREADS(x) ((x) << 8) +# define NUM_GS_THREADS(x) ((x) << 16) +# define NUM_ES_THREADS(x) ((x) << 24) +#define SQ_STACK_RESOURCE_MGMT_1 0x8c10 +# define NUM_PS_STACK_ENTRIES(x) ((x) << 0) +# define NUM_VS_STACK_ENTRIES(x) ((x) << 16) +#define SQ_STACK_RESOURCE_MGMT_2 0x8c14 +# define NUM_GS_STACK_ENTRIES(x) ((x) << 0) +# define NUM_ES_STACK_ENTRIES(x) ((x) << 16) + +#define GRBM_CNTL 0x8000 +# define GRBM_READ_TIMEOUT(x) ((x) << 0) +#define GRBM_STATUS 0x8010 +#define CMDFIFO_AVAIL_MASK 0x0000001F +#define GUI_ACTIVE (1<<31) +#define GRBM_STATUS2 0x8014 +#define GRBM_SOFT_RESET 0x8020 +#define SOFT_RESET_CP (1<<0) + +#define HDP_HOST_PATH_CNTL 0x2C00 +#define HDP_NONSURFACE_BASE 0x2C04 +#define HDP_NONSURFACE_INFO 0x2C08 +#define HDP_NONSURFACE_SIZE 0x2C0C +#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 +#define HDP_TILING_CONFIG 0x2F3C + +#define MC_VM_AGP_TOP 0x2184 +#define MC_VM_AGP_BOT 0x2188 +#define MC_VM_AGP_BASE 0x218C +#define MC_VM_FB_LOCATION 0x2180 +#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C +#define ENABLE_L1_TLB (1 << 0) +#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1) +#define ENABLE_L1_STRICT_ORDERING (1 << 2) +#define SYSTEM_ACCESS_MODE_MASK 0x000000C0 +#define SYSTEM_ACCESS_MODE_SHIFT 6 +#define SYSTEM_ACCESS_MODE_PA_ONLY (0 << 6) +#define SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 6) +#define SYSTEM_ACCESS_MODE_IN_SYS (2 << 6) +#define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 6) +#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 8) +#define SYSTEM_APERTURE_UNMAPPED_ACCESS_DEFAULT_PAGE (1 << 8) +#define ENABLE_SEMAPHORE_MODE (1 << 10) +#define ENABLE_WAIT_L2_QUERY (1 << 11) +#define EFFECTIVE_L1_TLB_SIZE(x) (((x) & 7) << 12) +#define EFFECTIVE_L1_TLB_SIZE_MASK 0x00007000 +#define EFFECTIVE_L1_TLB_SIZE_SHIFT 12 
+#define EFFECTIVE_L1_QUEUE_SIZE(x) (((x) & 7) << 15) +#define EFFECTIVE_L1_QUEUE_SIZE_MASK 0x00038000 +#define EFFECTIVE_L1_QUEUE_SIZE_SHIFT 15 +#define MC_VM_L1_TLB_MCD_RD_B_CNTL 0x21A0 +#define MC_VM_L1_TLB_MCB_RD_GFX_CNTL 0x21FC +#define MC_VM_L1_TLB_MCB_RD_HDP_CNTL 0x2204 +#define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL 0x2208 +#define MC_VM_L1_TLB_MCB_RD_SEM_CNTL 0x220C +#define MC_VM_L1_TLB_MCB_RD_SYS_CNTL 0x2200 +#define MC_VM_L1_TLB_MCD_WR_A_CNTL 0x21A4 +#define MC_VM_L1_TLB_MCD_WR_B_CNTL 0x21A8 +#define MC_VM_L1_TLB_MCB_WR_GFX_CNTL 0x2210 +#define MC_VM_L1_TLB_MCB_WR_HDP_CNTL 0x2218 +#define MC_VM_L1_TLB_MCB_WR_PDMA_CNTL 0x221C +#define MC_VM_L1_TLB_MCB_WR_SEM_CNTL 0x2220 +#define MC_VM_L1_TLB_MCB_WR_SYS_CNTL 0x2214 +#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2190 +#define LOGICAL_PAGE_NUMBER_MASK 0x000FFFFF +#define LOGICAL_PAGE_NUMBER_SHIFT 0 +#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2194 +#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198 + +#define PA_CL_ENHANCE 0x8A14 +#define CLIP_VTX_REORDER_ENA (1 << 0) +#define NUM_CLIP_SEQ(x) ((x) << 1) +#define PA_SC_AA_CONFIG 0x28C04 +#define PA_SC_AA_SAMPLE_LOCS_2S 0x8B40 +#define PA_SC_AA_SAMPLE_LOCS_4S 0x8B44 +#define PA_SC_AA_SAMPLE_LOCS_8S_WD0 0x8B48 +#define PA_SC_AA_SAMPLE_LOCS_8S_WD1 0x8B4C +#define S0_X(x) ((x) << 0) +#define S0_Y(x) ((x) << 4) +#define S1_X(x) ((x) << 8) +#define S1_Y(x) ((x) << 12) +#define S2_X(x) ((x) << 16) +#define S2_Y(x) ((x) << 20) +#define S3_X(x) ((x) << 24) +#define S3_Y(x) ((x) << 28) +#define S4_X(x) ((x) << 0) +#define S4_Y(x) ((x) << 4) +#define S5_X(x) ((x) << 8) +#define S5_Y(x) ((x) << 12) +#define S6_X(x) ((x) << 16) +#define S6_Y(x) ((x) << 20) +#define S7_X(x) ((x) << 24) +#define S7_Y(x) ((x) << 28) +#define PA_SC_CLIPRECT_RULE 0x2820c +#define PA_SC_ENHANCE 0x8BF0 +#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0) +#define FORCE_EOV_MAX_TILE_CNT(x) ((x) << 12) +#define PA_SC_LINE_STIPPLE 0x28A0C +#define PA_SC_LINE_STIPPLE_STATE 0x8B10 +#define PA_SC_MODE_CNTL 0x28A4C +#define 
PA_SC_MULTI_CHIP_CNTL 0x8B20 + +#define PA_SC_SCREEN_SCISSOR_TL 0x28030 +#define PA_SC_GENERIC_SCISSOR_TL 0x28240 +#define PA_SC_WINDOW_SCISSOR_TL 0x28204 + +#define PCIE_PORT_INDEX 0x0038 +#define PCIE_PORT_DATA 0x003C + +#define RAMCFG 0x2408 +#define NOOFBANK_SHIFT 0 +#define NOOFBANK_MASK 0x00000001 +#define NOOFRANK_SHIFT 1 +#define NOOFRANK_MASK 0x00000002 +#define NOOFROWS_SHIFT 2 +#define NOOFROWS_MASK 0x0000001C +#define NOOFCOLS_SHIFT 5 +#define NOOFCOLS_MASK 0x00000060 +#define CHANSIZE_SHIFT 7 +#define CHANSIZE_MASK 0x00000080 +#define BURSTLENGTH_SHIFT 8 +#define BURSTLENGTH_MASK 0x00000100 +#define CHANSIZE_OVERRIDE (1 << 10) + +#define SCRATCH_REG0 0x8500 +#define SCRATCH_REG1 0x8504 +#define SCRATCH_REG2 0x8508 +#define SCRATCH_REG3 0x850C +#define SCRATCH_REG4 0x8510 +#define SCRATCH_REG5 0x8514 +#define SCRATCH_REG6 0x8518 +#define SCRATCH_REG7 0x851C +#define SCRATCH_UMSK 0x8540 +#define SCRATCH_ADDR 0x8544 + +#define SPI_CONFIG_CNTL 0x9100 +#define GPR_WRITE_PRIORITY(x) ((x) << 0) +#define DISABLE_INTERP_1 (1 << 5) +#define SPI_CONFIG_CNTL_1 0x913C +#define VTX_DONE_DELAY(x) ((x) << 0) +#define INTERP_ONE_PRIM_PER_ROW (1 << 4) +#define SPI_INPUT_Z 0x286D8 +#define SPI_PS_IN_CONTROL_0 0x286CC +#define NUM_INTERP(x) ((x)<<0) +#define POSITION_ENA (1<<8) +#define POSITION_CENTROID (1<<9) +#define POSITION_ADDR(x) ((x)<<10) +#define PARAM_GEN(x) ((x)<<15) +#define PARAM_GEN_ADDR(x) ((x)<<19) +#define BARYC_SAMPLE_CNTL(x) ((x)<<26) +#define PERSP_GRADIENT_ENA (1<<28) +#define LINEAR_GRADIENT_ENA (1<<29) +#define POSITION_SAMPLE (1<<30) +#define BARYC_AT_SAMPLE_ENA (1<<31) +#define SPI_PS_IN_CONTROL_1 0x286D0 +#define GEN_INDEX_PIX (1<<0) +#define GEN_INDEX_PIX_ADDR(x) ((x)<<1) +#define FRONT_FACE_ENA (1<<8) +#define FRONT_FACE_CHAN(x) ((x)<<9) +#define FRONT_FACE_ALL_BITS (1<<11) +#define FRONT_FACE_ADDR(x) ((x)<<12) +#define FOG_ADDR(x) ((x)<<17) +#define FIXED_PT_POSITION_ENA (1<<24) +#define FIXED_PT_POSITION_ADDR(x) ((x)<<25) + +#define 
SQ_MS_FIFO_SIZES 0x8CF0 +#define CACHE_FIFO_SIZE(x) ((x) << 0) +#define FETCH_FIFO_HIWATER(x) ((x) << 8) +#define DONE_FIFO_HIWATER(x) ((x) << 16) +#define ALU_UPDATE_FIFO_HIWATER(x) ((x) << 24) +#define SQ_PGM_START_ES 0x28880 +#define SQ_PGM_START_FS 0x28894 +#define SQ_PGM_START_GS 0x2886C +#define SQ_PGM_START_PS 0x28840 +#define SQ_PGM_RESOURCES_PS 0x28850 +#define SQ_PGM_EXPORTS_PS 0x28854 +#define SQ_PGM_CF_OFFSET_PS 0x288cc +#define SQ_PGM_START_VS 0x28858 +#define SQ_PGM_RESOURCES_VS 0x28868 +#define SQ_PGM_CF_OFFSET_VS 0x288d0 +#define SQ_VTX_CONSTANT_WORD6_0 0x38018 +#define S__SQ_VTX_CONSTANT_TYPE(x) (((x) & 3) << 30) +#define G__SQ_VTX_CONSTANT_TYPE(x) (((x) >> 30) & 3) +#define SQ_TEX_VTX_INVALID_TEXTURE 0x0 +#define SQ_TEX_VTX_INVALID_BUFFER 0x1 +#define SQ_TEX_VTX_VALID_TEXTURE 0x2 +#define SQ_TEX_VTX_VALID_BUFFER 0x3 + + +#define SX_MISC 0x28350 +#define SX_DEBUG_1 0x9054 +#define SMX_EVENT_RELEASE (1 << 0) +#define ENABLE_NEW_SMX_ADDRESS (1 << 16) + +#define TA_CNTL_AUX 0x9508 +#define DISABLE_CUBE_WRAP (1 << 0) +#define DISABLE_CUBE_ANISO (1 << 1) +#define SYNC_GRADIENT (1 << 24) +#define SYNC_WALKER (1 << 25) +#define SYNC_ALIGNER (1 << 26) +#define BILINEAR_PRECISION_6_BIT (0 << 31) +#define BILINEAR_PRECISION_8_BIT (1 << 31) + +#define TC_CNTL 0x9608 +#define TC_L2_SIZE(x) ((x)<<5) +#define L2_DISABLE_LATE_HIT (1<<9) + + +#define VGT_CACHE_INVALIDATION 0x88C4 +#define CACHE_INVALIDATION(x) ((x)<<0) +#define VC_ONLY 0 +#define TC_ONLY 1 +#define VC_AND_TC 2 +#define VGT_DMA_BASE 0x287E8 +#define VGT_DMA_BASE_HI 0x287E4 +#define VGT_ES_PER_GS 0x88CC +#define VGT_GS_PER_ES 0x88C8 +#define VGT_GS_PER_VS 0x88E8 +#define VGT_GS_VERTEX_REUSE 0x88D4 +#define VGT_PRIMITIVE_TYPE 0x8958 +#define VGT_NUM_INSTANCES 0x8974 +#define VGT_OUT_DEALLOC_CNTL 0x28C5C +#define DEALLOC_DIST_MASK 0x0000007F +#define VGT_STRMOUT_BASE_OFFSET_0 0x28B10 +#define VGT_STRMOUT_BASE_OFFSET_1 0x28B14 +#define VGT_STRMOUT_BASE_OFFSET_2 0x28B18 +#define 
VGT_STRMOUT_BASE_OFFSET_3 0x28B1c +#define VGT_STRMOUT_BASE_OFFSET_HI_0 0x28B44 +#define VGT_STRMOUT_BASE_OFFSET_HI_1 0x28B48 +#define VGT_STRMOUT_BASE_OFFSET_HI_2 0x28B4c +#define VGT_STRMOUT_BASE_OFFSET_HI_3 0x28B50 +#define VGT_STRMOUT_BUFFER_BASE_0 0x28AD8 +#define VGT_STRMOUT_BUFFER_BASE_1 0x28AE8 +#define VGT_STRMOUT_BUFFER_BASE_2 0x28AF8 +#define VGT_STRMOUT_BUFFER_BASE_3 0x28B08 +#define VGT_STRMOUT_BUFFER_OFFSET_0 0x28ADC +#define VGT_STRMOUT_BUFFER_OFFSET_1 0x28AEC +#define VGT_STRMOUT_BUFFER_OFFSET_2 0x28AFC +#define VGT_STRMOUT_BUFFER_OFFSET_3 0x28B0C +#define VGT_STRMOUT_EN 0x28AB0 +#define VGT_VERTEX_REUSE_BLOCK_CNTL 0x28C58 +#define VTX_REUSE_DEPTH_MASK 0x000000FF +#define VGT_EVENT_INITIATOR 0x28a90 +# define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0) + +#define VM_CONTEXT0_CNTL 0x1410 +#define ENABLE_CONTEXT (1 << 0) +#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) +#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) +#define VM_CONTEXT0_INVALIDATION_LOW_ADDR 0x1490 +#define VM_CONTEXT0_INVALIDATION_HIGH_ADDR 0x14B0 +#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x1574 +#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x1594 +#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x15B4 +#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x1554 +#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 +#define REQUEST_TYPE(x) (((x) & 0xf) << 0) +#define RESPONSE_TYPE_MASK 0x000000F0 +#define RESPONSE_TYPE_SHIFT 4 +#define VM_L2_CNTL 0x1400 +#define ENABLE_L2_CACHE (1 << 0) +#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) +#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9) +#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 13) +#define VM_L2_CNTL2 0x1404 +#define INVALIDATE_ALL_L1_TLBS (1 << 0) +#define INVALIDATE_L2_CACHE (1 << 1) +#define VM_L2_CNTL3 0x1408 +#define BANK_SELECT_0(x) (((x) & 0x1f) << 0) +#define BANK_SELECT_1(x) (((x) & 0x1f) << 5) +#define L2_CACHE_UPDATE_MODE(x) (((x) & 3) << 10) +#define VM_L2_STATUS 0x140C +#define L2_BUSY (1 << 0) + +#define WAIT_UNTIL 0x8040 +#define 
WAIT_2D_IDLE_bit (1 << 14) +#define WAIT_3D_IDLE_bit (1 << 15) +#define WAIT_2D_IDLECLEAN_bit (1 << 16) +#define WAIT_3D_IDLECLEAN_bit (1 << 17) + + + +/* + * PM4 + */ +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) +#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ + (((reg) >> 2) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) +#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) + +/* Packet 3 types */ +#define PACKET3_NOP 0x10 +#define PACKET3_INDIRECT_BUFFER_END 0x17 +#define PACKET3_SET_PREDICATION 0x20 +#define PACKET3_REG_RMW 0x21 +#define PACKET3_COND_EXEC 0x22 +#define PACKET3_PRED_EXEC 0x23 +#define PACKET3_START_3D_CMDBUF 0x24 +#define PACKET3_DRAW_INDEX_2 0x27 +#define PACKET3_CONTEXT_CONTROL 0x28 +#define PACKET3_DRAW_INDEX_IMMD_BE 0x29 +#define PACKET3_INDEX_TYPE 0x2A +#define PACKET3_DRAW_INDEX 0x2B +#define PACKET3_DRAW_INDEX_AUTO 0x2D +#define PACKET3_DRAW_INDEX_IMMD 0x2E +#define PACKET3_NUM_INSTANCES 0x2F +#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 +#define PACKET3_INDIRECT_BUFFER_MP 0x38 +#define PACKET3_MEM_SEMAPHORE 0x39 +#define PACKET3_MPEG_INDEX 0x3A +#define PACKET3_WAIT_REG_MEM 0x3C +#define PACKET3_MEM_WRITE 0x3D +#define PACKET3_INDIRECT_BUFFER 0x32 +#define PACKET3_CP_INTERRUPT 0x40 +#define PACKET3_SURFACE_SYNC 0x43 +# define PACKET3_CB0_DEST_BASE_ENA (1 << 6) +# define PACKET3_TC_ACTION_ENA (1 << 23) +# define PACKET3_VC_ACTION_ENA (1 << 24) +# define PACKET3_CB_ACTION_ENA (1 << 25) +# define PACKET3_DB_ACTION_ENA (1 << 26) +# define PACKET3_SH_ACTION_ENA (1 << 27) +# define PACKET3_SMX_ACTION_ENA (1 << 28) +#define PACKET3_ME_INITIALIZE 0x44 +#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) +#define PACKET3_COND_WRITE 0x45 +#define 
PACKET3_EVENT_WRITE 0x46 +#define PACKET3_EVENT_WRITE_EOP 0x47 +#define PACKET3_ONE_REG_WRITE 0x57 +#define PACKET3_SET_CONFIG_REG 0x68 +#define PACKET3_SET_CONFIG_REG_OFFSET 0x00008000 +#define PACKET3_SET_CONFIG_REG_END 0x0000ac00 +#define PACKET3_SET_CONTEXT_REG 0x69 +#define PACKET3_SET_CONTEXT_REG_OFFSET 0x00028000 +#define PACKET3_SET_CONTEXT_REG_END 0x00029000 +#define PACKET3_SET_ALU_CONST 0x6A +#define PACKET3_SET_ALU_CONST_OFFSET 0x00030000 +#define PACKET3_SET_ALU_CONST_END 0x00032000 +#define PACKET3_SET_BOOL_CONST 0x6B +#define PACKET3_SET_BOOL_CONST_OFFSET 0x0003e380 +#define PACKET3_SET_BOOL_CONST_END 0x00040000 +#define PACKET3_SET_LOOP_CONST 0x6C +#define PACKET3_SET_LOOP_CONST_OFFSET 0x0003e200 +#define PACKET3_SET_LOOP_CONST_END 0x0003e380 +#define PACKET3_SET_RESOURCE 0x6D +#define PACKET3_SET_RESOURCE_OFFSET 0x00038000 +#define PACKET3_SET_RESOURCE_END 0x0003c000 +#define PACKET3_SET_SAMPLER 0x6E +#define PACKET3_SET_SAMPLER_OFFSET 0x0003c000 +#define PACKET3_SET_SAMPLER_END 0x0003cff0 +#define PACKET3_SET_CTL_CONST 0x6F +#define PACKET3_SET_CTL_CONST_OFFSET 0x0003cff0 +#define PACKET3_SET_CTL_CONST_END 0x0003e200 +#define PACKET3_SURFACE_BASE_UPDATE 0x73 + + +#define R_008020_GRBM_SOFT_RESET 0x8020 +#define S_008020_SOFT_RESET_CP(x) (((x) & 1) << 0) +#define S_008020_SOFT_RESET_CB(x) (((x) & 1) << 1) +#define S_008020_SOFT_RESET_CR(x) (((x) & 1) << 2) +#define S_008020_SOFT_RESET_DB(x) (((x) & 1) << 3) +#define S_008020_SOFT_RESET_PA(x) (((x) & 1) << 5) +#define S_008020_SOFT_RESET_SC(x) (((x) & 1) << 6) +#define S_008020_SOFT_RESET_SMX(x) (((x) & 1) << 7) +#define S_008020_SOFT_RESET_SPI(x) (((x) & 1) << 8) +#define S_008020_SOFT_RESET_SH(x) (((x) & 1) << 9) +#define S_008020_SOFT_RESET_SX(x) (((x) & 1) << 10) +#define S_008020_SOFT_RESET_TC(x) (((x) & 1) << 11) +#define S_008020_SOFT_RESET_TA(x) (((x) & 1) << 12) +#define S_008020_SOFT_RESET_VC(x) (((x) & 1) << 13) +#define S_008020_SOFT_RESET_VGT(x) (((x) & 1) << 14) +#define 
R_008010_GRBM_STATUS 0x8010 +#define S_008010_CMDFIFO_AVAIL(x) (((x) & 0x1F) << 0) +#define S_008010_CP_RQ_PENDING(x) (((x) & 1) << 6) +#define S_008010_CF_RQ_PENDING(x) (((x) & 1) << 7) +#define S_008010_PF_RQ_PENDING(x) (((x) & 1) << 8) +#define S_008010_GRBM_EE_BUSY(x) (((x) & 1) << 10) +#define S_008010_VC_BUSY(x) (((x) & 1) << 11) +#define S_008010_DB03_CLEAN(x) (((x) & 1) << 12) +#define S_008010_CB03_CLEAN(x) (((x) & 1) << 13) +#define S_008010_VGT_BUSY_NO_DMA(x) (((x) & 1) << 16) +#define S_008010_VGT_BUSY(x) (((x) & 1) << 17) +#define S_008010_TA03_BUSY(x) (((x) & 1) << 18) +#define S_008010_TC_BUSY(x) (((x) & 1) << 19) +#define S_008010_SX_BUSY(x) (((x) & 1) << 20) +#define S_008010_SH_BUSY(x) (((x) & 1) << 21) +#define S_008010_SPI03_BUSY(x) (((x) & 1) << 22) +#define S_008010_SMX_BUSY(x) (((x) & 1) << 23) +#define S_008010_SC_BUSY(x) (((x) & 1) << 24) +#define S_008010_PA_BUSY(x) (((x) & 1) << 25) +#define S_008010_DB03_BUSY(x) (((x) & 1) << 26) +#define S_008010_CR_BUSY(x) (((x) & 1) << 27) +#define S_008010_CP_COHERENCY_BUSY(x) (((x) & 1) << 28) +#define S_008010_CP_BUSY(x) (((x) & 1) << 29) +#define S_008010_CB03_BUSY(x) (((x) & 1) << 30) +#define S_008010_GUI_ACTIVE(x) (((x) & 1) << 31) +#define G_008010_CMDFIFO_AVAIL(x) (((x) >> 0) & 0x1F) +#define G_008010_CP_RQ_PENDING(x) (((x) >> 6) & 1) +#define G_008010_CF_RQ_PENDING(x) (((x) >> 7) & 1) +#define G_008010_PF_RQ_PENDING(x) (((x) >> 8) & 1) +#define G_008010_GRBM_EE_BUSY(x) (((x) >> 10) & 1) +#define G_008010_VC_BUSY(x) (((x) >> 11) & 1) +#define G_008010_DB03_CLEAN(x) (((x) >> 12) & 1) +#define G_008010_CB03_CLEAN(x) (((x) >> 13) & 1) +#define G_008010_VGT_BUSY_NO_DMA(x) (((x) >> 16) & 1) +#define G_008010_VGT_BUSY(x) (((x) >> 17) & 1) +#define G_008010_TA03_BUSY(x) (((x) >> 18) & 1) +#define G_008010_TC_BUSY(x) (((x) >> 19) & 1) +#define G_008010_SX_BUSY(x) (((x) >> 20) & 1) +#define G_008010_SH_BUSY(x) (((x) >> 21) & 1) +#define G_008010_SPI03_BUSY(x) (((x) >> 22) & 1) +#define 
G_008010_SMX_BUSY(x) (((x) >> 23) & 1) +#define G_008010_SC_BUSY(x) (((x) >> 24) & 1) +#define G_008010_PA_BUSY(x) (((x) >> 25) & 1) +#define G_008010_DB03_BUSY(x) (((x) >> 26) & 1) +#define G_008010_CR_BUSY(x) (((x) >> 27) & 1) +#define G_008010_CP_COHERENCY_BUSY(x) (((x) >> 28) & 1) +#define G_008010_CP_BUSY(x) (((x) >> 29) & 1) +#define G_008010_CB03_BUSY(x) (((x) >> 30) & 1) +#define G_008010_GUI_ACTIVE(x) (((x) >> 31) & 1) +#define R_008014_GRBM_STATUS2 0x8014 +#define S_008014_CR_CLEAN(x) (((x) & 1) << 0) +#define S_008014_SMX_CLEAN(x) (((x) & 1) << 1) +#define S_008014_SPI0_BUSY(x) (((x) & 1) << 8) +#define S_008014_SPI1_BUSY(x) (((x) & 1) << 9) +#define S_008014_SPI2_BUSY(x) (((x) & 1) << 10) +#define S_008014_SPI3_BUSY(x) (((x) & 1) << 11) +#define S_008014_TA0_BUSY(x) (((x) & 1) << 12) +#define S_008014_TA1_BUSY(x) (((x) & 1) << 13) +#define S_008014_TA2_BUSY(x) (((x) & 1) << 14) +#define S_008014_TA3_BUSY(x) (((x) & 1) << 15) +#define S_008014_DB0_BUSY(x) (((x) & 1) << 16) +#define S_008014_DB1_BUSY(x) (((x) & 1) << 17) +#define S_008014_DB2_BUSY(x) (((x) & 1) << 18) +#define S_008014_DB3_BUSY(x) (((x) & 1) << 19) +#define S_008014_CB0_BUSY(x) (((x) & 1) << 20) +#define S_008014_CB1_BUSY(x) (((x) & 1) << 21) +#define S_008014_CB2_BUSY(x) (((x) & 1) << 22) +#define S_008014_CB3_BUSY(x) (((x) & 1) << 23) +#define G_008014_CR_CLEAN(x) (((x) >> 0) & 1) +#define G_008014_SMX_CLEAN(x) (((x) >> 1) & 1) +#define G_008014_SPI0_BUSY(x) (((x) >> 8) & 1) +#define G_008014_SPI1_BUSY(x) (((x) >> 9) & 1) +#define G_008014_SPI2_BUSY(x) (((x) >> 10) & 1) +#define G_008014_SPI3_BUSY(x) (((x) >> 11) & 1) +#define G_008014_TA0_BUSY(x) (((x) >> 12) & 1) +#define G_008014_TA1_BUSY(x) (((x) >> 13) & 1) +#define G_008014_TA2_BUSY(x) (((x) >> 14) & 1) +#define G_008014_TA3_BUSY(x) (((x) >> 15) & 1) +#define G_008014_DB0_BUSY(x) (((x) >> 16) & 1) +#define G_008014_DB1_BUSY(x) (((x) >> 17) & 1) +#define G_008014_DB2_BUSY(x) (((x) >> 18) & 1) +#define G_008014_DB3_BUSY(x) (((x) >> 
19) & 1) +#define G_008014_CB0_BUSY(x) (((x) >> 20) & 1) +#define G_008014_CB1_BUSY(x) (((x) >> 21) & 1) +#define G_008014_CB2_BUSY(x) (((x) >> 22) & 1) +#define G_008014_CB3_BUSY(x) (((x) >> 23) & 1) +#define R_000E50_SRBM_STATUS 0x0E50 +#define G_000E50_RLC_RQ_PENDING(x) (((x) >> 3) & 1) +#define G_000E50_RCU_RQ_PENDING(x) (((x) >> 4) & 1) +#define G_000E50_GRBM_RQ_PENDING(x) (((x) >> 5) & 1) +#define G_000E50_HI_RQ_PENDING(x) (((x) >> 6) & 1) +#define G_000E50_IO_EXTERN_SIGNAL(x) (((x) >> 7) & 1) +#define G_000E50_VMC_BUSY(x) (((x) >> 8) & 1) +#define G_000E50_MCB_BUSY(x) (((x) >> 9) & 1) +#define G_000E50_MCDZ_BUSY(x) (((x) >> 10) & 1) +#define G_000E50_MCDY_BUSY(x) (((x) >> 11) & 1) +#define G_000E50_MCDX_BUSY(x) (((x) >> 12) & 1) +#define G_000E50_MCDW_BUSY(x) (((x) >> 13) & 1) +#define G_000E50_SEM_BUSY(x) (((x) >> 14) & 1) +#define G_000E50_RLC_BUSY(x) (((x) >> 15) & 1) +#define R_000E60_SRBM_SOFT_RESET 0x0E60 +#define S_000E60_SOFT_RESET_BIF(x) (((x) & 1) << 1) +#define S_000E60_SOFT_RESET_CG(x) (((x) & 1) << 2) +#define S_000E60_SOFT_RESET_CMC(x) (((x) & 1) << 3) +#define S_000E60_SOFT_RESET_CSC(x) (((x) & 1) << 4) +#define S_000E60_SOFT_RESET_DC(x) (((x) & 1) << 5) +#define S_000E60_SOFT_RESET_GRBM(x) (((x) & 1) << 8) +#define S_000E60_SOFT_RESET_HDP(x) (((x) & 1) << 9) +#define S_000E60_SOFT_RESET_IH(x) (((x) & 1) << 10) +#define S_000E60_SOFT_RESET_MC(x) (((x) & 1) << 11) +#define S_000E60_SOFT_RESET_RLC(x) (((x) & 1) << 13) +#define S_000E60_SOFT_RESET_ROM(x) (((x) & 1) << 14) +#define S_000E60_SOFT_RESET_SEM(x) (((x) & 1) << 15) +#define S_000E60_SOFT_RESET_TSC(x) (((x) & 1) << 16) +#define S_000E60_SOFT_RESET_VMC(x) (((x) & 1) << 17) + +#endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e47f2fc294c..3299733ac30 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -50,8 +50,8 @@ #include #include "radeon_mode.h" +#include "radeon_share.h" #include "radeon_reg.h" -#include 
"r300.h" /* * Modules parameters. @@ -112,10 +112,11 @@ enum radeon_family { CHIP_RV635, CHIP_RV670, CHIP_RS780, + CHIP_RS880, CHIP_RV770, CHIP_RV730, CHIP_RV710, - CHIP_RS880, + CHIP_RV740, CHIP_LAST, }; @@ -152,10 +153,21 @@ struct radeon_device; */ bool radeon_get_bios(struct radeon_device *rdev); + /* - * Clocks + * Dummy page */ +struct radeon_dummy_page { + struct page *page; + dma_addr_t addr; +}; +int radeon_dummy_page_init(struct radeon_device *rdev); +void radeon_dummy_page_fini(struct radeon_device *rdev); + +/* + * Clocks + */ struct radeon_clock { struct radeon_pll p1pll; struct radeon_pll p2pll; @@ -166,6 +178,7 @@ struct radeon_clock { uint32_t default_sclk; }; + /* * Fences. */ @@ -332,14 +345,18 @@ struct radeon_mc { resource_size_t aper_size; resource_size_t aper_base; resource_size_t agp_base; - unsigned gtt_location; - unsigned gtt_size; - unsigned vram_location; /* for some chips with <= 32MB we need to lie * about vram size near mc fb location */ - unsigned mc_vram_size; + u64 mc_vram_size; + u64 gtt_location; + u64 gtt_size; + u64 gtt_start; + u64 gtt_end; + u64 vram_location; + u64 vram_start; + u64 vram_end; unsigned vram_width; - unsigned real_vram_size; + u64 real_vram_size; int vram_mtrr; bool vram_is_ddr; }; @@ -411,6 +428,16 @@ struct radeon_cp { bool ready; }; +struct r600_blit { + struct radeon_object *shader_obj; + u64 shader_gpu_addr; + u32 vs_offset, ps_offset; + u32 state_offset; + u32 state_len; + u32 vb_used, vb_total; + struct radeon_ib *vb_ib; +}; + int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib); @@ -463,6 +490,7 @@ struct radeon_cs_parser { int chunk_relocs_idx; struct radeon_ib *ib; void *track; + unsigned family; }; struct radeon_cs_packet { @@ -559,6 +587,9 @@ int r100_debugfs_cp_init(struct radeon_device *rdev); */ struct radeon_asic { int (*init)(struct 
radeon_device *rdev); + void (*fini)(struct radeon_device *rdev); + int (*resume)(struct radeon_device *rdev); + int (*suspend)(struct radeon_device *rdev); void (*errata)(struct radeon_device *rdev); void (*vram_info)(struct radeon_device *rdev); int (*gpu_reset)(struct radeon_device *rdev); @@ -573,7 +604,11 @@ struct radeon_asic { int (*cp_init)(struct radeon_device *rdev, unsigned ring_size); void (*cp_fini)(struct radeon_device *rdev); void (*cp_disable)(struct radeon_device *rdev); + void (*cp_commit)(struct radeon_device *rdev); void (*ring_start)(struct radeon_device *rdev); + int (*ring_test)(struct radeon_device *rdev); + void (*ring_ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); + int (*ib_test)(struct radeon_device *rdev); int (*irq_set)(struct radeon_device *rdev); int (*irq_process)(struct radeon_device *rdev); u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc); @@ -613,6 +648,8 @@ struct r100_asic { union radeon_asic_config { struct r300_asic r300; struct r100_asic r100; + struct r600_asic r600; + struct rv770_asic rv770; }; @@ -698,12 +735,16 @@ struct radeon_device { struct radeon_pm pm; struct mutex cs_mutex; struct radeon_wb wb; + struct radeon_dummy_page dummy_page; bool gpu_lockup; bool shutdown; bool suspend; bool need_dma32; + bool new_init_path; struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES]; - const struct firmware *fw; /* firmware */ + const struct firmware *me_fw; /* all family ME firmware */ + const struct firmware *pfp_fw; /* r6/700 PFP firmware */ + struct r600_blit r600_blit; }; int radeon_device_init(struct radeon_device *rdev, @@ -713,6 +754,13 @@ int radeon_device_init(struct radeon_device *rdev, void radeon_device_fini(struct radeon_device *rdev); int radeon_gpu_wait_for_idle(struct radeon_device *rdev); +/* r600 blit */ +int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes); +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence); +void 
r600_kms_blit_copy(struct radeon_device *rdev, + u64 src_gpu_addr, u64 dst_gpu_addr, + int size_bytes); + static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) { if (reg < 0x10000) @@ -740,6 +788,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32 #define RREG8(reg) readb(((void __iomem *)rdev->rmmio) + (reg)) #define WREG8(reg, v) writeb(v, ((void __iomem *)rdev->rmmio) + (reg)) #define RREG32(reg) r100_mm_rreg(rdev, (reg)) +#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg))) #define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v)) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) @@ -763,6 +812,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32 tmp_ |= ((val) & ~(mask)); \ WREG32_PLL(reg, tmp_); \ } while (0) +#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg))) /* * Indirect registers accessor @@ -827,51 +877,6 @@ void radeon_atombios_fini(struct radeon_device *rdev); /* * RING helpers. 
*/ -#define CP_PACKET0 0x00000000 -#define PACKET0_BASE_INDEX_SHIFT 0 -#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) -#define PACKET0_COUNT_SHIFT 16 -#define PACKET0_COUNT_MASK (0x3fff << 16) -#define CP_PACKET1 0x40000000 -#define CP_PACKET2 0x80000000 -#define PACKET2_PAD_SHIFT 0 -#define PACKET2_PAD_MASK (0x3fffffff << 0) -#define CP_PACKET3 0xC0000000 -#define PACKET3_IT_OPCODE_SHIFT 8 -#define PACKET3_IT_OPCODE_MASK (0xff << 8) -#define PACKET3_COUNT_SHIFT 16 -#define PACKET3_COUNT_MASK (0x3fff << 16) -/* PACKET3 op code */ -#define PACKET3_NOP 0x10 -#define PACKET3_3D_DRAW_VBUF 0x28 -#define PACKET3_3D_DRAW_IMMD 0x29 -#define PACKET3_3D_DRAW_INDX 0x2A -#define PACKET3_3D_LOAD_VBPNTR 0x2F -#define PACKET3_INDX_BUFFER 0x33 -#define PACKET3_3D_DRAW_VBUF_2 0x34 -#define PACKET3_3D_DRAW_IMMD_2 0x35 -#define PACKET3_3D_DRAW_INDX_2 0x36 -#define PACKET3_BITBLT_MULTI 0x9B - -#define PACKET0(reg, n) (CP_PACKET0 | \ - REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ - REG_SET(PACKET0_COUNT, (n))) -#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) -#define PACKET3(op, n) (CP_PACKET3 | \ - REG_SET(PACKET3_IT_OPCODE, (op)) | \ - REG_SET(PACKET3_COUNT, (n))) - -#define PACKET_TYPE0 0 -#define PACKET_TYPE1 1 -#define PACKET_TYPE2 2 -#define PACKET_TYPE3 3 - -#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) -#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) -#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) -#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) -#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) - static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) { #if DRM_DEBUG_CODE @@ -890,6 +895,9 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) * ASICs macro. 
*/ #define radeon_init(rdev) (rdev)->asic->init((rdev)) +#define radeon_fini(rdev) (rdev)->asic->fini((rdev)) +#define radeon_resume(rdev) (rdev)->asic->resume((rdev)) +#define radeon_suspend(rdev) (rdev)->asic->suspend((rdev)) #define radeon_cs_parse(p) rdev->asic->cs_parse((p)) #define radeon_errata(rdev) (rdev)->asic->errata((rdev)) #define radeon_vram_info(rdev) (rdev)->asic->vram_info((rdev)) @@ -905,7 +913,11 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) #define radeon_cp_init(rdev,rsize) (rdev)->asic->cp_init((rdev), (rsize)) #define radeon_cp_fini(rdev) (rdev)->asic->cp_fini((rdev)) #define radeon_cp_disable(rdev) (rdev)->asic->cp_disable((rdev)) +#define radeon_cp_commit(rdev) (rdev)->asic->cp_commit((rdev)) #define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev)) +#define radeon_ring_test(rdev) (rdev)->asic->ring_test((rdev)) +#define radeon_ring_ib_execute(rdev, ib) (rdev)->asic->ring_ib_execute((rdev), (ib)) +#define radeon_ib_test(rdev) (rdev)->asic->ib_test((rdev)) #define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev)) #define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev)) #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index c9cbd8ae1f9..e87bb915a6d 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -60,6 +60,7 @@ int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr); int r100_cp_init(struct radeon_device *rdev, unsigned ring_size); void r100_cp_fini(struct radeon_device *rdev); void r100_cp_disable(struct radeon_device *rdev); +void r100_cp_commit(struct radeon_device *rdev); void r100_ring_start(struct radeon_device *rdev); int r100_irq_set(struct radeon_device *rdev); int r100_irq_process(struct radeon_device *rdev); @@ -78,6 +79,9 @@ int r100_set_surface_reg(struct radeon_device *rdev, int 
reg, uint32_t offset, uint32_t obj_size); int r100_clear_surface_reg(struct radeon_device *rdev, int reg); void r100_bandwidth_update(struct radeon_device *rdev); +void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +int r100_ib_test(struct radeon_device *rdev); +int r100_ring_test(struct radeon_device *rdev); static struct radeon_asic r100_asic = { .init = &r100_init, @@ -95,7 +99,11 @@ static struct radeon_asic r100_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r100_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -156,7 +164,11 @@ static struct radeon_asic r300_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -197,7 +209,11 @@ static struct radeon_asic r420_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -245,7 +261,11 @@ static struct radeon_asic rs400_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, 
.irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -298,7 +318,11 @@ static struct radeon_asic rs600_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -341,7 +365,11 @@ static struct radeon_asic rs690_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -391,7 +419,11 @@ static struct radeon_asic rv515_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &rv515_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -434,7 +466,11 @@ static struct radeon_asic r520_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &rv515_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -453,9 +489,127 @@ static struct radeon_asic r520_asic = { }; /* - * r600,rv610,rv630,rv620,rv635,rv670,rs780,rv770,rv730,rv710 + * r600,rv610,rv630,rv620,rv635,rv670,rs780,rs880 */ +int r600_init(struct 
radeon_device *rdev); +void r600_fini(struct radeon_device *rdev); +int r600_suspend(struct radeon_device *rdev); +int r600_resume(struct radeon_device *rdev); +int r600_wb_init(struct radeon_device *rdev); +void r600_wb_fini(struct radeon_device *rdev); +void r600_cp_commit(struct radeon_device *rdev); +void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg); void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); +int r600_cs_parse(struct radeon_cs_parser *p); +void r600_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +int r600_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, + uint64_t dst_offset, + unsigned num_pages, + struct radeon_fence *fence); +int r600_irq_process(struct radeon_device *rdev); +int r600_irq_set(struct radeon_device *rdev); +int r600_gpu_reset(struct radeon_device *rdev); +int r600_set_surface_reg(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size); +int r600_clear_surface_reg(struct radeon_device *rdev, int reg); +void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +int r600_ib_test(struct radeon_device *rdev); +int r600_ring_test(struct radeon_device *rdev); +int r600_copy_blit(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_pages, struct radeon_fence *fence); + +static struct radeon_asic r600_asic = { + .errata = NULL, + .init = &r600_init, + .fini = &r600_fini, + .suspend = &r600_suspend, + .resume = &r600_resume, + .cp_commit = &r600_cp_commit, + .vram_info = NULL, + .gpu_reset = &r600_gpu_reset, + .mc_init = NULL, + .mc_fini = NULL, + .wb_init = &r600_wb_init, + .wb_fini = &r600_wb_fini, + .gart_enable = NULL, + .gart_disable = NULL, + .gart_tlb_flush = &r600_pcie_gart_tlb_flush, + .gart_set_page = &rs600_gart_set_page, + .cp_init = NULL, + .cp_fini = NULL, + .cp_disable = NULL, + 
.ring_start = NULL, + .ring_test = &r600_ring_test, + .ring_ib_execute = &r600_ring_ib_execute, + .ib_test = &r600_ib_test, + .irq_set = &r600_irq_set, + .irq_process = &r600_irq_process, + .fence_ring_emit = &r600_fence_ring_emit, + .cs_parse = &r600_cs_parse, + .copy_blit = &r600_copy_blit, + .copy_dma = &r600_copy_blit, + .copy = NULL, + .set_engine_clock = &radeon_atom_set_engine_clock, + .set_memory_clock = &radeon_atom_set_memory_clock, + .set_pcie_lanes = NULL, + .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r600_set_surface_reg, + .clear_surface_reg = r600_clear_surface_reg, + .bandwidth_update = &r520_bandwidth_update, +}; + +/* + * rv770,rv730,rv710,rv740 + */ +int rv770_init(struct radeon_device *rdev); +void rv770_fini(struct radeon_device *rdev); +int rv770_suspend(struct radeon_device *rdev); +int rv770_resume(struct radeon_device *rdev); +int rv770_gpu_reset(struct radeon_device *rdev); + +static struct radeon_asic rv770_asic = { + .errata = NULL, + .init = &rv770_init, + .fini = &rv770_fini, + .suspend = &rv770_suspend, + .resume = &rv770_resume, + .cp_commit = &r600_cp_commit, + .vram_info = NULL, + .gpu_reset = &rv770_gpu_reset, + .mc_init = NULL, + .mc_fini = NULL, + .wb_init = &r600_wb_init, + .wb_fini = &r600_wb_fini, + .gart_enable = NULL, + .gart_disable = NULL, + .gart_tlb_flush = &r600_pcie_gart_tlb_flush, + .gart_set_page = &rs600_gart_set_page, + .cp_init = NULL, + .cp_fini = NULL, + .cp_disable = NULL, + .ring_start = NULL, + .ring_test = &r600_ring_test, + .ring_ib_execute = &r600_ring_ib_execute, + .ib_test = &r600_ib_test, + .irq_set = &r600_irq_set, + .irq_process = &r600_irq_process, + .fence_ring_emit = &r600_fence_ring_emit, + .cs_parse = &r600_cs_parse, + .copy_blit = &r600_copy_blit, + .copy_dma = &r600_copy_blit, + .copy = NULL, + .set_engine_clock = &radeon_atom_set_engine_clock, + .set_memory_clock = &radeon_atom_set_memory_clock, + .set_pcie_lanes = NULL, + .set_clock_gating = 
&radeon_atom_set_clock_gating, + .set_surface_reg = r600_set_surface_reg, + .clear_surface_reg = r600_clear_surface_reg, + .bandwidth_update = &r520_bandwidth_update, +}; #endif diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index bba9b4bd8f5..a8fb392c9cd 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -370,10 +370,6 @@ bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev) && record-> ucRecordType <= ATOM_MAX_OBJECT_RECORD_NUMBER) { - DRM_ERROR - ("record type %d\n", - record-> - ucRecordType); switch (record-> ucRecordType) { case ATOM_I2C_RECORD_TYPE: diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c index a37cbce5318..152eef13197 100644 --- a/drivers/gpu/drm/radeon/radeon_clocks.c +++ b/drivers/gpu/drm/radeon/radeon_clocks.c @@ -102,10 +102,12 @@ void radeon_get_clock_info(struct drm_device *dev) p1pll->reference_div = 12; if (p2pll->reference_div < 2) p2pll->reference_div = 12; - if (spll->reference_div < 2) - spll->reference_div = - RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) & - RADEON_M_SPLL_REF_DIV_MASK; + if (rdev->family < CHIP_RS600) { + if (spll->reference_div < 2) + spll->reference_div = + RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) & + RADEON_M_SPLL_REF_DIV_MASK; + } if (mpll->reference_div < 2) mpll->reference_div = spll->reference_div; } else { diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 7693f7c67bd..f2469c51178 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -37,7 +37,7 @@ /* * Clear GPU surface registers. 
*/ -static void radeon_surface_init(struct radeon_device *rdev) +void radeon_surface_init(struct radeon_device *rdev) { /* FIXME: check this out */ if (rdev->family < CHIP_R600) { @@ -56,7 +56,7 @@ static void radeon_surface_init(struct radeon_device *rdev) /* * GPU scratch registers helpers function. */ -static void radeon_scratch_init(struct radeon_device *rdev) +void radeon_scratch_init(struct radeon_device *rdev) { int i; @@ -156,16 +156,14 @@ int radeon_mc_setup(struct radeon_device *rdev) tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1); rdev->mc.gtt_location = tmp; } - DRM_INFO("radeon: VRAM %uM\n", rdev->mc.real_vram_size >> 20); + DRM_INFO("radeon: VRAM %uM\n", (unsigned)(rdev->mc.mc_vram_size >> 20)); DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n", - rdev->mc.vram_location, - rdev->mc.vram_location + rdev->mc.mc_vram_size - 1); - if (rdev->mc.real_vram_size != rdev->mc.mc_vram_size) - DRM_INFO("radeon: VRAM less than aperture workaround enabled\n"); - DRM_INFO("radeon: GTT %uM\n", rdev->mc.gtt_size >> 20); + (unsigned)rdev->mc.vram_location, + (unsigned)(rdev->mc.vram_location + rdev->mc.mc_vram_size - 1)); + DRM_INFO("radeon: GTT %uM\n", (unsigned)(rdev->mc.gtt_size >> 20)); DRM_INFO("radeon: GTT from 0x%08X to 0x%08X\n", - rdev->mc.gtt_location, - rdev->mc.gtt_location + rdev->mc.gtt_size - 1); + (unsigned)rdev->mc.gtt_location, + (unsigned)(rdev->mc.gtt_location + rdev->mc.gtt_size - 1)); return 0; } @@ -205,6 +203,31 @@ static bool radeon_card_posted(struct radeon_device *rdev) } +int radeon_dummy_page_init(struct radeon_device *rdev) +{ + rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO); + if (rdev->dummy_page.page == NULL) + return -ENOMEM; + rdev->dummy_page.addr = pci_map_page(rdev->pdev, rdev->dummy_page.page, + 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (!rdev->dummy_page.addr) { + __free_page(rdev->dummy_page.page); + rdev->dummy_page.page = NULL; + return -ENOMEM; + } + return 0; +} + +void 
radeon_dummy_page_fini(struct radeon_device *rdev) +{ + if (rdev->dummy_page.page == NULL) + return; + pci_unmap_page(rdev->pdev, rdev->dummy_page.addr, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + __free_page(rdev->dummy_page.page); + rdev->dummy_page.page = NULL; +} + /* * Registers accessors functions. @@ -323,9 +346,15 @@ int radeon_asic_init(struct radeon_device *rdev) case CHIP_RV635: case CHIP_RV670: case CHIP_RS780: + case CHIP_RS880: + rdev->asic = &r600_asic; + break; case CHIP_RV770: case CHIP_RV730: case CHIP_RV710: + case CHIP_RV740: + rdev->asic = &rv770_asic; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -448,7 +477,7 @@ int radeon_device_init(struct radeon_device *rdev, struct pci_dev *pdev, uint32_t flags) { - int r, ret; + int r, ret = 0; int dma_bits; DRM_INFO("radeon: Initializing kernel modesetting.\n"); @@ -487,10 +516,6 @@ int radeon_device_init(struct radeon_device *rdev, if (r) { return r; } - r = radeon_init(rdev); - if (r) { - return r; - } /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. 
@@ -521,111 +546,118 @@ int radeon_device_init(struct radeon_device *rdev, DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)rdev->rmmio_base); DRM_INFO("register mmio size: %u\n", (unsigned)rdev->rmmio_size); - /* Setup errata flags */ - radeon_errata(rdev); - /* Initialize scratch registers */ - radeon_scratch_init(rdev); - /* Initialize surface registers */ - radeon_surface_init(rdev); - - /* TODO: disable VGA need to use VGA request */ - /* BIOS*/ - if (!radeon_get_bios(rdev)) { - if (ASIC_IS_AVIVO(rdev)) - return -EINVAL; - } - if (rdev->is_atom_bios) { - r = radeon_atombios_init(rdev); + rdev->new_init_path = false; + r = radeon_init(rdev); + if (r) { + return r; + } + if (!rdev->new_init_path) { + /* Setup errata flags */ + radeon_errata(rdev); + /* Initialize scratch registers */ + radeon_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); + + /* TODO: disable VGA need to use VGA request */ + /* BIOS*/ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; + } + if (rdev->is_atom_bios) { + r = radeon_atombios_init(rdev); + if (r) { + return r; + } + } else { + r = radeon_combios_init(rdev); + if (r) { + return r; + } + } + /* Reset gpu before posting otherwise ATOM will enter infinite loop */ + if (radeon_gpu_reset(rdev)) { + /* FIXME: what do we want to do here ? */ + } + /* check if cards are posted or not */ + if (!radeon_card_posted(rdev) && rdev->bios) { + DRM_INFO("GPU not posted. 
posting now...\n"); + if (rdev->is_atom_bios) { + atom_asic_init(rdev->mode_info.atom_context); + } else { + radeon_combios_asic_init(rdev->ddev); + } + } + /* Initialize clocks */ + r = radeon_clocks_init(rdev); if (r) { return r; } - } else { - r = radeon_combios_init(rdev); + /* Get vram informations */ + radeon_vram_info(rdev); + + /* Add an MTRR for the VRAM */ + rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size, + MTRR_TYPE_WRCOMB, 1); + DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n", + (unsigned)(rdev->mc.mc_vram_size >> 20), + (unsigned)(rdev->mc.aper_size >> 20)); + DRM_INFO("RAM width %dbits %cDR\n", + rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S'); + /* Initialize memory controller (also test AGP) */ + r = radeon_mc_init(rdev); if (r) { return r; } - } - /* Reset gpu before posting otherwise ATOM will enter infinite loop */ - if (radeon_gpu_reset(rdev)) { - /* FIXME: what do we want to do here ? */ - } - /* check if cards are posted or not */ - if (!radeon_card_posted(rdev) && rdev->bios) { - DRM_INFO("GPU not posted. posting now...\n"); - if (rdev->is_atom_bios) { - atom_asic_init(rdev->mode_info.atom_context); - } else { - radeon_combios_asic_init(rdev->ddev); - } - } - /* Initialize clocks */ - r = radeon_clocks_init(rdev); - if (r) { - return r; - } - /* Get vram informations */ - radeon_vram_info(rdev); - - /* Add an MTRR for the VRAM */ - rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size, - MTRR_TYPE_WRCOMB, 1); - DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n", - rdev->mc.real_vram_size >> 20, - (unsigned)rdev->mc.aper_size >> 20); - DRM_INFO("RAM width %dbits %cDR\n", - rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 
'D' : 'S'); - /* Initialize memory controller (also test AGP) */ - r = radeon_mc_init(rdev); - if (r) { - return r; - } - /* Fence driver */ - r = radeon_fence_driver_init(rdev); - if (r) { - return r; - } - r = radeon_irq_kms_init(rdev); - if (r) { - return r; - } - /* Memory manager */ - r = radeon_object_init(rdev); - if (r) { - return r; - } - /* Initialize GART (initialize after TTM so we can allocate - * memory through TTM but finalize after TTM) */ - r = radeon_gart_enable(rdev); - if (!r) { - r = radeon_gem_init(rdev); - } - - /* 1M ring buffer */ - if (!r) { - r = radeon_cp_init(rdev, 1024 * 1024); - } - if (!r) { - r = radeon_wb_init(rdev); + /* Fence driver */ + r = radeon_fence_driver_init(rdev); if (r) { - DRM_ERROR("radeon: failled initializing WB (%d).\n", r); return r; } - } - if (!r) { - r = radeon_ib_pool_init(rdev); + r = radeon_irq_kms_init(rdev); if (r) { - DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); return r; } - } - if (!r) { - r = radeon_ib_test(rdev); + /* Memory manager */ + r = radeon_object_init(rdev); if (r) { - DRM_ERROR("radeon: failled testing IB (%d).\n", r); return r; } + /* Initialize GART (initialize after TTM so we can allocate + * memory through TTM but finalize after TTM) */ + r = radeon_gart_enable(rdev); + if (!r) { + r = radeon_gem_init(rdev); + } + + /* 1M ring buffer */ + if (!r) { + r = radeon_cp_init(rdev, 1024 * 1024); + } + if (!r) { + r = radeon_wb_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing WB (%d).\n", r); + return r; + } + } + if (!r) { + r = radeon_ib_pool_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); + return r; + } + } + if (!r) { + r = radeon_ib_test(rdev); + if (r) { + DRM_ERROR("radeon: failled testing IB (%d).\n", r); + return r; + } + } + ret = r; } - ret = r; r = radeon_modeset_init(rdev); if (r) { return r; @@ -651,26 +683,29 @@ void radeon_device_fini(struct radeon_device *rdev) rdev->shutdown = true; /* Order matter so 
becarefull if you rearrange anythings */ radeon_modeset_fini(rdev); - radeon_ib_pool_fini(rdev); - radeon_cp_fini(rdev); - radeon_wb_fini(rdev); - radeon_gem_fini(rdev); - radeon_object_fini(rdev); - /* mc_fini must be after object_fini */ - radeon_mc_fini(rdev); + if (!rdev->new_init_path) { + radeon_ib_pool_fini(rdev); + radeon_cp_fini(rdev); + radeon_wb_fini(rdev); + radeon_gem_fini(rdev); + radeon_mc_fini(rdev); #if __OS_HAS_AGP - radeon_agp_fini(rdev); + radeon_agp_fini(rdev); #endif - radeon_irq_kms_fini(rdev); - radeon_fence_driver_fini(rdev); - radeon_clocks_fini(rdev); - if (rdev->is_atom_bios) { - radeon_atombios_fini(rdev); + radeon_irq_kms_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_clocks_fini(rdev); + radeon_object_fini(rdev); + if (rdev->is_atom_bios) { + radeon_atombios_fini(rdev); + } else { + radeon_combios_fini(rdev); + } + kfree(rdev->bios); + rdev->bios = NULL; } else { - radeon_combios_fini(rdev); + radeon_fini(rdev); } - kfree(rdev->bios); - rdev->bios = NULL; iounmap(rdev->rmmio); rdev->rmmio = NULL; } @@ -708,9 +743,12 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) /* wait for gpu to finish processing current batch */ radeon_fence_wait_last(rdev); - radeon_cp_disable(rdev); - radeon_gart_disable(rdev); - + if (!rdev->new_init_path) { + radeon_cp_disable(rdev); + radeon_gart_disable(rdev); + } else { + radeon_suspend(rdev); + } /* evict remaining vram memory */ radeon_object_evict_vram(rdev); @@ -746,33 +784,37 @@ int radeon_resume_kms(struct drm_device *dev) if (radeon_gpu_reset(rdev)) { /* FIXME: what do we want to do here ? 
*/ } - /* post card */ - if (rdev->is_atom_bios) { - atom_asic_init(rdev->mode_info.atom_context); + if (!rdev->new_init_path) { + /* post card */ + if (rdev->is_atom_bios) { + atom_asic_init(rdev->mode_info.atom_context); + } else { + radeon_combios_asic_init(rdev->ddev); + } + /* Initialize clocks */ + r = radeon_clocks_init(rdev); + if (r) { + release_console_sem(); + return r; + } + /* Enable IRQ */ + rdev->irq.sw_int = true; + radeon_irq_set(rdev); + /* Initialize GPU Memory Controller */ + r = radeon_mc_init(rdev); + if (r) { + goto out; + } + r = radeon_gart_enable(rdev); + if (r) { + goto out; + } + r = radeon_cp_init(rdev, rdev->cp.ring_size); + if (r) { + goto out; + } } else { - radeon_combios_asic_init(rdev->ddev); - } - /* Initialize clocks */ - r = radeon_clocks_init(rdev); - if (r) { - release_console_sem(); - return r; - } - /* Enable IRQ */ - rdev->irq.sw_int = true; - radeon_irq_set(rdev); - /* Initialize GPU Memory Controller */ - r = radeon_mc_init(rdev); - if (r) { - goto out; - } - r = radeon_gart_enable(rdev); - if (r) { - goto out; - } - r = radeon_cp_init(rdev, rdev->cp.ring_size); - if (r) { - goto out; + radeon_resume(rdev); } out: fb_set_suspend(rdev->fbdev_info, 0); diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 40294a07976..c7b185924f6 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -356,6 +356,12 @@ typedef struct drm_radeon_private { int r700_sc_hiz_tile_fifo_size; int r700_sc_earlyz_tile_fifo_fize; + struct mutex cs_mutex; + u32 cs_id_scnt; + u32 cs_id_wcnt; + /* r6xx/r7xx drm blit vertex buffer */ + struct drm_buf *blit_vb; + /* firmware */ const struct firmware *me_fw, *pfp_fw; } drm_radeon_private_t; @@ -396,6 +402,9 @@ static __inline__ int radeon_check_offset(drm_radeon_private_t *dev_priv, (off >= gart_start && off <= gart_end)); } +/* radeon_state.c */ +extern void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master 
*master, struct drm_buf *buf); + /* radeon_cp.c */ extern int radeon_cp_init(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int radeon_cp_start(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -487,6 +496,22 @@ extern int r600_cp_dispatch_indirect(struct drm_device *dev, struct drm_buf *buf, int start, int end); extern int r600_page_table_init(struct drm_device *dev); extern void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info); +extern int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv); +extern void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv); +extern int r600_cp_dispatch_texture(struct drm_device *dev, + struct drm_file *file_priv, + drm_radeon_texture_t *tex, + drm_radeon_tex_image_t *image); +/* r600_blit.c */ +extern int r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv); +extern void r600_done_blit_copy(struct drm_device *dev); +extern void r600_blit_copy(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int size_bytes); +extern void r600_blit_swap(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int sx, int sy, int dx, int dy, + int w, int h, int src_pitch, int dst_pitch, int cpp); /* Flags for stats.boxes */ @@ -1114,13 +1139,71 @@ extern u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index); # define RADEON_CNTL_BITBLT_MULTI 0x00009B00 # define RADEON_CNTL_SET_SCISSORS 0xC0001E00 -# define R600_IT_INDIRECT_BUFFER 0x00003200 -# define R600_IT_ME_INITIALIZE 0x00004400 +# define R600_IT_INDIRECT_BUFFER_END 0x00001700 +# define R600_IT_SET_PREDICATION 0x00002000 +# define R600_IT_REG_RMW 0x00002100 +# define R600_IT_COND_EXEC 0x00002200 +# define R600_IT_PRED_EXEC 0x00002300 +# define R600_IT_START_3D_CMDBUF 0x00002400 +# define R600_IT_DRAW_INDEX_2 0x00002700 +# define R600_IT_CONTEXT_CONTROL 0x00002800 +# define 
R600_IT_DRAW_INDEX_IMMD_BE 0x00002900 +# define R600_IT_INDEX_TYPE 0x00002A00 +# define R600_IT_DRAW_INDEX 0x00002B00 +# define R600_IT_DRAW_INDEX_AUTO 0x00002D00 +# define R600_IT_DRAW_INDEX_IMMD 0x00002E00 +# define R600_IT_NUM_INSTANCES 0x00002F00 +# define R600_IT_STRMOUT_BUFFER_UPDATE 0x00003400 +# define R600_IT_INDIRECT_BUFFER_MP 0x00003800 +# define R600_IT_MEM_SEMAPHORE 0x00003900 +# define R600_IT_MPEG_INDEX 0x00003A00 +# define R600_IT_WAIT_REG_MEM 0x00003C00 +# define R600_IT_MEM_WRITE 0x00003D00 +# define R600_IT_INDIRECT_BUFFER 0x00003200 +# define R600_IT_CP_INTERRUPT 0x00004000 +# define R600_IT_SURFACE_SYNC 0x00004300 +# define R600_CB0_DEST_BASE_ENA (1 << 6) +# define R600_TC_ACTION_ENA (1 << 23) +# define R600_VC_ACTION_ENA (1 << 24) +# define R600_CB_ACTION_ENA (1 << 25) +# define R600_DB_ACTION_ENA (1 << 26) +# define R600_SH_ACTION_ENA (1 << 27) +# define R600_SMX_ACTION_ENA (1 << 28) +# define R600_IT_ME_INITIALIZE 0x00004400 # define R600_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) -# define R600_IT_EVENT_WRITE 0x00004600 -# define R600_IT_SET_CONFIG_REG 0x00006800 -# define R600_SET_CONFIG_REG_OFFSET 0x00008000 -# define R600_SET_CONFIG_REG_END 0x0000ac00 +# define R600_IT_COND_WRITE 0x00004500 +# define R600_IT_EVENT_WRITE 0x00004600 +# define R600_IT_EVENT_WRITE_EOP 0x00004700 +# define R600_IT_ONE_REG_WRITE 0x00005700 +# define R600_IT_SET_CONFIG_REG 0x00006800 +# define R600_SET_CONFIG_REG_OFFSET 0x00008000 +# define R600_SET_CONFIG_REG_END 0x0000ac00 +# define R600_IT_SET_CONTEXT_REG 0x00006900 +# define R600_SET_CONTEXT_REG_OFFSET 0x00028000 +# define R600_SET_CONTEXT_REG_END 0x00029000 +# define R600_IT_SET_ALU_CONST 0x00006A00 +# define R600_SET_ALU_CONST_OFFSET 0x00030000 +# define R600_SET_ALU_CONST_END 0x00032000 +# define R600_IT_SET_BOOL_CONST 0x00006B00 +# define R600_SET_BOOL_CONST_OFFSET 0x0003e380 +# define R600_SET_BOOL_CONST_END 0x00040000 +# define R600_IT_SET_LOOP_CONST 0x00006C00 +# define R600_SET_LOOP_CONST_OFFSET 
0x0003e200 +# define R600_SET_LOOP_CONST_END 0x0003e380 +# define R600_IT_SET_RESOURCE 0x00006D00 +# define R600_SET_RESOURCE_OFFSET 0x00038000 +# define R600_SET_RESOURCE_END 0x0003c000 +# define R600_SQ_TEX_VTX_INVALID_TEXTURE 0x0 +# define R600_SQ_TEX_VTX_INVALID_BUFFER 0x1 +# define R600_SQ_TEX_VTX_VALID_TEXTURE 0x2 +# define R600_SQ_TEX_VTX_VALID_BUFFER 0x3 +# define R600_IT_SET_SAMPLER 0x00006E00 +# define R600_SET_SAMPLER_OFFSET 0x0003c000 +# define R600_SET_SAMPLER_END 0x0003cff0 +# define R600_IT_SET_CTL_CONST 0x00006F00 +# define R600_SET_CTL_CONST_OFFSET 0x0003cff0 +# define R600_SET_CTL_CONST_END 0x0003e200 +# define R600_IT_SURFACE_BASE_UPDATE 0x00007300 #define RADEON_CP_PACKET_MASK 0xC0000000 #define RADEON_CP_PACKET_COUNT_MASK 0x3fff0000 @@ -1598,6 +1681,52 @@ extern u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index); #define R600_CB_COLOR7_BASE 0x2805c #define R600_CB_COLOR7_FRAG 0x280fc +#define R600_CB_COLOR0_SIZE 0x28060 +#define R600_CB_COLOR0_VIEW 0x28080 +#define R600_CB_COLOR0_INFO 0x280a0 +#define R600_CB_COLOR0_TILE 0x280c0 +#define R600_CB_COLOR0_FRAG 0x280e0 +#define R600_CB_COLOR0_MASK 0x28100 + +#define AVIVO_D1MODE_VLINE_START_END 0x6538 +#define AVIVO_D2MODE_VLINE_START_END 0x6d38 +#define R600_CP_COHER_BASE 0x85f8 +#define R600_DB_DEPTH_BASE 0x2800c +#define R600_SQ_PGM_START_FS 0x28894 +#define R600_SQ_PGM_START_ES 0x28880 +#define R600_SQ_PGM_START_VS 0x28858 +#define R600_SQ_PGM_RESOURCES_VS 0x28868 +#define R600_SQ_PGM_CF_OFFSET_VS 0x288d0 +#define R600_SQ_PGM_START_GS 0x2886c +#define R600_SQ_PGM_START_PS 0x28840 +#define R600_SQ_PGM_RESOURCES_PS 0x28850 +#define R600_SQ_PGM_EXPORTS_PS 0x28854 +#define R600_SQ_PGM_CF_OFFSET_PS 0x288cc +#define R600_VGT_DMA_BASE 0x287e8 +#define R600_VGT_DMA_BASE_HI 0x287e4 +#define R600_VGT_STRMOUT_BASE_OFFSET_0 0x28b10 +#define R600_VGT_STRMOUT_BASE_OFFSET_1 0x28b14 +#define R600_VGT_STRMOUT_BASE_OFFSET_2 0x28b18 +#define R600_VGT_STRMOUT_BASE_OFFSET_3 0x28b1c +#define 
R600_VGT_STRMOUT_BASE_OFFSET_HI_0 0x28b44 +#define R600_VGT_STRMOUT_BASE_OFFSET_HI_1 0x28b48 +#define R600_VGT_STRMOUT_BASE_OFFSET_HI_2 0x28b4c +#define R600_VGT_STRMOUT_BASE_OFFSET_HI_3 0x28b50 +#define R600_VGT_STRMOUT_BUFFER_BASE_0 0x28ad8 +#define R600_VGT_STRMOUT_BUFFER_BASE_1 0x28ae8 +#define R600_VGT_STRMOUT_BUFFER_BASE_2 0x28af8 +#define R600_VGT_STRMOUT_BUFFER_BASE_3 0x28b08 +#define R600_VGT_STRMOUT_BUFFER_OFFSET_0 0x28adc +#define R600_VGT_STRMOUT_BUFFER_OFFSET_1 0x28aec +#define R600_VGT_STRMOUT_BUFFER_OFFSET_2 0x28afc +#define R600_VGT_STRMOUT_BUFFER_OFFSET_3 0x28b0c + +#define R600_VGT_PRIMITIVE_TYPE 0x8958 + +#define R600_PA_SC_SCREEN_SCISSOR_TL 0x28030 +#define R600_PA_SC_GENERIC_SCISSOR_TL 0x28240 +#define R600_PA_SC_WINDOW_SCISSOR_TL 0x28204 + #define R600_TC_CNTL 0x9608 # define R600_TC_L2_SIZE(x) ((x) << 5) # define R600_L2_DISABLE_LATE_HIT (1 << 9) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index b4e48dd2e85..506dd4dd3a2 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -53,9 +53,9 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) * away */ WREG32(rdev->fence_drv.scratch_reg, fence->seq); - } else { + } else radeon_fence_ring_emit(rdev, fence); - } + fence->emited = true; fence->timeout = jiffies + ((2000 * HZ) / 1000); list_del(&fence->list); @@ -168,7 +168,47 @@ bool radeon_fence_signaled(struct radeon_fence *fence) return signaled; } -int radeon_fence_wait(struct radeon_fence *fence, bool interruptible) +int r600_fence_wait(struct radeon_fence *fence, bool intr, bool lazy) +{ /* Polls until the fence signals (returns 0), the deadline passes (-EBUSY) or, when intr is set, a signal is pending (-ERESTART); when lazy is set each pass calls schedule_timeout(1). */ + struct radeon_device *rdev; + unsigned long cur_jiffies; + unsigned long timeout; + int ret = 0; + + cur_jiffies = jiffies; + timeout = HZ / 100; /* minimum wait, used when fence->timeout has already passed */ + + if (time_after(fence->timeout, cur_jiffies)) { + timeout = fence->timeout - cur_jiffies; + } + + rdev = fence->rdev; + + __set_current_state(intr ?
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); + + while (1) { + if (radeon_fence_signaled(fence)) + break; + + if (time_after_eq(jiffies, cur_jiffies + timeout)) { /* timeout is a tick count relative to cur_jiffies, so test against the absolute deadline */ + ret = -EBUSY; + break; + } + + if (lazy) + schedule_timeout(1); + + if (intr && signal_pending(current)) { + ret = -ERESTART; + break; + } + } + __set_current_state(TASK_RUNNING); + return ret; +} + + +int radeon_fence_wait(struct radeon_fence *fence, bool intr) { struct radeon_device *rdev; unsigned long cur_jiffies; @@ -176,7 +216,6 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible) bool expired = false; int r; - if (fence == NULL) { WARN(1, "Querying an invalid fence : %p !\n", fence); return 0; @@ -185,13 +224,18 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible) if (radeon_fence_signaled(fence)) { return 0; } + + if (rdev->family >= CHIP_R600) + return r600_fence_wait(fence, intr, 0); + retry: cur_jiffies = jiffies; timeout = HZ / 100; if (time_after(fence->timeout, cur_jiffies)) { timeout = fence->timeout - cur_jiffies; } - if (interruptible) { + + if (intr) { r = wait_event_interruptible_timeout(rdev->fence_drv.queue, radeon_fence_signaled(fence), timeout); if (unlikely(r == -ERESTARTSYS)) { diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h index 28be2f1165c..21da871a793 100644 --- a/drivers/gpu/drm/radeon/radeon_reg.h +++ b/drivers/gpu/drm/radeon/radeon_reg.h @@ -3255,6 +3255,24 @@ #define RADEON_CP_RB_WPTR 0x0714 #define RADEON_CP_RB_RPTR_WR 0x071c +#define RADEON_SCRATCH_UMSK 0x0770 +#define RADEON_SCRATCH_ADDR 0x0774 + +#define R600_CP_RB_BASE 0xc100 +#define R600_CP_RB_CNTL 0xc104 +# define R600_RB_BUFSZ(x) ((x) << 0) +# define R600_RB_BLKSZ(x) ((x) << 8) +# define R600_RB_NO_UPDATE (1 << 27) +# define R600_RB_RPTR_WR_ENA (1 << 31) +#define R600_CP_RB_RPTR_WR 0xc108 +#define R600_CP_RB_RPTR_ADDR 0xc10c +#define R600_CP_RB_RPTR_ADDR_HI 0xc110 +#define R600_CP_RB_WPTR 0xc114 +#define R600_CP_RB_WPTR_ADDR 0xc118 +#define
R600_CP_RB_WPTR_ADDR_HI 0xc11c +#define R600_CP_RB_RPTR 0x8700 +#define R600_CP_RB_WPTR_DELAY 0x8704 + #define RADEON_CP_IB_BASE 0x0738 #define RADEON_CP_IB_BUFSZ 0x073c diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 60d159308b8..aa9837a6aa7 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -110,7 +110,6 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib) return; } list_del(&tmp->list); - INIT_LIST_HEAD(&tmp->list); if (tmp->fence) { radeon_fence_unref(&tmp->fence); } @@ -119,19 +118,11 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib) mutex_unlock(&rdev->ib_pool.mutex); } -static void radeon_ib_align(struct radeon_device *rdev, struct radeon_ib *ib) -{ - while ((ib->length_dw & rdev->cp.align_mask)) { - ib->ptr[ib->length_dw++] = PACKET2(0); - } -} - int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) { int r = 0; mutex_lock(&rdev->ib_pool.mutex); - radeon_ib_align(rdev, ib); if (!ib->length_dw || !rdev->cp.ready) { /* TODO: Nothings in the ib we should report. 
*/ mutex_unlock(&rdev->ib_pool.mutex); @@ -145,9 +136,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) mutex_unlock(&rdev->ib_pool.mutex); return r; } - radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1)); - radeon_ring_write(rdev, ib->gpu_addr); - radeon_ring_write(rdev, ib->length_dw); + radeon_ring_ib_execute(rdev, ib); radeon_fence_emit(rdev, ib->fence); radeon_ring_unlock_commit(rdev); list_add_tail(&ib->list, &rdev->ib_pool.scheduled_ibs); @@ -215,69 +204,16 @@ void radeon_ib_pool_fini(struct radeon_device *rdev) mutex_unlock(&rdev->ib_pool.mutex); } -int radeon_ib_test(struct radeon_device *rdev) -{ - struct radeon_ib *ib; - uint32_t scratch; - uint32_t tmp = 0; - unsigned i; - int r; - - r = radeon_scratch_get(rdev, &scratch); - if (r) { - DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - r = radeon_ib_get(rdev, &ib); - if (r) { - return r; - } - ib->ptr[0] = PACKET0(scratch, 0); - ib->ptr[1] = 0xDEADBEEF; - ib->ptr[2] = PACKET2(0); - ib->ptr[3] = PACKET2(0); - ib->ptr[4] = PACKET2(0); - ib->ptr[5] = PACKET2(0); - ib->ptr[6] = PACKET2(0); - ib->ptr[7] = PACKET2(0); - ib->length_dw = 8; - r = radeon_ib_schedule(rdev, ib); - if (r) { - radeon_scratch_free(rdev, scratch); - radeon_ib_free(rdev, &ib); - return r; - } - r = radeon_fence_wait(ib->fence, false); - if (r) { - return r; - } - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - break; - } - DRM_UDELAY(1); - } - if (i < rdev->usec_timeout) { - DRM_INFO("ib test succeeded in %u usecs\n", i); - } else { - DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n", - scratch, tmp); - r = -EINVAL; - } - radeon_scratch_free(rdev, scratch); - radeon_ib_free(rdev, &ib); - return r; -} - /* * Ring. 
*/ void radeon_ring_free_size(struct radeon_device *rdev) { - rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); + if (rdev->family >= CHIP_R600) + rdev->cp.rptr = RREG32(R600_CP_RB_RPTR); + else + rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); /* This works because ring_size is a power of 2 */ rdev->cp.ring_free_dw = (rdev->cp.rptr + (rdev->cp.ring_size / 4)); rdev->cp.ring_free_dw -= rdev->cp.wptr; @@ -320,11 +256,10 @@ void radeon_ring_unlock_commit(struct radeon_device *rdev) count_dw_pad = (rdev->cp.align_mask + 1) - (rdev->cp.wptr & rdev->cp.align_mask); for (i = 0; i < count_dw_pad; i++) { - radeon_ring_write(rdev, PACKET2(0)); + radeon_ring_write(rdev, 2 << 30); } DRM_MEMORYBARRIER(); - WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); - (void)RREG32(RADEON_CP_RB_WPTR); + radeon_cp_commit(rdev); mutex_unlock(&rdev->cp.mutex); } @@ -334,46 +269,6 @@ void radeon_ring_unlock_undo(struct radeon_device *rdev) mutex_unlock(&rdev->cp.mutex); } -int radeon_ring_test(struct radeon_device *rdev) -{ - uint32_t scratch; - uint32_t tmp = 0; - unsigned i; - int r; - - r = radeon_scratch_get(rdev, &scratch); - if (r) { - DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - r = radeon_ring_lock(rdev, 2); - if (r) { - DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); - radeon_scratch_free(rdev, scratch); - return r; - } - radeon_ring_write(rdev, PACKET0(scratch, 0)); - radeon_ring_write(rdev, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev); - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - break; - } - DRM_UDELAY(1); - } - if (i < rdev->usec_timeout) { - DRM_INFO("ring test succeeded in %d usecs\n", i); - } else { - DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n", - scratch, tmp); - r = -EINVAL; - } - radeon_scratch_free(rdev, scratch); - return r; -} - int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size) { int r; diff --git 
a/drivers/gpu/drm/radeon/radeon_share.h b/drivers/gpu/drm/radeon/radeon_share.h index 63a773578f1..5f9e358ab50 100644 --- a/drivers/gpu/drm/radeon/radeon_share.h +++ b/drivers/gpu/drm/radeon/radeon_share.h @@ -28,12 +28,89 @@ #ifndef __RADEON_SHARE_H__ #define __RADEON_SHARE_H__ +/* Common */ +struct radeon_device; +struct radeon_cs_parser; +int radeon_clocks_init(struct radeon_device *rdev); +void radeon_clocks_fini(struct radeon_device *rdev); +void radeon_scratch_init(struct radeon_device *rdev); +void radeon_surface_init(struct radeon_device *rdev); +int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data); + + +/* R100, RV100, RS100, RV200, RS200, R200, RV250, RS300, RV280 */ void r100_vram_init_sizes(struct radeon_device *rdev); + +/* R300, R350, RV350, RV380 */ +struct r300_asic { + const unsigned *reg_safe_bm; + unsigned reg_safe_bm_size; +}; + + +/* RS690, RS740 */ void rs690_line_buffer_adjust(struct radeon_device *rdev, struct drm_display_mode *mode1, struct drm_display_mode *mode2); + +/* RV515 */ void rv515_bandwidth_avivo_update(struct radeon_device *rdev); + +/* R600, RV610, RV630, RV620, RV635, RV670, RS780, RS880 */ +bool r600_card_posted(struct radeon_device *rdev); +void r600_cp_stop(struct radeon_device *rdev); +void r600_ring_init(struct radeon_device *rdev, unsigned ring_size); +int r600_cp_resume(struct radeon_device *rdev); +int r600_count_pipe_bits(uint32_t val); +int r600_gart_clear_page(struct radeon_device *rdev, int i); +int r600_mc_wait_for_idle(struct radeon_device *rdev); +void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); +int r600_ib_test(struct radeon_device *rdev); +int r600_ring_test(struct radeon_device *rdev); +int r600_wb_init(struct radeon_device *rdev); +void r600_wb_fini(struct radeon_device *rdev); +void r600_scratch_init(struct radeon_device *rdev); +int r600_blit_init(struct radeon_device *rdev); +void r600_blit_fini(struct radeon_device *rdev); +int r600_cp_init_microcode(struct radeon_device *rdev); 
+struct r600_asic { + unsigned max_pipes; + unsigned max_tile_pipes; + unsigned max_simds; + unsigned max_backends; + unsigned max_gprs; + unsigned max_threads; + unsigned max_stack_entries; + unsigned max_hw_contexts; + unsigned max_gs_threads; + unsigned sx_max_export_size; + unsigned sx_max_export_pos_size; + unsigned sx_max_export_smx_size; + unsigned sq_num_cf_insts; +}; + +/* RV770, RV730, RV710 */ +struct rv770_asic { + unsigned max_pipes; + unsigned max_tile_pipes; + unsigned max_simds; + unsigned max_backends; + unsigned max_gprs; + unsigned max_threads; + unsigned max_stack_entries; + unsigned max_hw_contexts; + unsigned max_gs_threads; + unsigned sx_max_export_size; + unsigned sx_max_export_pos_size; + unsigned sx_max_export_smx_size; + unsigned sq_num_cf_insts; + unsigned sx_num_of_sets; + unsigned sc_prim_fifo_size; + unsigned sc_hiz_tile_fifo_size; + unsigned sc_earlyz_tile_fifo_fize; /* NOTE(review): "fize" looks like a typo for "size"; renaming requires updating every user of this field */ +}; + #endif diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 2882f40d5ec..aad0c6fafcf 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -1546,7 +1546,7 @@ static void radeon_cp_dispatch_vertex(struct drm_device * dev, } while (i < nbox); } -static void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf) +void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf) { drm_radeon_private_t *dev_priv = dev->dev_private; struct drm_radeon_master_private *master_priv = master->driver_priv; @@ -2213,7 +2213,10 @@ static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *f if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS) sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS; - radeon_cp_dispatch_swap(dev, file_priv->master); + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) + r600_cp_dispatch_swap(dev, file_priv); + else + radeon_cp_dispatch_swap(dev, file_priv->master);
sarea_priv->ctx_owner = 0; COMMIT_RING(); @@ -2412,7 +2415,10 @@ static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file RING_SPACE_TEST_WITH_RETURN(dev_priv); VB_AGE_TEST_WITH_RETURN(dev_priv); - ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image); + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) + ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image); + else + ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image); return ret; } @@ -2495,8 +2501,9 @@ static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_fil radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end); } - if (indirect->discard) + if (indirect->discard) { radeon_cp_discard_buffer(dev, file_priv->master, buf); + } COMMIT_RING(); return 0; @@ -3227,7 +3234,8 @@ struct drm_ioctl_desc radeon_ioctls[] = { DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH) + DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH) }; int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index dc7a44274ea..acd889c9454 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -376,9 +376,8 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, radeon_move_null(bo, new_mem); return 0; } - if (!rdev->cp.ready) { + if (!rdev->cp.ready || rdev->asic->copy == NULL) { /* use memcpy */ - DRM_ERROR("CP is not ready use memcpy.\n"); goto memcpy; } @@ -495,7 +494,7 @@ int radeon_ttm_init(struct radeon_device *rdev) return r; } DRM_INFO("radeon: %uM of VRAM memory ready\n", - rdev->mc.real_vram_size / (1024 * 1024)); + 
(unsigned)rdev->mc.real_vram_size / (1024 * 1024)); r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_TT, 0, ((rdev->mc.gtt_size) >> PAGE_SHIFT)); if (r) { @@ -503,7 +502,7 @@ int radeon_ttm_init(struct radeon_device *rdev) return r; } DRM_INFO("radeon: %uM of GTT memory ready.\n", - rdev->mc.gtt_size / (1024 * 1024)); + (unsigned)(rdev->mc.gtt_size / (1024 * 1024))); if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) { rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; } diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index b29affd9c5d..8c3ea7e3606 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -63,7 +63,7 @@ void rs400_gart_adjust_size(struct radeon_device *rdev) break; default: DRM_ERROR("Unable to use IGP GART size %uM\n", - rdev->mc.gtt_size >> 20); + (unsigned)(rdev->mc.gtt_size >> 20)); DRM_ERROR("Valid GART size for IGP are 32M,64M,128M,256M,512M,1G,2G\n"); DRM_ERROR("Forcing to 32M GART size\n"); rdev->mc.gtt_size = 32 * 1024 * 1024; diff --git a/drivers/gpu/drm/radeon/rs780.c b/drivers/gpu/drm/radeon/rs780.c deleted file mode 100644 index 0affcff8182..00000000000 --- a/drivers/gpu/drm/radeon/rs780.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2008 Advanced Micro Devices, Inc. - * Copyright 2008 Red Hat Inc. - * Copyright 2009 Jerome Glisse. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Dave Airlie - * Alex Deucher - * Jerome Glisse - */ -#include "drmP.h" -#include "radeon_reg.h" -#include "radeon.h" - -/* rs780 depends on : */ -void rs600_mc_disable_clients(struct radeon_device *rdev); - -/* This files gather functions specifics to: - * rs780 - * - * Some of these functions might be used by newer ASICs. - */ -int rs780_mc_wait_for_idle(struct radeon_device *rdev); -void rs780_gpu_init(struct radeon_device *rdev); - - -/* - * MC - */ -int rs780_mc_init(struct radeon_device *rdev) -{ - rs780_gpu_init(rdev); - /* FIXME: implement */ - - rs600_mc_disable_clients(rdev); - if (rs780_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. Bad things might happen.\n"); - } - return 0; -} - -void rs780_mc_fini(struct radeon_device *rdev) -{ - /* FIXME: implement */ -} - - -/* - * Global GPU functions - */ -void rs780_errata(struct radeon_device *rdev) -{ - rdev->pll_errata = 0; -} - -int rs780_mc_wait_for_idle(struct radeon_device *rdev) -{ - /* FIXME: implement */ - return 0; -} - -void rs780_gpu_init(struct radeon_device *rdev) -{ - /* FIXME: implement */ -} - - -/* - * VRAM info - */ -void rs780_vram_get_type(struct radeon_device *rdev) -{ - /* FIXME: implement */ -} - -void rs780_vram_info(struct radeon_device *rdev) -{ - rs780_vram_get_type(rdev); - - /* FIXME: implement */ - /* Could aper size report 0 ? 
*/ - rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); - rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); -} diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 97965c430c1..99e397f1638 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -27,7 +27,7 @@ */ #include #include "drmP.h" -#include "rv515r.h" +#include "rv515d.h" #include "radeon.h" #include "radeon_share.h" diff --git a/drivers/gpu/drm/radeon/rv515d.h b/drivers/gpu/drm/radeon/rv515d.h new file mode 100644 index 00000000000..a65e17ec1c0 --- /dev/null +++ b/drivers/gpu/drm/radeon/rv515d.h @@ -0,0 +1,220 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef __RV515D_H__ +#define __RV515D_H__ + +/* + * RV515 registers + */ +#define PCIE_INDEX 0x0030 +#define PCIE_DATA 0x0034 +#define MC_IND_INDEX 0x0070 +#define MC_IND_WR_EN (1 << 24) +#define MC_IND_DATA 0x0074 +#define RBBM_SOFT_RESET 0x00F0 +#define CONFIG_MEMSIZE 0x00F8 +#define HDP_FB_LOCATION 0x0134 +#define CP_CSQ_CNTL 0x0740 +#define CP_CSQ_MODE 0x0744 +#define CP_CSQ_ADDR 0x07F0 +#define CP_CSQ_DATA 0x07F4 +#define CP_CSQ_STAT 0x07F8 +#define CP_CSQ2_STAT 0x07FC +#define RBBM_STATUS 0x0E40 +#define DST_PIPE_CONFIG 0x170C +#define WAIT_UNTIL 0x1720 +#define WAIT_2D_IDLE (1 << 14) +#define WAIT_3D_IDLE (1 << 15) +#define WAIT_2D_IDLECLEAN (1 << 16) +#define WAIT_3D_IDLECLEAN (1 << 17) +#define ISYNC_CNTL 0x1724 +#define ISYNC_ANY2D_IDLE3D (1 << 0) +#define ISYNC_ANY3D_IDLE2D (1 << 1) +#define ISYNC_TRIG2D_IDLE3D (1 << 2) +#define ISYNC_TRIG3D_IDLE2D (1 << 3) +#define ISYNC_WAIT_IDLEGUI (1 << 4) +#define ISYNC_CPSCRATCH_IDLEGUI (1 << 5) +#define VAP_INDEX_OFFSET 0x208C +#define VAP_PVS_STATE_FLUSH_REG 0x2284 +#define GB_ENABLE 0x4008 +#define GB_MSPOS0 0x4010 +#define MS_X0_SHIFT 0 +#define MS_Y0_SHIFT 4 +#define MS_X1_SHIFT 8 +#define MS_Y1_SHIFT 12 +#define MS_X2_SHIFT 16 +#define MS_Y2_SHIFT 20 +#define MSBD0_Y_SHIFT 24 +#define MSBD0_X_SHIFT 28 +#define GB_MSPOS1 0x4014 +#define MS_X3_SHIFT 0 +#define MS_Y3_SHIFT 4 +#define MS_X4_SHIFT 8 +#define MS_Y4_SHIFT 12 +#define MS_X5_SHIFT 16 +#define MS_Y5_SHIFT 20 +#define MSBD1_SHIFT 24 +#define GB_TILE_CONFIG 0x4018 +#define ENABLE_TILING (1 << 0) +#define PIPE_COUNT_MASK 0x0000000E +#define PIPE_COUNT_SHIFT 1 +#define TILE_SIZE_8 (0 << 4) +#define TILE_SIZE_16 (1 << 4) +#define TILE_SIZE_32 (2 << 4) +#define SUBPIXEL_1_12 (0 << 16) +#define SUBPIXEL_1_16 (1 << 16) +#define GB_SELECT 0x401C +#define GB_AA_CONFIG 0x4020 +#define GB_PIPE_SELECT 0x402C +#define GA_ENHANCE 0x4274 +#define GA_DEADLOCK_CNTL (1 << 0) +#define GA_FASTSYNC_CNTL 
(1 << 1) +#define GA_POLY_MODE 0x4288 +#define FRONT_PTYPE_POINT (0 << 4) +#define FRONT_PTYPE_LINE (1 << 4) +#define FRONT_PTYPE_TRIANGE (2 << 4) +#define BACK_PTYPE_POINT (0 << 7) +#define BACK_PTYPE_LINE (1 << 7) +#define BACK_PTYPE_TRIANGE (2 << 7) +#define GA_ROUND_MODE 0x428C +#define GEOMETRY_ROUND_TRUNC (0 << 0) +#define GEOMETRY_ROUND_NEAREST (1 << 0) +#define COLOR_ROUND_TRUNC (0 << 2) +#define COLOR_ROUND_NEAREST (1 << 2) +#define SU_REG_DEST 0x42C8 +#define RB3D_DSTCACHE_CTLSTAT 0x4E4C +#define RB3D_DC_FLUSH (2 << 0) +#define RB3D_DC_FREE (2 << 2) +#define RB3D_DC_FINISH (1 << 4) +#define ZB_ZCACHE_CTLSTAT 0x4F18 +#define ZC_FLUSH (1 << 0) +#define ZC_FREE (1 << 1) +#define DC_LB_MEMORY_SPLIT 0x6520 +#define DC_LB_MEMORY_SPLIT_MASK 0x00000003 +#define DC_LB_MEMORY_SPLIT_SHIFT 0 +#define DC_LB_MEMORY_SPLIT_D1HALF_D2HALF 0 +#define DC_LB_MEMORY_SPLIT_D1_3Q_D2_1Q 1 +#define DC_LB_MEMORY_SPLIT_D1_ONLY 2 +#define DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q 3 +#define DC_LB_MEMORY_SPLIT_SHIFT_MODE (1 << 2) +#define DC_LB_DISP1_END_ADR_SHIFT 4 +#define DC_LB_DISP1_END_ADR_MASK 0x00007FF0 +#define D1MODE_PRIORITY_A_CNT 0x6548 +#define MODE_PRIORITY_MARK_MASK 0x00007FFF +#define MODE_PRIORITY_OFF (1 << 16) +#define MODE_PRIORITY_ALWAYS_ON (1 << 20) +#define MODE_PRIORITY_FORCE_MASK (1 << 24) +#define D1MODE_PRIORITY_B_CNT 0x654C +#define LB_MAX_REQ_OUTSTANDING 0x6D58 +#define LB_D1_MAX_REQ_OUTSTANDING_MASK 0x0000000F +#define LB_D1_MAX_REQ_OUTSTANDING_SHIFT 0 +#define LB_D2_MAX_REQ_OUTSTANDING_MASK 0x000F0000 +#define LB_D2_MAX_REQ_OUTSTANDING_SHIFT 16 +#define D2MODE_PRIORITY_A_CNT 0x6D48 +#define D2MODE_PRIORITY_B_CNT 0x6D4C + +/* ix[MC] registers */ +#define MC_FB_LOCATION 0x01 +#define MC_FB_START_MASK 0x0000FFFF +#define MC_FB_START_SHIFT 0 +#define MC_FB_TOP_MASK 0xFFFF0000 +#define MC_FB_TOP_SHIFT 16 +#define MC_AGP_LOCATION 0x02 +#define MC_AGP_START_MASK 0x0000FFFF +#define MC_AGP_START_SHIFT 0 +#define MC_AGP_TOP_MASK 0xFFFF0000 +#define MC_AGP_TOP_SHIFT 16 
+#define MC_AGP_BASE 0x03 +#define MC_AGP_BASE_2 0x04 +#define MC_CNTL 0x5 +#define MEM_NUM_CHANNELS_MASK 0x00000003 +#define MC_STATUS 0x08 +#define MC_STATUS_IDLE (1 << 4) +#define MC_MISC_LAT_TIMER 0x09 +#define MC_CPR_INIT_LAT_MASK 0x0000000F +#define MC_VF_INIT_LAT_MASK 0x000000F0 +#define MC_DISP0R_INIT_LAT_MASK 0x00000F00 +#define MC_DISP0R_INIT_LAT_SHIFT 8 +#define MC_DISP1R_INIT_LAT_MASK 0x0000F000 +#define MC_DISP1R_INIT_LAT_SHIFT 12 +#define MC_FIXED_INIT_LAT_MASK 0x000F0000 +#define MC_E2R_INIT_LAT_MASK 0x00F00000 +#define SAME_PAGE_PRIO_MASK 0x0F000000 +#define MC_GLOBW_INIT_LAT_MASK 0xF0000000 + + +/* + * PM4 packet + */ +#define CP_PACKET0 0x00000000 +#define PACKET0_BASE_INDEX_SHIFT 0 +#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) +#define PACKET0_COUNT_SHIFT 16 +#define PACKET0_COUNT_MASK (0x3fff << 16) +#define CP_PACKET1 0x40000000 +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) +#define CP_PACKET3 0xC0000000 +#define PACKET3_IT_OPCODE_SHIFT 8 +#define PACKET3_IT_OPCODE_MASK (0xff << 8) +#define PACKET3_COUNT_SHIFT 16 +#define PACKET3_COUNT_MASK (0x3fff << 16) +/* PACKET3 op code */ +#define PACKET3_NOP 0x10 +#define PACKET3_3D_DRAW_VBUF 0x28 +#define PACKET3_3D_DRAW_IMMD 0x29 +#define PACKET3_3D_DRAW_INDX 0x2A +#define PACKET3_3D_LOAD_VBPNTR 0x2F +#define PACKET3_INDX_BUFFER 0x33 +#define PACKET3_3D_DRAW_VBUF_2 0x34 +#define PACKET3_3D_DRAW_IMMD_2 0x35 +#define PACKET3_3D_DRAW_INDX_2 0x36 +#define PACKET3_BITBLT_MULTI 0x9B + +#define PACKET0(reg, n) (CP_PACKET0 | \ + REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ + REG_SET(PACKET0_COUNT, (n))) +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) +#define PACKET3(op, n) (CP_PACKET3 | \ + REG_SET(PACKET3_IT_OPCODE, (op)) | \ + REG_SET(PACKET3_COUNT, (n))) + +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) 
(((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) +#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) + +#endif + diff --git a/drivers/gpu/drm/radeon/rv515r.h b/drivers/gpu/drm/radeon/rv515r.h deleted file mode 100644 index f3cf8403990..00000000000 --- a/drivers/gpu/drm/radeon/rv515r.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright 2008 Advanced Micro Devices, Inc. - * Copyright 2008 Red Hat Inc. - * Copyright 2009 Jerome Glisse. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: Dave Airlie - * Alex Deucher - * Jerome Glisse - */ -#ifndef RV515R_H -#define RV515R_H - -/* RV515 registers */ -#define PCIE_INDEX 0x0030 -#define PCIE_DATA 0x0034 -#define MC_IND_INDEX 0x0070 -#define MC_IND_WR_EN (1 << 24) -#define MC_IND_DATA 0x0074 -#define RBBM_SOFT_RESET 0x00F0 -#define CONFIG_MEMSIZE 0x00F8 -#define HDP_FB_LOCATION 0x0134 -#define CP_CSQ_CNTL 0x0740 -#define CP_CSQ_MODE 0x0744 -#define CP_CSQ_ADDR 0x07F0 -#define CP_CSQ_DATA 0x07F4 -#define CP_CSQ_STAT 0x07F8 -#define CP_CSQ2_STAT 0x07FC -#define RBBM_STATUS 0x0E40 -#define DST_PIPE_CONFIG 0x170C -#define WAIT_UNTIL 0x1720 -#define WAIT_2D_IDLE (1 << 14) -#define WAIT_3D_IDLE (1 << 15) -#define WAIT_2D_IDLECLEAN (1 << 16) -#define WAIT_3D_IDLECLEAN (1 << 17) -#define ISYNC_CNTL 0x1724 -#define ISYNC_ANY2D_IDLE3D (1 << 0) -#define ISYNC_ANY3D_IDLE2D (1 << 1) -#define ISYNC_TRIG2D_IDLE3D (1 << 2) -#define ISYNC_TRIG3D_IDLE2D (1 << 3) -#define ISYNC_WAIT_IDLEGUI (1 << 4) -#define ISYNC_CPSCRATCH_IDLEGUI (1 << 5) -#define VAP_INDEX_OFFSET 0x208C -#define VAP_PVS_STATE_FLUSH_REG 0x2284 -#define GB_ENABLE 0x4008 -#define GB_MSPOS0 0x4010 -#define MS_X0_SHIFT 0 -#define MS_Y0_SHIFT 4 -#define MS_X1_SHIFT 8 -#define MS_Y1_SHIFT 12 -#define MS_X2_SHIFT 16 -#define MS_Y2_SHIFT 20 -#define MSBD0_Y_SHIFT 24 -#define MSBD0_X_SHIFT 28 -#define GB_MSPOS1 0x4014 -#define MS_X3_SHIFT 0 -#define MS_Y3_SHIFT 4 -#define MS_X4_SHIFT 8 -#define MS_Y4_SHIFT 12 -#define MS_X5_SHIFT 16 -#define MS_Y5_SHIFT 20 -#define MSBD1_SHIFT 24 -#define GB_TILE_CONFIG 0x4018 -#define ENABLE_TILING (1 << 0) -#define PIPE_COUNT_MASK 0x0000000E -#define PIPE_COUNT_SHIFT 1 -#define TILE_SIZE_8 (0 << 4) -#define TILE_SIZE_16 (1 << 4) -#define TILE_SIZE_32 (2 << 4) -#define SUBPIXEL_1_12 (0 << 16) -#define SUBPIXEL_1_16 (1 << 16) -#define GB_SELECT 0x401C -#define GB_AA_CONFIG 0x4020 -#define GB_PIPE_SELECT 0x402C -#define GA_ENHANCE 0x4274 -#define GA_DEADLOCK_CNTL (1 << 0) -#define GA_FASTSYNC_CNTL (1 << 1) 
-#define GA_POLY_MODE 0x4288 -#define FRONT_PTYPE_POINT (0 << 4) -#define FRONT_PTYPE_LINE (1 << 4) -#define FRONT_PTYPE_TRIANGE (2 << 4) -#define BACK_PTYPE_POINT (0 << 7) -#define BACK_PTYPE_LINE (1 << 7) -#define BACK_PTYPE_TRIANGE (2 << 7) -#define GA_ROUND_MODE 0x428C -#define GEOMETRY_ROUND_TRUNC (0 << 0) -#define GEOMETRY_ROUND_NEAREST (1 << 0) -#define COLOR_ROUND_TRUNC (0 << 2) -#define COLOR_ROUND_NEAREST (1 << 2) -#define SU_REG_DEST 0x42C8 -#define RB3D_DSTCACHE_CTLSTAT 0x4E4C -#define RB3D_DC_FLUSH (2 << 0) -#define RB3D_DC_FREE (2 << 2) -#define RB3D_DC_FINISH (1 << 4) -#define ZB_ZCACHE_CTLSTAT 0x4F18 -#define ZC_FLUSH (1 << 0) -#define ZC_FREE (1 << 1) -#define DC_LB_MEMORY_SPLIT 0x6520 -#define DC_LB_MEMORY_SPLIT_MASK 0x00000003 -#define DC_LB_MEMORY_SPLIT_SHIFT 0 -#define DC_LB_MEMORY_SPLIT_D1HALF_D2HALF 0 -#define DC_LB_MEMORY_SPLIT_D1_3Q_D2_1Q 1 -#define DC_LB_MEMORY_SPLIT_D1_ONLY 2 -#define DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q 3 -#define DC_LB_MEMORY_SPLIT_SHIFT_MODE (1 << 2) -#define DC_LB_DISP1_END_ADR_SHIFT 4 -#define DC_LB_DISP1_END_ADR_MASK 0x00007FF0 -#define D1MODE_PRIORITY_A_CNT 0x6548 -#define MODE_PRIORITY_MARK_MASK 0x00007FFF -#define MODE_PRIORITY_OFF (1 << 16) -#define MODE_PRIORITY_ALWAYS_ON (1 << 20) -#define MODE_PRIORITY_FORCE_MASK (1 << 24) -#define D1MODE_PRIORITY_B_CNT 0x654C -#define LB_MAX_REQ_OUTSTANDING 0x6D58 -#define LB_D1_MAX_REQ_OUTSTANDING_MASK 0x0000000F -#define LB_D1_MAX_REQ_OUTSTANDING_SHIFT 0 -#define LB_D2_MAX_REQ_OUTSTANDING_MASK 0x000F0000 -#define LB_D2_MAX_REQ_OUTSTANDING_SHIFT 16 -#define D2MODE_PRIORITY_A_CNT 0x6D48 -#define D2MODE_PRIORITY_B_CNT 0x6D4C - -/* ix[MC] registers */ -#define MC_FB_LOCATION 0x01 -#define MC_FB_START_MASK 0x0000FFFF -#define MC_FB_START_SHIFT 0 -#define MC_FB_TOP_MASK 0xFFFF0000 -#define MC_FB_TOP_SHIFT 16 -#define MC_AGP_LOCATION 0x02 -#define MC_AGP_START_MASK 0x0000FFFF -#define MC_AGP_START_SHIFT 0 -#define MC_AGP_TOP_MASK 0xFFFF0000 -#define MC_AGP_TOP_SHIFT 16 -#define 
MC_AGP_BASE 0x03 -#define MC_AGP_BASE_2 0x04 -#define MC_CNTL 0x5 -#define MEM_NUM_CHANNELS_MASK 0x00000003 -#define MC_STATUS 0x08 -#define MC_STATUS_IDLE (1 << 4) -#define MC_MISC_LAT_TIMER 0x09 -#define MC_CPR_INIT_LAT_MASK 0x0000000F -#define MC_VF_INIT_LAT_MASK 0x000000F0 -#define MC_DISP0R_INIT_LAT_MASK 0x00000F00 -#define MC_DISP0R_INIT_LAT_SHIFT 8 -#define MC_DISP1R_INIT_LAT_MASK 0x0000F000 -#define MC_DISP1R_INIT_LAT_SHIFT 12 -#define MC_FIXED_INIT_LAT_MASK 0x000F0000 -#define MC_E2R_INIT_LAT_MASK 0x00F00000 -#define SAME_PAGE_PRIO_MASK 0x0F000000 -#define MC_GLOBW_INIT_LAT_MASK 0xF0000000 - - -#endif - diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 21d8ffd5730..57765f6d5b2 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -25,100 +25,975 @@ * Alex Deucher * Jerome Glisse */ +#include +#include #include "drmP.h" -#include "radeon_reg.h" #include "radeon.h" +#include "radeon_share.h" +#include "rv770d.h" +#include "avivod.h" +#include "atom.h" -/* rv770,rv730,rv710 depends on : */ -void rs600_mc_disable_clients(struct radeon_device *rdev); +#define R700_PFP_UCODE_SIZE 848 +#define R700_PM4_UCODE_SIZE 1360 -/* This files gather functions specifics to: - * rv770,rv730,rv710 - * - * Some of these functions might be used by newer ASICs. 
- */ -int rv770_mc_wait_for_idle(struct radeon_device *rdev); -void rv770_gpu_init(struct radeon_device *rdev); +static void rv770_gpu_init(struct radeon_device *rdev); +void rv770_fini(struct radeon_device *rdev); /* - * MC + * GART */ -int rv770_mc_init(struct radeon_device *rdev) +int rv770_pcie_gart_enable(struct radeon_device *rdev) { - uint32_t tmp; + u32 tmp; + int r, i; - rv770_gpu_init(rdev); + /* Initialize common gart structure */ + r = radeon_gart_init(rdev); + if (r) { + return r; + } + rdev->gart.table_size = rdev->gart.num_gpu_pages * 8; + r = radeon_gart_table_vram_alloc(rdev); + if (r) { + return r; + } + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, i); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); + /* Setup TLB control */ + tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | + EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); + WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); + WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); + WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); + for (i = 1; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); - /* setup the gart before changing location so we can ask to - 
* discard unmapped mc request - */ - /* FIXME: disable out of gart access */ - tmp = rdev->mc.gtt_location / 4096; - tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp); - tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096; - tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp); - - rs600_mc_disable_clients(rdev); - if (rv770_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. Bad things might happen.\n"); - } - - tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1; - tmp = REG_SET(R700_MC_FB_TOP, tmp >> 24); - tmp |= REG_SET(R700_MC_FB_BASE, rdev->mc.vram_location >> 24); - WREG32(R700_MC_VM_FB_LOCATION, tmp); - tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; - tmp = REG_SET(R700_MC_AGP_TOP, tmp >> 22); - WREG32(R700_MC_VM_AGP_TOP, tmp); - tmp = REG_SET(R700_MC_AGP_BOT, rdev->mc.gtt_location >> 22); - WREG32(R700_MC_VM_AGP_BOT, tmp); + r600_pcie_gart_tlb_flush(rdev); + rdev->gart.ready = true; return 0; } -void rv770_mc_fini(struct radeon_device *rdev) +void rv770_pcie_gart_disable(struct radeon_device *rdev) { - /* FIXME: implement */ + u32 tmp; + int i; + + /* Clear ptes*/ + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, i); + r600_pcie_gart_tlb_flush(rdev); + /* Disable all tables */ + for (i = 0; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); + + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); + /* Setup TLB control */ + tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); + WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); + 
WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); } /* - * Global GPU functions + * MC */ -void rv770_errata(struct radeon_device *rdev) +static void rv770_mc_resume(struct radeon_device *rdev) { - rdev->pll_errata = 0; + u32 d1vga_control, d2vga_control; + u32 vga_render_control, vga_hdp_control; + u32 d1crtc_control, d2crtc_control; + u32 new_d1grph_primary, new_d1grph_secondary; + u32 new_d2grph_primary, new_d2grph_secondary; + u64 old_vram_start; + u32 tmp; + int i, j; + + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); + + d1vga_control = RREG32(D1VGA_CONTROL); + d2vga_control = RREG32(D2VGA_CONTROL); + vga_render_control = RREG32(VGA_RENDER_CONTROL); + vga_hdp_control = RREG32(VGA_HDP_CONTROL); + d1crtc_control = RREG32(D1CRTC_CONTROL); + d2crtc_control = RREG32(D2CRTC_CONTROL); + old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24; + new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS); + new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS); + new_d1grph_primary += rdev->mc.vram_start - old_vram_start; + new_d1grph_secondary += rdev->mc.vram_start - old_vram_start; + new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS); + new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS); + new_d2grph_primary += rdev->mc.vram_start - old_vram_start; + new_d2grph_secondary += rdev->mc.vram_start - old_vram_start; + + /* Stop all video */ + WREG32(D1VGA_CONTROL, 0); + WREG32(D2VGA_CONTROL, 0); + WREG32(VGA_RENDER_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, 0); + WREG32(D2CRTC_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC 
not idle !\n"); + } + + /* Lockout access through VGA aperture*/ + WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); + + /* Update configuration */ + WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); + tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16; + tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); + WREG32(MC_VM_FB_LOCATION, tmp); + WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); + WREG32(HDP_NONSURFACE_INFO, (2 << 7)); + WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF); + if (rdev->flags & RADEON_IS_AGP) { + WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16); + WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16); + WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22); + } else { + WREG32(MC_VM_AGP_BASE, 0); + WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); + WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); + } + WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary); + WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary); + WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary); + WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary); + WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start); + + /* Unlock host access */ + WREG32(VGA_HDP_CONTROL, vga_hdp_control); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC not idle !\n"); + } + + /* Restore video state */ + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, d1crtc_control); + WREG32(D2CRTC_CONTROL, d2crtc_control); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + WREG32(D1VGA_CONTROL, d1vga_control); + WREG32(D2VGA_CONTROL, d2vga_control); + WREG32(VGA_RENDER_CONTROL, vga_render_control); } -int rv770_mc_wait_for_idle(struct radeon_device *rdev) + +/* + * CP. 
+ */ +void r700_cp_stop(struct radeon_device *rdev) { - /* FIXME: implement */ - return 0; + WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); } -void rv770_gpu_init(struct radeon_device *rdev) + +static int rv770_cp_load_microcode(struct radeon_device *rdev) { - /* FIXME: implement */ + const __be32 *fw_data; + int i; + + if (!rdev->me_fw || !rdev->pfp_fw) + return -EINVAL; + + r700_cp_stop(rdev); + WREG32(CP_RB_CNTL, RB_NO_UPDATE | (15 << 8) | (3 << 0)); + + /* Reset cp */ + WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); + RREG32(GRBM_SOFT_RESET); + mdelay(15); + WREG32(GRBM_SOFT_RESET, 0); + + fw_data = (const __be32 *)rdev->pfp_fw->data; + WREG32(CP_PFP_UCODE_ADDR, 0); + for (i = 0; i < R700_PFP_UCODE_SIZE; i++) + WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); + WREG32(CP_PFP_UCODE_ADDR, 0); + + fw_data = (const __be32 *)rdev->me_fw->data; + WREG32(CP_ME_RAM_WADDR, 0); + for (i = 0; i < R700_PM4_UCODE_SIZE; i++) + WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); + + WREG32(CP_PFP_UCODE_ADDR, 0); + WREG32(CP_ME_RAM_WADDR, 0); + WREG32(CP_ME_RAM_RADDR, 0); + return 0; } /* - * VRAM info + * Core functions */ -void rv770_vram_get_type(struct radeon_device *rdev) +static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes, + u32 num_backends, + u32 backend_disable_mask) { - /* FIXME: implement */ + u32 backend_map = 0; + u32 enabled_backends_mask; + u32 enabled_backends_count; + u32 cur_pipe; + u32 swizzle_pipe[R7XX_MAX_PIPES]; + u32 cur_backend; + u32 i; + + if (num_tile_pipes > R7XX_MAX_PIPES) + num_tile_pipes = R7XX_MAX_PIPES; + if (num_tile_pipes < 1) + num_tile_pipes = 1; + if (num_backends > R7XX_MAX_BACKENDS) + num_backends = R7XX_MAX_BACKENDS; + if (num_backends < 1) + num_backends = 1; + + enabled_backends_mask = 0; + enabled_backends_count = 0; + for (i = 0; i < R7XX_MAX_BACKENDS; ++i) { + if (((backend_disable_mask >> i) & 1) == 0) { + enabled_backends_mask |= (1 << i); + ++enabled_backends_count; + } + if (enabled_backends_count == num_backends) + 
break; + } + + if (enabled_backends_count == 0) { + enabled_backends_mask = 1; + enabled_backends_count = 1; + } + + if (enabled_backends_count != num_backends) + num_backends = enabled_backends_count; + + memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES); + switch (num_tile_pipes) { + case 1: + swizzle_pipe[0] = 0; + break; + case 2: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + break; + case 3: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 1; + break; + case 4: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 3; + swizzle_pipe[3] = 1; + break; + case 5: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 1; + swizzle_pipe[4] = 3; + break; + case 6: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 5; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 1; + break; + case 7: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 1; + swizzle_pipe[6] = 5; + break; + case 8: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 1; + swizzle_pipe[6] = 7; + swizzle_pipe[7] = 5; + break; + } + + cur_backend = 0; + for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { + while (((1 << cur_backend) & enabled_backends_mask) == 0) + cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; + + backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); + + cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; + } + + return backend_map; } -void rv770_vram_info(struct radeon_device *rdev) +static void rv770_gpu_init(struct radeon_device *rdev) { - rv770_vram_get_type(rdev); + int i, j, num_qd_pipes; + u32 sx_debug_1; + u32 smx_dc_ctl0; + u32 num_gs_verts_per_thread; + u32 vgt_gs_per_es; + u32 gs_prim_buffer_depth = 0; + u32 sq_ms_fifo_sizes; + u32 sq_config; + u32 
sq_thread_resource_mgmt; + u32 hdp_host_path_cntl; + u32 sq_dyn_gpr_size_simd_ab_0; + u32 backend_map; + u32 gb_tiling_config = 0; + u32 cc_rb_backend_disable = 0; + u32 cc_gc_shader_pipe_config = 0; + u32 mc_arb_ramcfg; + u32 db_debug4; - /* FIXME: implement */ + /* setup chip specs */ + switch (rdev->family) { + case CHIP_RV770: + rdev->config.rv770.max_pipes = 4; + rdev->config.rv770.max_tile_pipes = 8; + rdev->config.rv770.max_simds = 10; + rdev->config.rv770.max_backends = 4; + rdev->config.rv770.max_gprs = 256; + rdev->config.rv770.max_threads = 248; + rdev->config.rv770.max_stack_entries = 512; + rdev->config.rv770.max_hw_contexts = 8; + rdev->config.rv770.max_gs_threads = 16 * 2; + rdev->config.rv770.sx_max_export_size = 128; + rdev->config.rv770.sx_max_export_pos_size = 16; + rdev->config.rv770.sx_max_export_smx_size = 112; + rdev->config.rv770.sq_num_cf_insts = 2; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0xF9; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + break; + case CHIP_RV730: + rdev->config.rv770.max_pipes = 2; + rdev->config.rv770.max_tile_pipes = 4; + rdev->config.rv770.max_simds = 8; + rdev->config.rv770.max_backends = 2; + rdev->config.rv770.max_gprs = 128; + rdev->config.rv770.max_threads = 248; + rdev->config.rv770.max_stack_entries = 256; + rdev->config.rv770.max_hw_contexts = 8; + rdev->config.rv770.max_gs_threads = 16 * 2; + rdev->config.rv770.sx_max_export_size = 256; + rdev->config.rv770.sx_max_export_pos_size = 32; + rdev->config.rv770.sx_max_export_smx_size = 224; + rdev->config.rv770.sq_num_cf_insts = 2; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0xf9; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + if (rdev->config.rv770.sx_max_export_pos_size > 16) { + rdev->config.rv770.sx_max_export_pos_size -= 16; + 
rdev->config.rv770.sx_max_export_smx_size += 16; + } + break; + case CHIP_RV710: + rdev->config.rv770.max_pipes = 2; + rdev->config.rv770.max_tile_pipes = 2; + rdev->config.rv770.max_simds = 2; + rdev->config.rv770.max_backends = 1; + rdev->config.rv770.max_gprs = 256; + rdev->config.rv770.max_threads = 192; + rdev->config.rv770.max_stack_entries = 256; + rdev->config.rv770.max_hw_contexts = 4; + rdev->config.rv770.max_gs_threads = 8 * 2; + rdev->config.rv770.sx_max_export_size = 128; + rdev->config.rv770.sx_max_export_pos_size = 16; + rdev->config.rv770.sx_max_export_smx_size = 112; + rdev->config.rv770.sq_num_cf_insts = 1; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0x40; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + break; + case CHIP_RV740: + rdev->config.rv770.max_pipes = 4; + rdev->config.rv770.max_tile_pipes = 4; + rdev->config.rv770.max_simds = 8; + rdev->config.rv770.max_backends = 4; + rdev->config.rv770.max_gprs = 256; + rdev->config.rv770.max_threads = 248; + rdev->config.rv770.max_stack_entries = 512; + rdev->config.rv770.max_hw_contexts = 8; + rdev->config.rv770.max_gs_threads = 16 * 2; + rdev->config.rv770.sx_max_export_size = 256; + rdev->config.rv770.sx_max_export_pos_size = 32; + rdev->config.rv770.sx_max_export_smx_size = 224; + rdev->config.rv770.sq_num_cf_insts = 2; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0x100; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + + if (rdev->config.rv770.sx_max_export_pos_size > 16) { + rdev->config.rv770.sx_max_export_pos_size -= 16; + rdev->config.rv770.sx_max_export_smx_size += 16; + } + break; + default: + break; + } + + /* Initialize HDP */ + j = 0; + for (i = 0; i < 32; i++) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 
0x00000000); + WREG32((0x2c24 + j), 0x00000000); + j += 0x18; + } + + WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); + + /* setup tiling, simd, pipe config */ + mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); + + switch (rdev->config.rv770.max_tile_pipes) { + case 1: + gb_tiling_config |= PIPE_TILING(0); + break; + case 2: + gb_tiling_config |= PIPE_TILING(1); + break; + case 4: + gb_tiling_config |= PIPE_TILING(2); + break; + case 8: + gb_tiling_config |= PIPE_TILING(3); + break; + default: + break; + } + + /* Bank count: RV770 is hard-wired to 1; other parts take the NOOFBANK field of MC_ARB_RAMCFG (mask first, then shift down). */ + if (rdev->family == CHIP_RV770) + gb_tiling_config |= BANK_TILING(1); + else + gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); + + gb_tiling_config |= GROUP_SIZE(0); + + /* Row tiling / sample split follow the NOOFROWS field of MC_ARB_RAMCFG, clamped to 3. */ + if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) { + gb_tiling_config |= ROW_TILING(3); + gb_tiling_config |= SAMPLE_SPLIT(3); + } else { + gb_tiling_config |= + ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT)); + gb_tiling_config |= + SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT)); + } + + gb_tiling_config |= BANK_SWAPS(1); + + backend_map = r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes, + rdev->config.rv770.max_backends, + (0xff << rdev->config.rv770.max_backends) & 0xff); + gb_tiling_config |= BACKEND_MAP(backend_map); + + cc_gc_shader_pipe_config = + INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << rdev->config.rv770.max_pipes) & R7XX_MAX_PIPES_MASK); + cc_gc_shader_pipe_config |= + INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << rdev->config.rv770.max_simds) & R7XX_MAX_SIMDS_MASK); + + cc_rb_backend_disable = + BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK); + + WREG32(GB_TILING_CONFIG, gb_tiling_config); + WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); + WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); + + WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); + 
WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); + + WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CGTS_SYS_TCC_DISABLE, 0); + WREG32(CGTS_TCC_DISABLE, 0); + WREG32(CGTS_USER_SYS_TCC_DISABLE, 0); + WREG32(CGTS_USER_TCC_DISABLE, 0); + + num_qd_pipes = + R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK); + WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK); + WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK); + + /* set HW defaults for 3D engine */ + WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | + ROQ_IB2_START(0x2b))); + + WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30)); + + WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | + SYNC_GRADIENT | + SYNC_WALKER | + SYNC_ALIGNER)); + + sx_debug_1 = RREG32(SX_DEBUG_1); + sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS; + WREG32(SX_DEBUG_1, sx_debug_1); + + smx_dc_ctl0 = RREG32(SMX_DC_CTL0); + smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff); + smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1); + WREG32(SMX_DC_CTL0, smx_dc_ctl0); + + WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) | + GS_FLUSH_CTL(4) | + ACK_FLUSH_CTL(3) | + SYNC_FLUSH_CTL)); + + if (rdev->family == CHIP_RV770) + WREG32(DB_DEBUG3, DB_CLK_OFF_DELAY(0x1f)); + else { + db_debug4 = RREG32(DB_DEBUG4); + db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER; + WREG32(DB_DEBUG4, db_debug4); + } + + WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) | + POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) | + SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1))); + + WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) | + SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) | + SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize))); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + + WREG32(VGT_NUM_INSTANCES, 1); + + 
WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0)); + + WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); + + WREG32(CP_PERFMON_CNTL, 0); + + sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) | + DONE_FIFO_HIWATER(0xe0) | + ALU_UPDATE_FIFO_HIWATER(0x8)); + switch (rdev->family) { + case CHIP_RV770: + sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1); + break; + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + default: + sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4); + break; + } + WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes); + + /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT + * should be adjusted as needed by the 2D/3D drivers. This just sets default values + */ + sq_config = RREG32(SQ_CONFIG); + sq_config &= ~(PS_PRIO(3) | + VS_PRIO(3) | + GS_PRIO(3) | + ES_PRIO(3)); + sq_config |= (DX9_CONSTS | + VC_ENABLE | + EXPORT_SRC_C | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + if (rdev->family == CHIP_RV710) + /* no vertex cache */ + sq_config &= ~VC_ENABLE; + + WREG32(SQ_CONFIG, sq_config); + + WREG32(SQ_GPR_RESOURCE_MGMT_1, (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) | + NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) | + NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2))); + + WREG32(SQ_GPR_RESOURCE_MGMT_2, (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) | + NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64))); + + sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) | + NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) | + NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8)); + if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads) + sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads); + else + sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8); + WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); + + WREG32(SQ_STACK_RESOURCE_MGMT_1, 
(NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) | + NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4))); + + WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) | + NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4))); + + sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) | + SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) | + SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) | + SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64)); + + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0); + + WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | + FORCE_EOV_MAX_REZ_CNT(255))); + + if (rdev->family == CHIP_RV710) + WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) | + AUTO_INVLD_EN(ES_AND_GS_AUTO))); + else + WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) | + AUTO_INVLD_EN(ES_AND_GS_AUTO))); + + switch (rdev->family) { + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV740: + gs_prim_buffer_depth = 384; + break; + case CHIP_RV710: + gs_prim_buffer_depth = 128; + break; + default: + break; + } + + num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16; + vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread; + /* Max value for this is 256 */ + if (vgt_gs_per_es > 256) + vgt_gs_per_es = 256; + + WREG32(VGT_ES_PER_GS, 128); + WREG32(VGT_GS_PER_ES, vgt_gs_per_es); + WREG32(VGT_GS_PER_VS, 2); + + /* more default values. 
2D/3D driver should adjust as needed */ + WREG32(VGT_GS_VERTEX_REUSE, 16); + WREG32(PA_SC_LINE_STIPPLE_STATE, 0); + WREG32(VGT_STRMOUT_EN, 0); + WREG32(SX_MISC, 0); + WREG32(PA_SC_MODE_CNTL, 0); + WREG32(PA_SC_EDGERULE, 0xaaaaaaaa); + WREG32(PA_SC_AA_CONFIG, 0); + WREG32(PA_SC_CLIPRECT_RULE, 0xffff); + WREG32(PA_SC_LINE_STIPPLE, 0); + WREG32(SPI_INPUT_Z, 0); + WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2)); + WREG32(CB_COLOR7_FRAG, 0); + + /* clear render buffer base addresses */ + WREG32(CB_COLOR0_BASE, 0); + WREG32(CB_COLOR1_BASE, 0); + WREG32(CB_COLOR2_BASE, 0); + WREG32(CB_COLOR3_BASE, 0); + WREG32(CB_COLOR4_BASE, 0); + WREG32(CB_COLOR5_BASE, 0); + WREG32(CB_COLOR6_BASE, 0); + WREG32(CB_COLOR7_BASE, 0); + + WREG32(TCP_CNTL, 0); + + hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); + WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + + WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA | + NUM_CLIP_SEQ(3))); + +} + +int rv770_mc_init(struct radeon_device *rdev) +{ + fixed20_12 a; + u32 tmp; + int r; + + /* Get VRAM informations */ + /* FIXME: Don't know how to determine vram width, need to check + * vram_width usage + */ + rdev->mc.vram_width = 128; + rdev->mc.vram_is_ddr = true; /* Could aper size report 0 ? 
*/ rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); + /* Setup GPU memory space */ + rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); + rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); + if (rdev->flags & RADEON_IS_AGP) { + r = radeon_agp_init(rdev); + if (r) + return r; + /* gtt_size is setup by radeon_agp_init */ + rdev->mc.gtt_location = rdev->mc.agp_base; + tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size; + /* Try to put vram before or after AGP because we + * we want SYSTEM_APERTURE to cover both VRAM and + * AGP so that GPU can catch out of VRAM/AGP access + */ + if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) { + /* Enought place before */ + rdev->mc.vram_location = rdev->mc.gtt_location - + rdev->mc.mc_vram_size; + } else if (tmp > rdev->mc.mc_vram_size) { + /* Enought place after */ + rdev->mc.vram_location = rdev->mc.gtt_location + + rdev->mc.gtt_size; + } else { + /* Try to setup VRAM then AGP might not + * not work on some card + */ + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + } + } else { + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + } + rdev->mc.vram_start = rdev->mc.vram_location; + rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size; + rdev->mc.gtt_start = rdev->mc.gtt_location; + rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size; + /* FIXME: we should enforce default clock in case GPU is not in + * default setup + */ + a.full = rfixed_const(100); + rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk); + rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a); + return 0; +} +int rv770_gpu_reset(struct radeon_device *rdev) +{ + /* FIXME: implement */ + return 0; +} + +int rv770_resume(struct radeon_device *rdev) +{ + int r; + + rv770_mc_resume(rdev); + r = rv770_pcie_gart_enable(rdev); + if (r) + 
return r; + rv770_gpu_init(rdev); + r = radeon_ring_init(rdev, rdev->cp.ring_size); + if (r) + return r; + r = rv770_cp_load_microcode(rdev); + if (r) + return r; + r = r600_cp_resume(rdev); + if (r) + return r; + r = r600_wb_init(rdev); + if (r) + return r; + return 0; +} + +int rv770_suspend(struct radeon_device *rdev) +{ + /* FIXME: we should wait for ring to be empty */ + r700_cp_stop(rdev); + return 0; +} + +/* Plan is to move initialization in that function and use + * helper function so that radeon_device_init pretty much + * do nothing more than calling asic specific function. This + * should also allow to remove a bunch of callback function + * like vram_info. + */ +int rv770_init(struct radeon_device *rdev) +{ + int r; + + rdev->new_init_path = true; + r = radeon_dummy_page_init(rdev); + if (r) + return r; + /* This don't do much */ + r = radeon_gem_init(rdev); + if (r) + return r; + /* Read BIOS */ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; + } + /* Must be an ATOMBIOS */ + if (!rdev->is_atom_bios) + return -EINVAL; + r = radeon_atombios_init(rdev); + if (r) + return r; + /* Post card if necessary */ + if (!r600_card_posted(rdev) && rdev->bios) { + DRM_INFO("GPU not posted. 
posting now...\n"); + atom_asic_init(rdev->mode_info.atom_context); + } + /* Initialize scratch registers */ + r600_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); + r = radeon_clocks_init(rdev); + if (r) + return r; + /* Fence driver */ + r = radeon_fence_driver_init(rdev); + if (r) + return r; + r = rv770_mc_init(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + rv770_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return rv770_init(rdev); + } + return r; + } + /* Memory manager */ + r = radeon_object_init(rdev); + if (r) + return r; + rdev->cp.ring_obj = NULL; + r600_ring_init(rdev, 1024 * 1024); + + if (!rdev->me_fw || !rdev->pfp_fw) { + r = r600_cp_init_microcode(rdev); + if (r) { + DRM_ERROR("Failed to load firmware!\n"); + return r; + } + } + + r = rv770_resume(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + rv770_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return rv770_init(rdev); + } + return r; + } + r = r600_blit_init(rdev); + if (r) { + DRM_ERROR("radeon: failled blitter (%d).\n", r); + return r; + } + r = radeon_ib_pool_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); + return r; + } + r = radeon_ib_test(rdev); + if (r) { + DRM_ERROR("radeon: failled testing IB (%d).\n", r); + return r; + } + return 0; +} + +void rv770_fini(struct radeon_device *rdev) +{ + r600_blit_fini(rdev); + radeon_ring_fini(rdev); + rv770_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); + radeon_gem_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_clocks_fini(rdev); +#if __OS_HAS_AGP + if (rdev->flags & RADEON_IS_AGP) + radeon_agp_fini(rdev); +#endif + radeon_object_fini(rdev); + if (rdev->is_atom_bios) { + radeon_atombios_fini(rdev); + } else { + radeon_combios_fini(rdev); + } + kfree(rdev->bios); + rdev->bios = NULL; + radeon_dummy_page_fini(rdev); } diff --git 
a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h new file mode 100644 index 00000000000..4b9c3d6396f --- /dev/null +++ b/drivers/gpu/drm/radeon/rv770d.h @@ -0,0 +1,341 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2009 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef RV770_H +#define RV770_H + +#define R7XX_MAX_SH_GPRS 256 +#define R7XX_MAX_TEMP_GPRS 16 +#define R7XX_MAX_SH_THREADS 256 +#define R7XX_MAX_SH_STACK_ENTRIES 4096 +#define R7XX_MAX_BACKENDS 8 +#define R7XX_MAX_BACKENDS_MASK 0xff +#define R7XX_MAX_SIMDS 16 +#define R7XX_MAX_SIMDS_MASK 0xffff +#define R7XX_MAX_PIPES 8 +#define R7XX_MAX_PIPES_MASK 0xff + +/* Registers */ +#define CB_COLOR0_BASE 0x28040 +#define CB_COLOR1_BASE 0x28044 +#define CB_COLOR2_BASE 0x28048 +#define CB_COLOR3_BASE 0x2804C +#define CB_COLOR4_BASE 0x28050 +#define CB_COLOR5_BASE 0x28054 +#define CB_COLOR6_BASE 0x28058 +#define CB_COLOR7_BASE 0x2805C +#define CB_COLOR7_FRAG 0x280FC + +#define CC_GC_SHADER_PIPE_CONFIG 0x8950 +#define CC_RB_BACKEND_DISABLE 0x98F4 +#define BACKEND_DISABLE(x) ((x) << 16) +#define CC_SYS_RB_BACKEND_DISABLE 0x3F88 + +#define CGTS_SYS_TCC_DISABLE 0x3F90 +#define CGTS_TCC_DISABLE 0x9148 +#define CGTS_USER_SYS_TCC_DISABLE 0x3F94 +#define CGTS_USER_TCC_DISABLE 0x914C + +#define CONFIG_MEMSIZE 0x5428 + +#define CP_ME_CNTL 0x86D8 +#define CP_ME_HALT (1<<28) +#define CP_PFP_HALT (1<<26) +#define CP_ME_RAM_DATA 0xC160 +#define CP_ME_RAM_RADDR 0xC158 +#define CP_ME_RAM_WADDR 0xC15C +#define CP_MEQ_THRESHOLDS 0x8764 +#define STQ_SPLIT(x) ((x) << 0) +#define CP_PERFMON_CNTL 0x87FC +#define CP_PFP_UCODE_ADDR 0xC150 +#define CP_PFP_UCODE_DATA 0xC154 +#define CP_QUEUE_THRESHOLDS 0x8760 +#define ROQ_IB1_START(x) ((x) << 0) +#define ROQ_IB2_START(x) ((x) << 8) +#define CP_RB_CNTL 0xC104 +#define RB_BUFSZ(x) ((x)<<0) +#define RB_BLKSZ(x) ((x)<<8) +#define RB_NO_UPDATE (1<<27) +#define RB_RPTR_WR_ENA (1<<31) +#define BUF_SWAP_32BIT (2 << 16) +#define CP_RB_RPTR 0x8700 +#define CP_RB_RPTR_ADDR 0xC10C +#define CP_RB_RPTR_ADDR_HI 0xC110 +#define CP_RB_RPTR_WR 0xC108 +#define CP_RB_WPTR 0xC114 +#define CP_RB_WPTR_ADDR 0xC118 +#define CP_RB_WPTR_ADDR_HI 0xC11C +#define CP_RB_WPTR_DELAY 0x8704 +#define 
CP_SEM_WAIT_TIMER 0x85BC + +#define DB_DEBUG3 0x98B0 +#define DB_CLK_OFF_DELAY(x) ((x) << 11) +#define DB_DEBUG4 0x9B8C +#define DISABLE_TILE_COVERED_FOR_PS_ITER (1 << 6) + +#define DCP_TILING_CONFIG 0x6CA0 +#define PIPE_TILING(x) ((x) << 1) +#define BANK_TILING(x) ((x) << 4) +#define GROUP_SIZE(x) ((x) << 6) +#define ROW_TILING(x) ((x) << 8) +#define BANK_SWAPS(x) ((x) << 11) +#define SAMPLE_SPLIT(x) ((x) << 14) +#define BACKEND_MAP(x) ((x) << 16) + +#define GB_TILING_CONFIG 0x98F0 + +#define GC_USER_SHADER_PIPE_CONFIG 0x8954 +#define INACTIVE_QD_PIPES(x) ((x) << 8) +#define INACTIVE_QD_PIPES_MASK 0x0000FF00 +#define INACTIVE_SIMDS(x) ((x) << 16) +#define INACTIVE_SIMDS_MASK 0x00FF0000 + +#define GRBM_CNTL 0x8000 +#define GRBM_READ_TIMEOUT(x) ((x) << 0) +#define GRBM_SOFT_RESET 0x8020 +#define SOFT_RESET_CP (1<<0) +#define GRBM_STATUS 0x8010 +#define CMDFIFO_AVAIL_MASK 0x0000000F +#define GUI_ACTIVE (1<<31) +#define GRBM_STATUS2 0x8014 + +#define HDP_HOST_PATH_CNTL 0x2C00 +#define HDP_NONSURFACE_BASE 0x2C04 +#define HDP_NONSURFACE_INFO 0x2C08 +#define HDP_NONSURFACE_SIZE 0x2C0C +#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 +#define HDP_TILING_CONFIG 0x2F3C + +#define MC_ARB_RAMCFG 0x2760 +#define NOOFBANK_SHIFT 0 +#define NOOFBANK_MASK 0x00000003 +#define NOOFRANK_SHIFT 2 +#define NOOFRANK_MASK 0x00000004 +#define NOOFROWS_SHIFT 3 +#define NOOFROWS_MASK 0x00000038 +#define NOOFCOLS_SHIFT 6 +#define NOOFCOLS_MASK 0x000000C0 +#define CHANSIZE_SHIFT 8 +#define CHANSIZE_MASK 0x00000100 +#define BURSTLENGTH_SHIFT 9 +#define BURSTLENGTH_MASK 0x00000200 +#define MC_VM_AGP_TOP 0x2028 +#define MC_VM_AGP_BOT 0x202C +#define MC_VM_AGP_BASE 0x2030 +#define MC_VM_FB_LOCATION 0x2024 +#define MC_VM_MB_L1_TLB0_CNTL 0x2234 +#define MC_VM_MB_L1_TLB1_CNTL 0x2238 +#define MC_VM_MB_L1_TLB2_CNTL 0x223C +#define MC_VM_MB_L1_TLB3_CNTL 0x2240 +#define ENABLE_L1_TLB (1 << 0) +#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1) +#define SYSTEM_ACCESS_MODE_PA_ONLY (0 << 3) +#define 
SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 3) +#define SYSTEM_ACCESS_MODE_IN_SYS (2 << 3) +#define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 3) +#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 5) +#define EFFECTIVE_L1_TLB_SIZE(x) ((x)<<15) +#define EFFECTIVE_L1_QUEUE_SIZE(x) ((x)<<18) +#define MC_VM_MD_L1_TLB0_CNTL 0x2654 +#define MC_VM_MD_L1_TLB1_CNTL 0x2658 +#define MC_VM_MD_L1_TLB2_CNTL 0x265C +#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x203C +#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2038 +#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2034 + +#define PA_CL_ENHANCE 0x8A14 +#define CLIP_VTX_REORDER_ENA (1 << 0) +#define NUM_CLIP_SEQ(x) ((x) << 1) +#define PA_SC_AA_CONFIG 0x28C04 +#define PA_SC_CLIPRECT_RULE 0x2820C +#define PA_SC_EDGERULE 0x28230 +#define PA_SC_FIFO_SIZE 0x8BCC +#define SC_PRIM_FIFO_SIZE(x) ((x) << 0) +#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 12) +#define PA_SC_FORCE_EOV_MAX_CNTS 0x8B24 +#define FORCE_EOV_MAX_CLK_CNT(x) ((x)<<0) +#define FORCE_EOV_MAX_REZ_CNT(x) ((x)<<16) +#define PA_SC_LINE_STIPPLE 0x28A0C +#define PA_SC_LINE_STIPPLE_STATE 0x8B10 +#define PA_SC_MODE_CNTL 0x28A4C +#define PA_SC_MULTI_CHIP_CNTL 0x8B20 +#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 20) + +#define SCRATCH_REG0 0x8500 +#define SCRATCH_REG1 0x8504 +#define SCRATCH_REG2 0x8508 +#define SCRATCH_REG3 0x850C +#define SCRATCH_REG4 0x8510 +#define SCRATCH_REG5 0x8514 +#define SCRATCH_REG6 0x8518 +#define SCRATCH_REG7 0x851C +#define SCRATCH_UMSK 0x8540 +#define SCRATCH_ADDR 0x8544 + +#define SMX_DC_CTL0 0xA020 +#define USE_HASH_FUNCTION (1 << 0) +#define CACHE_DEPTH(x) ((x) << 1) +#define FLUSH_ALL_ON_EVENT (1 << 10) +#define STALL_ON_EVENT (1 << 11) +#define SMX_EVENT_CTL 0xA02C +#define ES_FLUSH_CTL(x) ((x) << 0) +#define GS_FLUSH_CTL(x) ((x) << 3) +#define ACK_FLUSH_CTL(x) ((x) << 6) +#define SYNC_FLUSH_CTL (1 << 8) + +#define SPI_CONFIG_CNTL 0x9100 +#define GPR_WRITE_PRIORITY(x) ((x) << 0) +#define DISABLE_INTERP_1 (1 << 5) +#define SPI_CONFIG_CNTL_1 0x913C +#define 
VTX_DONE_DELAY(x) ((x) << 0) +#define INTERP_ONE_PRIM_PER_ROW (1 << 4) +#define SPI_INPUT_Z 0x286D8 +#define SPI_PS_IN_CONTROL_0 0x286CC +#define NUM_INTERP(x) ((x)<<0) +#define POSITION_ENA (1<<8) +#define POSITION_CENTROID (1<<9) +#define POSITION_ADDR(x) ((x)<<10) +#define PARAM_GEN(x) ((x)<<15) +#define PARAM_GEN_ADDR(x) ((x)<<19) +#define BARYC_SAMPLE_CNTL(x) ((x)<<26) +#define PERSP_GRADIENT_ENA (1<<28) +#define LINEAR_GRADIENT_ENA (1<<29) +#define POSITION_SAMPLE (1<<30) +#define BARYC_AT_SAMPLE_ENA (1<<31) + +#define SQ_CONFIG 0x8C00 +#define VC_ENABLE (1 << 0) +#define EXPORT_SRC_C (1 << 1) +#define DX9_CONSTS (1 << 2) +#define ALU_INST_PREFER_VECTOR (1 << 3) +#define DX10_CLAMP (1 << 4) +#define CLAUSE_SEQ_PRIO(x) ((x) << 8) +#define PS_PRIO(x) ((x) << 24) +#define VS_PRIO(x) ((x) << 26) +#define GS_PRIO(x) ((x) << 28) +#define SQ_DYN_GPR_SIZE_SIMD_AB_0 0x8DB0 +#define SIMDA_RING0(x) ((x)<<0) +#define SIMDA_RING1(x) ((x)<<8) +#define SIMDB_RING0(x) ((x)<<16) +#define SIMDB_RING1(x) ((x)<<24) +#define SQ_DYN_GPR_SIZE_SIMD_AB_1 0x8DB4 +#define SQ_DYN_GPR_SIZE_SIMD_AB_2 0x8DB8 +#define SQ_DYN_GPR_SIZE_SIMD_AB_3 0x8DBC +#define SQ_DYN_GPR_SIZE_SIMD_AB_4 0x8DC0 +#define SQ_DYN_GPR_SIZE_SIMD_AB_5 0x8DC4 +#define SQ_DYN_GPR_SIZE_SIMD_AB_6 0x8DC8 +#define SQ_DYN_GPR_SIZE_SIMD_AB_7 0x8DCC +#define ES_PRIO(x) ((x) << 30) +#define SQ_GPR_RESOURCE_MGMT_1 0x8C04 +#define NUM_PS_GPRS(x) ((x) << 0) +#define NUM_VS_GPRS(x) ((x) << 16) +#define DYN_GPR_ENABLE (1 << 27) +#define NUM_CLAUSE_TEMP_GPRS(x) ((x) << 28) +#define SQ_GPR_RESOURCE_MGMT_2 0x8C08 +#define NUM_GS_GPRS(x) ((x) << 0) +#define NUM_ES_GPRS(x) ((x) << 16) +#define SQ_MS_FIFO_SIZES 0x8CF0 +#define CACHE_FIFO_SIZE(x) ((x) << 0) +#define FETCH_FIFO_HIWATER(x) ((x) << 8) +#define DONE_FIFO_HIWATER(x) ((x) << 16) +#define ALU_UPDATE_FIFO_HIWATER(x) ((x) << 24) +#define SQ_STACK_RESOURCE_MGMT_1 0x8C10 +#define NUM_PS_STACK_ENTRIES(x) ((x) << 0) +#define NUM_VS_STACK_ENTRIES(x) ((x) << 16) +#define 
SQ_STACK_RESOURCE_MGMT_2 0x8C14 +#define NUM_GS_STACK_ENTRIES(x) ((x) << 0) +#define NUM_ES_STACK_ENTRIES(x) ((x) << 16) +#define SQ_THREAD_RESOURCE_MGMT 0x8C0C +#define NUM_PS_THREADS(x) ((x) << 0) +#define NUM_VS_THREADS(x) ((x) << 8) +#define NUM_GS_THREADS(x) ((x) << 16) +#define NUM_ES_THREADS(x) ((x) << 24) + +#define SX_DEBUG_1 0x9058 +#define ENABLE_NEW_SMX_ADDRESS (1 << 16) +#define SX_EXPORT_BUFFER_SIZES 0x900C +#define COLOR_BUFFER_SIZE(x) ((x) << 0) +#define POSITION_BUFFER_SIZE(x) ((x) << 8) +#define SMX_BUFFER_SIZE(x) ((x) << 16) +#define SX_MISC 0x28350 + +#define TA_CNTL_AUX 0x9508 +#define DISABLE_CUBE_WRAP (1 << 0) +#define DISABLE_CUBE_ANISO (1 << 1) +#define SYNC_GRADIENT (1 << 24) +#define SYNC_WALKER (1 << 25) +#define SYNC_ALIGNER (1 << 26) +#define BILINEAR_PRECISION_6_BIT (0 << 31) +#define BILINEAR_PRECISION_8_BIT (1 << 31) + +#define TCP_CNTL 0x9610 + +#define VGT_CACHE_INVALIDATION 0x88C4 +#define CACHE_INVALIDATION(x) ((x)<<0) +#define VC_ONLY 0 +#define TC_ONLY 1 +#define VC_AND_TC 2 +#define AUTO_INVLD_EN(x) ((x) << 6) +#define NO_AUTO 0 +#define ES_AUTO 1 +#define GS_AUTO 2 +#define ES_AND_GS_AUTO 3 +#define VGT_ES_PER_GS 0x88CC +#define VGT_GS_PER_ES 0x88C8 +#define VGT_GS_PER_VS 0x88E8 +#define VGT_GS_VERTEX_REUSE 0x88D4 +#define VGT_NUM_INSTANCES 0x8974 +#define VGT_OUT_DEALLOC_CNTL 0x28C5C +#define DEALLOC_DIST_MASK 0x0000007F +#define VGT_STRMOUT_EN 0x28AB0 +#define VGT_VERTEX_REUSE_BLOCK_CNTL 0x28C58 +#define VTX_REUSE_DEPTH_MASK 0x000000FF + +#define VM_CONTEXT0_CNTL 0x1410 +#define ENABLE_CONTEXT (1 << 0) +#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) +#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) +#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153C +#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C +#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x155C +#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x1518 +#define VM_L2_CNTL 0x1400 +#define ENABLE_L2_CACHE (1 << 0) +#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) +#define 
ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9) +#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 14) +#define VM_L2_CNTL2 0x1404 +#define INVALIDATE_ALL_L1_TLBS (1 << 0) +#define INVALIDATE_L2_CACHE (1 << 1) +#define VM_L2_CNTL3 0x1408 +#define BANK_SELECT(x) ((x) << 0) +#define CACHE_UPDATE_MODE(x) ((x) << 6) +#define VM_L2_STATUS 0x140C +#define L2_BUSY (1 << 0) + +#define WAIT_UNTIL 0x8040 + +#endif -- cgit v1.2.3-70-g09d2 From c000273ebc830c27b8c9e03d5f4c147d3e310f48 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Thu, 10 Sep 2009 13:47:09 +0200 Subject: drm/radeon/kms: R3XX/R4XX AGP asic use PCI GART not PCIE GART R3XX/R4XX AGP asic use the old PCI GART block, not the new PCIE GART. Make sure we pick the right GART when disabling AGP. Signed-off-by: Jerome Glisse Acked-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r300.c | 6 ++++++ drivers/gpu/drm/radeon/r420.c | 9 +++++++-- drivers/gpu/drm/radeon/radeon.h | 4 ++++ drivers/gpu/drm/radeon/radeon_device.c | 2 +- 4 files changed, 18 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/radeon/r300.c') diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index a5f82f7beed..9c17b786982 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -181,6 +181,12 @@ int r300_gart_enable(struct radeon_device *rdev) rdev->asic->gart_set_page = &rv370_pcie_gart_set_page; return rv370_pcie_gart_enable(rdev); } + if (rdev->flags & RADEON_IS_PCI) { + rdev->asic->gart_disable = &r100_pci_gart_disable; + rdev->asic->gart_tlb_flush = &r100_pci_gart_tlb_flush; + rdev->asic->gart_set_page = &r100_pci_gart_set_page; + return r100_pci_gart_enable(rdev); + } return r100_pci_gart_enable(rdev); } diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 96303f064db..551d6996d3f 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -101,8 +101,13 @@ int r420_mc_init(struct radeon_device *rdev) void 
r420_mc_fini(struct radeon_device *rdev) { - rv370_pcie_gart_disable(rdev); - radeon_gart_table_vram_free(rdev); + if (rdev->flags & RADEON_IS_PCIE) { + rv370_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + } else { + r100_pci_gart_disable(rdev); + radeon_gart_table_ram_free(rdev); + } radeon_gart_fini(rdev); } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index fa84c77577a..d6ff4e01206 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -935,6 +935,10 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) /* r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */ void r100_cp_disable(struct radeon_device *rdev); +void r100_pci_gart_tlb_flush(struct radeon_device *rdev); +int r100_pci_gart_enable(struct radeon_device *rdev); +void r100_pci_gart_disable(struct radeon_device *rdev); +int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr); /* r420,r423,rv410 */ u32 r420_mc_rreg(struct radeon_device *rdev, u32 reg); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 05e1af0156c..bf6939497e1 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -503,7 +503,7 @@ int radeon_device_init(struct radeon_device *rdev, if (radeon_agpmode == -1) { rdev->flags &= ~RADEON_IS_AGP; - if (rdev->family > CHIP_RV515 || + if (rdev->family >= CHIP_RV515 || rdev->family == CHIP_RV380 || rdev->family == CHIP_RV410 || rdev->family == CHIP_R423) { -- cgit v1.2.3-70-g09d2 From 9f022ddfb23793b475ff7e57ac08a766dd5d31bd Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 11 Sep 2009 15:35:22 +0200 Subject: drm/radeon/kms: convert r4xx to new init path This convert r4xx to new init path it also fix few bugs. 
Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r100.c | 114 +++++++- drivers/gpu/drm/radeon/r100d.h | 471 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/r300.c | 50 +++- drivers/gpu/drm/radeon/r300d.h | 25 ++ drivers/gpu/drm/radeon/r420.c | 293 +++++++++++++------- drivers/gpu/drm/radeon/r420d.h | 206 ++++++++++++++ drivers/gpu/drm/radeon/radeon.h | 32 ++- drivers/gpu/drm/radeon/radeon_asic.h | 33 +-- drivers/gpu/drm/radeon/radeon_device.c | 19 +- drivers/gpu/drm/radeon/radeon_ring.c | 2 + 10 files changed, 1121 insertions(+), 124 deletions(-) (limited to 'drivers/gpu/drm/radeon/r300.c') diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 4dd5ca50c0c..47263d3ede9 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -299,6 +299,17 @@ int r100_irq_set(struct radeon_device *rdev) return 0; } +void r100_irq_disable(struct radeon_device *rdev) +{ + u32 tmp; + + WREG32(R_000040_GEN_INT_CNTL, 0); + /* Wait and acknowledge irq */ + mdelay(1); + tmp = RREG32(R_000044_GEN_INT_STATUS); + WREG32(R_000044_GEN_INT_STATUS, tmp); +} + static inline uint32_t r100_irq_ack(struct radeon_device *rdev) { uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS); @@ -396,14 +407,21 @@ int r100_wb_init(struct radeon_device *rdev) return r; } } - WREG32(RADEON_SCRATCH_ADDR, rdev->wb.gpu_addr); - WREG32(RADEON_CP_RB_RPTR_ADDR, rdev->wb.gpu_addr + 1024); - WREG32(RADEON_SCRATCH_UMSK, 0xff); + WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr); + WREG32(R_00070C_CP_RB_RPTR_ADDR, + S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + 1024) >> 2)); + WREG32(R_000770_SCRATCH_UMSK, 0xff); return 0; } +void r100_wb_disable(struct radeon_device *rdev) +{ + WREG32(R_000770_SCRATCH_UMSK, 0); +} + void r100_wb_fini(struct radeon_device *rdev) { + r100_wb_disable(rdev); if (rdev->wb.wb_obj) { radeon_object_kunmap(rdev->wb.wb_obj); radeon_object_unpin(rdev->wb.wb_obj); @@ -1581,11 +1599,12 @@ static int 
r100_packet3_check(struct radeon_cs_parser *p, int r100_cs_parse(struct radeon_cs_parser *p) { struct radeon_cs_packet pkt; - struct r100_cs_track track; + struct r100_cs_track *track; int r; - r100_cs_track_clear(p->rdev, &track); - p->track = &track; + track = kzalloc(sizeof(*track), GFP_KERNEL); + r100_cs_track_clear(p->rdev, track); + p->track = track; do { r = r100_cs_packet_parse(p, &pkt, p->idx); if (r) { @@ -3085,3 +3104,86 @@ int r100_ib_test(struct radeon_device *rdev) radeon_ib_free(rdev, &ib); return r; } + +void r100_ib_fini(struct radeon_device *rdev) +{ + radeon_ib_pool_fini(rdev); +} + +int r100_ib_init(struct radeon_device *rdev) +{ + int r; + + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "failled initializing IB pool (%d).\n", r); + r100_ib_fini(rdev); + return r; + } + r = r100_ib_test(rdev); + if (r) { + dev_err(rdev->dev, "failled testing IB (%d).\n", r); + r100_ib_fini(rdev); + return r; + } + return 0; +} + +void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save) +{ + /* Shutdown CP we shouldn't need to do that but better be safe than + * sorry + */ + rdev->cp.ready = false; + WREG32(R_000740_CP_CSQ_CNTL, 0); + + /* Save few CRTC registers */ + save->GENMO_WT = RREG32(R_0003C0_GENMO_WT); + save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL); + save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL); + save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET); + if (!(rdev->flags & RADEON_SINGLE_CRTC)) { + save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL); + save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET); + } + + /* Disable VGA aperture access */ + WREG32(R_0003C0_GENMO_WT, C_0003C0_VGA_RAM_EN & save->GENMO_WT); + /* Disable cursor, overlay, crtc */ + WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1)); + WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL | + S_000054_CRTC_DISPLAY_DIS(1)); + WREG32(R_000050_CRTC_GEN_CNTL, + (C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) | + 
S_000050_CRTC_DISP_REQ_EN_B(1)); + WREG32(R_000420_OV0_SCALE_CNTL, + C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL)); + WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET); + if (!(rdev->flags & RADEON_SINGLE_CRTC)) { + WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET | + S_000360_CUR2_LOCK(1)); + WREG32(R_0003F8_CRTC2_GEN_CNTL, + (C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) | + S_0003F8_CRTC2_DISPLAY_DIS(1) | + S_0003F8_CRTC2_DISP_REQ_EN_B(1)); + WREG32(R_000360_CUR2_OFFSET, + C_000360_CUR2_LOCK & save->CUR2_OFFSET); + } +} + +void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save) +{ + /* Update base address for crtc */ + WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_location); + if (!(rdev->flags & RADEON_SINGLE_CRTC)) { + WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, + rdev->mc.vram_location); + } + /* Restore CRTC registers */ + WREG32(R_0003C0_GENMO_WT, save->GENMO_WT); + WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL); + WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL); + if (!(rdev->flags & RADEON_SINGLE_CRTC)) { + WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL); + } +} diff --git a/drivers/gpu/drm/radeon/r100d.h b/drivers/gpu/drm/radeon/r100d.h index 1d951ab77dc..c4b257ec920 100644 --- a/drivers/gpu/drm/radeon/r100d.h +++ b/drivers/gpu/drm/radeon/r100d.h @@ -74,6 +74,477 @@ #define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) /* Registers */ +#define R_000040_GEN_INT_CNTL 0x000040 +#define S_000040_CRTC_VBLANK(x) (((x) & 0x1) << 0) +#define G_000040_CRTC_VBLANK(x) (((x) >> 0) & 0x1) +#define C_000040_CRTC_VBLANK 0xFFFFFFFE +#define S_000040_CRTC_VLINE(x) (((x) & 0x1) << 1) +#define G_000040_CRTC_VLINE(x) (((x) >> 1) & 0x1) +#define C_000040_CRTC_VLINE 0xFFFFFFFD +#define S_000040_CRTC_VSYNC(x) (((x) & 0x1) << 2) +#define G_000040_CRTC_VSYNC(x) (((x) >> 2) & 0x1) +#define C_000040_CRTC_VSYNC 0xFFFFFFFB +#define S_000040_SNAPSHOT(x) (((x) & 0x1) << 3) +#define G_000040_SNAPSHOT(x) (((x) >> 3) & 0x1) 
+#define C_000040_SNAPSHOT 0xFFFFFFF7 +#define S_000040_FP_DETECT(x) (((x) & 0x1) << 4) +#define G_000040_FP_DETECT(x) (((x) >> 4) & 0x1) +#define C_000040_FP_DETECT 0xFFFFFFEF +#define S_000040_CRTC2_VLINE(x) (((x) & 0x1) << 5) +#define G_000040_CRTC2_VLINE(x) (((x) >> 5) & 0x1) +#define C_000040_CRTC2_VLINE 0xFFFFFFDF +#define S_000040_DMA_VIPH0_INT_EN(x) (((x) & 0x1) << 12) +#define G_000040_DMA_VIPH0_INT_EN(x) (((x) >> 12) & 0x1) +#define C_000040_DMA_VIPH0_INT_EN 0xFFFFEFFF +#define S_000040_CRTC2_VSYNC(x) (((x) & 0x1) << 6) +#define G_000040_CRTC2_VSYNC(x) (((x) >> 6) & 0x1) +#define C_000040_CRTC2_VSYNC 0xFFFFFFBF +#define S_000040_SNAPSHOT2(x) (((x) & 0x1) << 7) +#define G_000040_SNAPSHOT2(x) (((x) >> 7) & 0x1) +#define C_000040_SNAPSHOT2 0xFFFFFF7F +#define S_000040_CRTC2_VBLANK(x) (((x) & 0x1) << 9) +#define G_000040_CRTC2_VBLANK(x) (((x) >> 9) & 0x1) +#define C_000040_CRTC2_VBLANK 0xFFFFFDFF +#define S_000040_FP2_DETECT(x) (((x) & 0x1) << 10) +#define G_000040_FP2_DETECT(x) (((x) >> 10) & 0x1) +#define C_000040_FP2_DETECT 0xFFFFFBFF +#define S_000040_VSYNC_DIFF_OVER_LIMIT(x) (((x) & 0x1) << 11) +#define G_000040_VSYNC_DIFF_OVER_LIMIT(x) (((x) >> 11) & 0x1) +#define C_000040_VSYNC_DIFF_OVER_LIMIT 0xFFFFF7FF +#define S_000040_DMA_VIPH1_INT_EN(x) (((x) & 0x1) << 13) +#define G_000040_DMA_VIPH1_INT_EN(x) (((x) >> 13) & 0x1) +#define C_000040_DMA_VIPH1_INT_EN 0xFFFFDFFF +#define S_000040_DMA_VIPH2_INT_EN(x) (((x) & 0x1) << 14) +#define G_000040_DMA_VIPH2_INT_EN(x) (((x) >> 14) & 0x1) +#define C_000040_DMA_VIPH2_INT_EN 0xFFFFBFFF +#define S_000040_DMA_VIPH3_INT_EN(x) (((x) & 0x1) << 15) +#define G_000040_DMA_VIPH3_INT_EN(x) (((x) >> 15) & 0x1) +#define C_000040_DMA_VIPH3_INT_EN 0xFFFF7FFF +#define S_000040_I2C_INT_EN(x) (((x) & 0x1) << 17) +#define G_000040_I2C_INT_EN(x) (((x) >> 17) & 0x1) +#define C_000040_I2C_INT_EN 0xFFFDFFFF +#define S_000040_GUI_IDLE(x) (((x) & 0x1) << 19) +#define G_000040_GUI_IDLE(x) (((x) >> 19) & 0x1) +#define C_000040_GUI_IDLE 
0xFFF7FFFF +#define S_000040_VIPH_INT_EN(x) (((x) & 0x1) << 24) +#define G_000040_VIPH_INT_EN(x) (((x) >> 24) & 0x1) +#define C_000040_VIPH_INT_EN 0xFEFFFFFF +#define S_000040_SW_INT_EN(x) (((x) & 0x1) << 25) +#define G_000040_SW_INT_EN(x) (((x) >> 25) & 0x1) +#define C_000040_SW_INT_EN 0xFDFFFFFF +#define S_000040_GEYSERVILLE(x) (((x) & 0x1) << 27) +#define G_000040_GEYSERVILLE(x) (((x) >> 27) & 0x1) +#define C_000040_GEYSERVILLE 0xF7FFFFFF +#define S_000040_HDCP_AUTHORIZED_INT(x) (((x) & 0x1) << 28) +#define G_000040_HDCP_AUTHORIZED_INT(x) (((x) >> 28) & 0x1) +#define C_000040_HDCP_AUTHORIZED_INT 0xEFFFFFFF +#define S_000040_DVI_I2C_INT(x) (((x) & 0x1) << 29) +#define G_000040_DVI_I2C_INT(x) (((x) >> 29) & 0x1) +#define C_000040_DVI_I2C_INT 0xDFFFFFFF +#define S_000040_GUIDMA(x) (((x) & 0x1) << 30) +#define G_000040_GUIDMA(x) (((x) >> 30) & 0x1) +#define C_000040_GUIDMA 0xBFFFFFFF +#define S_000040_VIDDMA(x) (((x) & 0x1) << 31) +#define G_000040_VIDDMA(x) (((x) >> 31) & 0x1) +#define C_000040_VIDDMA 0x7FFFFFFF +#define R_000044_GEN_INT_STATUS 0x000044 +#define S_000044_CRTC_VBLANK_STAT(x) (((x) & 0x1) << 0) +#define G_000044_CRTC_VBLANK_STAT(x) (((x) >> 0) & 0x1) +#define C_000044_CRTC_VBLANK_STAT 0xFFFFFFFE +#define S_000044_CRTC_VBLANK_STAT_AK(x) (((x) & 0x1) << 0) +#define G_000044_CRTC_VBLANK_STAT_AK(x) (((x) >> 0) & 0x1) +#define C_000044_CRTC_VBLANK_STAT_AK 0xFFFFFFFE +#define S_000044_CRTC_VLINE_STAT(x) (((x) & 0x1) << 1) +#define G_000044_CRTC_VLINE_STAT(x) (((x) >> 1) & 0x1) +#define C_000044_CRTC_VLINE_STAT 0xFFFFFFFD +#define S_000044_CRTC_VLINE_STAT_AK(x) (((x) & 0x1) << 1) +#define G_000044_CRTC_VLINE_STAT_AK(x) (((x) >> 1) & 0x1) +#define C_000044_CRTC_VLINE_STAT_AK 0xFFFFFFFD +#define S_000044_CRTC_VSYNC_STAT(x) (((x) & 0x1) << 2) +#define G_000044_CRTC_VSYNC_STAT(x) (((x) >> 2) & 0x1) +#define C_000044_CRTC_VSYNC_STAT 0xFFFFFFFB +#define S_000044_CRTC_VSYNC_STAT_AK(x) (((x) & 0x1) << 2) +#define G_000044_CRTC_VSYNC_STAT_AK(x) (((x) >> 2) & 0x1) 
+#define C_000044_CRTC_VSYNC_STAT_AK 0xFFFFFFFB +#define S_000044_SNAPSHOT_STAT(x) (((x) & 0x1) << 3) +#define G_000044_SNAPSHOT_STAT(x) (((x) >> 3) & 0x1) +#define C_000044_SNAPSHOT_STAT 0xFFFFFFF7 +#define S_000044_SNAPSHOT_STAT_AK(x) (((x) & 0x1) << 3) +#define G_000044_SNAPSHOT_STAT_AK(x) (((x) >> 3) & 0x1) +#define C_000044_SNAPSHOT_STAT_AK 0xFFFFFFF7 +#define S_000044_FP_DETECT_STAT(x) (((x) & 0x1) << 4) +#define G_000044_FP_DETECT_STAT(x) (((x) >> 4) & 0x1) +#define C_000044_FP_DETECT_STAT 0xFFFFFFEF +#define S_000044_FP_DETECT_STAT_AK(x) (((x) & 0x1) << 4) +#define G_000044_FP_DETECT_STAT_AK(x) (((x) >> 4) & 0x1) +#define C_000044_FP_DETECT_STAT_AK 0xFFFFFFEF +#define S_000044_CRTC2_VLINE_STAT(x) (((x) & 0x1) << 5) +#define G_000044_CRTC2_VLINE_STAT(x) (((x) >> 5) & 0x1) +#define C_000044_CRTC2_VLINE_STAT 0xFFFFFFDF +#define S_000044_CRTC2_VLINE_STAT_AK(x) (((x) & 0x1) << 5) +#define G_000044_CRTC2_VLINE_STAT_AK(x) (((x) >> 5) & 0x1) +#define C_000044_CRTC2_VLINE_STAT_AK 0xFFFFFFDF +#define S_000044_CRTC2_VSYNC_STAT(x) (((x) & 0x1) << 6) +#define G_000044_CRTC2_VSYNC_STAT(x) (((x) >> 6) & 0x1) +#define C_000044_CRTC2_VSYNC_STAT 0xFFFFFFBF +#define S_000044_CRTC2_VSYNC_STAT_AK(x) (((x) & 0x1) << 6) +#define G_000044_CRTC2_VSYNC_STAT_AK(x) (((x) >> 6) & 0x1) +#define C_000044_CRTC2_VSYNC_STAT_AK 0xFFFFFFBF +#define S_000044_SNAPSHOT2_STAT(x) (((x) & 0x1) << 7) +#define G_000044_SNAPSHOT2_STAT(x) (((x) >> 7) & 0x1) +#define C_000044_SNAPSHOT2_STAT 0xFFFFFF7F +#define S_000044_SNAPSHOT2_STAT_AK(x) (((x) & 0x1) << 7) +#define G_000044_SNAPSHOT2_STAT_AK(x) (((x) >> 7) & 0x1) +#define C_000044_SNAPSHOT2_STAT_AK 0xFFFFFF7F +#define S_000044_CAP0_INT_ACTIVE(x) (((x) & 0x1) << 8) +#define G_000044_CAP0_INT_ACTIVE(x) (((x) >> 8) & 0x1) +#define C_000044_CAP0_INT_ACTIVE 0xFFFFFEFF +#define S_000044_CRTC2_VBLANK_STAT(x) (((x) & 0x1) << 9) +#define G_000044_CRTC2_VBLANK_STAT(x) (((x) >> 9) & 0x1) +#define C_000044_CRTC2_VBLANK_STAT 0xFFFFFDFF +#define 
S_000044_CRTC2_VBLANK_STAT_AK(x) (((x) & 0x1) << 9) +#define G_000044_CRTC2_VBLANK_STAT_AK(x) (((x) >> 9) & 0x1) +#define C_000044_CRTC2_VBLANK_STAT_AK 0xFFFFFDFF +#define S_000044_FP2_DETECT_STAT(x) (((x) & 0x1) << 10) +#define G_000044_FP2_DETECT_STAT(x) (((x) >> 10) & 0x1) +#define C_000044_FP2_DETECT_STAT 0xFFFFFBFF +#define S_000044_FP2_DETECT_STAT_AK(x) (((x) & 0x1) << 10) +#define G_000044_FP2_DETECT_STAT_AK(x) (((x) >> 10) & 0x1) +#define C_000044_FP2_DETECT_STAT_AK 0xFFFFFBFF +#define S_000044_VSYNC_DIFF_OVER_LIMIT_STAT(x) (((x) & 0x1) << 11) +#define G_000044_VSYNC_DIFF_OVER_LIMIT_STAT(x) (((x) >> 11) & 0x1) +#define C_000044_VSYNC_DIFF_OVER_LIMIT_STAT 0xFFFFF7FF +#define S_000044_VSYNC_DIFF_OVER_LIMIT_STAT_AK(x) (((x) & 0x1) << 11) +#define G_000044_VSYNC_DIFF_OVER_LIMIT_STAT_AK(x) (((x) >> 11) & 0x1) +#define C_000044_VSYNC_DIFF_OVER_LIMIT_STAT_AK 0xFFFFF7FF +#define S_000044_DMA_VIPH0_INT(x) (((x) & 0x1) << 12) +#define G_000044_DMA_VIPH0_INT(x) (((x) >> 12) & 0x1) +#define C_000044_DMA_VIPH0_INT 0xFFFFEFFF +#define S_000044_DMA_VIPH0_INT_AK(x) (((x) & 0x1) << 12) +#define G_000044_DMA_VIPH0_INT_AK(x) (((x) >> 12) & 0x1) +#define C_000044_DMA_VIPH0_INT_AK 0xFFFFEFFF +#define S_000044_DMA_VIPH1_INT(x) (((x) & 0x1) << 13) +#define G_000044_DMA_VIPH1_INT(x) (((x) >> 13) & 0x1) +#define C_000044_DMA_VIPH1_INT 0xFFFFDFFF +#define S_000044_DMA_VIPH1_INT_AK(x) (((x) & 0x1) << 13) +#define G_000044_DMA_VIPH1_INT_AK(x) (((x) >> 13) & 0x1) +#define C_000044_DMA_VIPH1_INT_AK 0xFFFFDFFF +#define S_000044_DMA_VIPH2_INT(x) (((x) & 0x1) << 14) +#define G_000044_DMA_VIPH2_INT(x) (((x) >> 14) & 0x1) +#define C_000044_DMA_VIPH2_INT 0xFFFFBFFF +#define S_000044_DMA_VIPH2_INT_AK(x) (((x) & 0x1) << 14) +#define G_000044_DMA_VIPH2_INT_AK(x) (((x) >> 14) & 0x1) +#define C_000044_DMA_VIPH2_INT_AK 0xFFFFBFFF +#define S_000044_DMA_VIPH3_INT(x) (((x) & 0x1) << 15) +#define G_000044_DMA_VIPH3_INT(x) (((x) >> 15) & 0x1) +#define C_000044_DMA_VIPH3_INT 0xFFFF7FFF +#define 
S_000044_DMA_VIPH3_INT_AK(x) (((x) & 0x1) << 15) +#define G_000044_DMA_VIPH3_INT_AK(x) (((x) >> 15) & 0x1) +#define C_000044_DMA_VIPH3_INT_AK 0xFFFF7FFF +#define S_000044_I2C_INT(x) (((x) & 0x1) << 17) +#define G_000044_I2C_INT(x) (((x) >> 17) & 0x1) +#define C_000044_I2C_INT 0xFFFDFFFF +#define S_000044_I2C_INT_AK(x) (((x) & 0x1) << 17) +#define G_000044_I2C_INT_AK(x) (((x) >> 17) & 0x1) +#define C_000044_I2C_INT_AK 0xFFFDFFFF +#define S_000044_GUI_IDLE_STAT(x) (((x) & 0x1) << 19) +#define G_000044_GUI_IDLE_STAT(x) (((x) >> 19) & 0x1) +#define C_000044_GUI_IDLE_STAT 0xFFF7FFFF +#define S_000044_GUI_IDLE_STAT_AK(x) (((x) & 0x1) << 19) +#define G_000044_GUI_IDLE_STAT_AK(x) (((x) >> 19) & 0x1) +#define C_000044_GUI_IDLE_STAT_AK 0xFFF7FFFF +#define S_000044_VIPH_INT(x) (((x) & 0x1) << 24) +#define G_000044_VIPH_INT(x) (((x) >> 24) & 0x1) +#define C_000044_VIPH_INT 0xFEFFFFFF +#define S_000044_SW_INT(x) (((x) & 0x1) << 25) +#define G_000044_SW_INT(x) (((x) >> 25) & 0x1) +#define C_000044_SW_INT 0xFDFFFFFF +#define S_000044_SW_INT_AK(x) (((x) & 0x1) << 25) +#define G_000044_SW_INT_AK(x) (((x) >> 25) & 0x1) +#define C_000044_SW_INT_AK 0xFDFFFFFF +#define S_000044_SW_INT_SET(x) (((x) & 0x1) << 26) +#define G_000044_SW_INT_SET(x) (((x) >> 26) & 0x1) +#define C_000044_SW_INT_SET 0xFBFFFFFF +#define S_000044_GEYSERVILLE_STAT(x) (((x) & 0x1) << 27) +#define G_000044_GEYSERVILLE_STAT(x) (((x) >> 27) & 0x1) +#define C_000044_GEYSERVILLE_STAT 0xF7FFFFFF +#define S_000044_GEYSERVILLE_STAT_AK(x) (((x) & 0x1) << 27) +#define G_000044_GEYSERVILLE_STAT_AK(x) (((x) >> 27) & 0x1) +#define C_000044_GEYSERVILLE_STAT_AK 0xF7FFFFFF +#define S_000044_HDCP_AUTHORIZED_INT_STAT(x) (((x) & 0x1) << 28) +#define G_000044_HDCP_AUTHORIZED_INT_STAT(x) (((x) >> 28) & 0x1) +#define C_000044_HDCP_AUTHORIZED_INT_STAT 0xEFFFFFFF +#define S_000044_HDCP_AUTHORIZED_INT_AK(x) (((x) & 0x1) << 28) +#define G_000044_HDCP_AUTHORIZED_INT_AK(x) (((x) >> 28) & 0x1) +#define C_000044_HDCP_AUTHORIZED_INT_AK 
0xEFFFFFFF +#define S_000044_DVI_I2C_INT_STAT(x) (((x) & 0x1) << 29) +#define G_000044_DVI_I2C_INT_STAT(x) (((x) >> 29) & 0x1) +#define C_000044_DVI_I2C_INT_STAT 0xDFFFFFFF +#define S_000044_DVI_I2C_INT_AK(x) (((x) & 0x1) << 29) +#define G_000044_DVI_I2C_INT_AK(x) (((x) >> 29) & 0x1) +#define C_000044_DVI_I2C_INT_AK 0xDFFFFFFF +#define S_000044_GUIDMA_STAT(x) (((x) & 0x1) << 30) +#define G_000044_GUIDMA_STAT(x) (((x) >> 30) & 0x1) +#define C_000044_GUIDMA_STAT 0xBFFFFFFF +#define S_000044_GUIDMA_AK(x) (((x) & 0x1) << 30) +#define G_000044_GUIDMA_AK(x) (((x) >> 30) & 0x1) +#define C_000044_GUIDMA_AK 0xBFFFFFFF +#define S_000044_VIDDMA_STAT(x) (((x) & 0x1) << 31) +#define G_000044_VIDDMA_STAT(x) (((x) >> 31) & 0x1) +#define C_000044_VIDDMA_STAT 0x7FFFFFFF +#define S_000044_VIDDMA_AK(x) (((x) & 0x1) << 31) +#define G_000044_VIDDMA_AK(x) (((x) >> 31) & 0x1) +#define C_000044_VIDDMA_AK 0x7FFFFFFF +#define R_000050_CRTC_GEN_CNTL 0x000050 +#define S_000050_CRTC_DBL_SCAN_EN(x) (((x) & 0x1) << 0) +#define G_000050_CRTC_DBL_SCAN_EN(x) (((x) >> 0) & 0x1) +#define C_000050_CRTC_DBL_SCAN_EN 0xFFFFFFFE +#define S_000050_CRTC_INTERLACE_EN(x) (((x) & 0x1) << 1) +#define G_000050_CRTC_INTERLACE_EN(x) (((x) >> 1) & 0x1) +#define C_000050_CRTC_INTERLACE_EN 0xFFFFFFFD +#define S_000050_CRTC_C_SYNC_EN(x) (((x) & 0x1) << 4) +#define G_000050_CRTC_C_SYNC_EN(x) (((x) >> 4) & 0x1) +#define C_000050_CRTC_C_SYNC_EN 0xFFFFFFEF +#define S_000050_CRTC_PIX_WIDTH(x) (((x) & 0xF) << 8) +#define G_000050_CRTC_PIX_WIDTH(x) (((x) >> 8) & 0xF) +#define C_000050_CRTC_PIX_WIDTH 0xFFFFF0FF +#define S_000050_CRTC_ICON_EN(x) (((x) & 0x1) << 15) +#define G_000050_CRTC_ICON_EN(x) (((x) >> 15) & 0x1) +#define C_000050_CRTC_ICON_EN 0xFFFF7FFF +#define S_000050_CRTC_CUR_EN(x) (((x) & 0x1) << 16) +#define G_000050_CRTC_CUR_EN(x) (((x) >> 16) & 0x1) +#define C_000050_CRTC_CUR_EN 0xFFFEFFFF +#define S_000050_CRTC_VSTAT_MODE(x) (((x) & 0x3) << 17) +#define G_000050_CRTC_VSTAT_MODE(x) (((x) >> 17) & 0x3) +#define 
C_000050_CRTC_VSTAT_MODE 0xFFF9FFFF +#define S_000050_CRTC_CUR_MODE(x) (((x) & 0x7) << 20) +#define G_000050_CRTC_CUR_MODE(x) (((x) >> 20) & 0x7) +#define C_000050_CRTC_CUR_MODE 0xFF8FFFFF +#define S_000050_CRTC_EXT_DISP_EN(x) (((x) & 0x1) << 24) +#define G_000050_CRTC_EXT_DISP_EN(x) (((x) >> 24) & 0x1) +#define C_000050_CRTC_EXT_DISP_EN 0xFEFFFFFF +#define S_000050_CRTC_EN(x) (((x) & 0x1) << 25) +#define G_000050_CRTC_EN(x) (((x) >> 25) & 0x1) +#define C_000050_CRTC_EN 0xFDFFFFFF +#define S_000050_CRTC_DISP_REQ_EN_B(x) (((x) & 0x1) << 26) +#define G_000050_CRTC_DISP_REQ_EN_B(x) (((x) >> 26) & 0x1) +#define C_000050_CRTC_DISP_REQ_EN_B 0xFBFFFFFF +#define R_000054_CRTC_EXT_CNTL 0x000054 +#define S_000054_CRTC_VGA_XOVERSCAN(x) (((x) & 0x1) << 0) +#define G_000054_CRTC_VGA_XOVERSCAN(x) (((x) >> 0) & 0x1) +#define C_000054_CRTC_VGA_XOVERSCAN 0xFFFFFFFE +#define S_000054_VGA_BLINK_RATE(x) (((x) & 0x3) << 1) +#define G_000054_VGA_BLINK_RATE(x) (((x) >> 1) & 0x3) +#define C_000054_VGA_BLINK_RATE 0xFFFFFFF9 +#define S_000054_VGA_ATI_LINEAR(x) (((x) & 0x1) << 3) +#define G_000054_VGA_ATI_LINEAR(x) (((x) >> 3) & 0x1) +#define C_000054_VGA_ATI_LINEAR 0xFFFFFFF7 +#define S_000054_VGA_128KAP_PAGING(x) (((x) & 0x1) << 4) +#define G_000054_VGA_128KAP_PAGING(x) (((x) >> 4) & 0x1) +#define C_000054_VGA_128KAP_PAGING 0xFFFFFFEF +#define S_000054_VGA_TEXT_132(x) (((x) & 0x1) << 5) +#define G_000054_VGA_TEXT_132(x) (((x) >> 5) & 0x1) +#define C_000054_VGA_TEXT_132 0xFFFFFFDF +#define S_000054_VGA_XCRT_CNT_EN(x) (((x) & 0x1) << 6) +#define G_000054_VGA_XCRT_CNT_EN(x) (((x) >> 6) & 0x1) +#define C_000054_VGA_XCRT_CNT_EN 0xFFFFFFBF +#define S_000054_CRTC_HSYNC_DIS(x) (((x) & 0x1) << 8) +#define G_000054_CRTC_HSYNC_DIS(x) (((x) >> 8) & 0x1) +#define C_000054_CRTC_HSYNC_DIS 0xFFFFFEFF +#define S_000054_CRTC_VSYNC_DIS(x) (((x) & 0x1) << 9) +#define G_000054_CRTC_VSYNC_DIS(x) (((x) >> 9) & 0x1) +#define C_000054_CRTC_VSYNC_DIS 0xFFFFFDFF +#define S_000054_CRTC_DISPLAY_DIS(x) (((x) & 0x1) << 
10) +#define G_000054_CRTC_DISPLAY_DIS(x) (((x) >> 10) & 0x1) +#define C_000054_CRTC_DISPLAY_DIS 0xFFFFFBFF +#define S_000054_CRTC_SYNC_TRISTATE(x) (((x) & 0x1) << 11) +#define G_000054_CRTC_SYNC_TRISTATE(x) (((x) >> 11) & 0x1) +#define C_000054_CRTC_SYNC_TRISTATE 0xFFFFF7FF +#define S_000054_CRTC_HSYNC_TRISTATE(x) (((x) & 0x1) << 12) +#define G_000054_CRTC_HSYNC_TRISTATE(x) (((x) >> 12) & 0x1) +#define C_000054_CRTC_HSYNC_TRISTATE 0xFFFFEFFF +#define S_000054_CRTC_VSYNC_TRISTATE(x) (((x) & 0x1) << 13) +#define G_000054_CRTC_VSYNC_TRISTATE(x) (((x) >> 13) & 0x1) +#define C_000054_CRTC_VSYNC_TRISTATE 0xFFFFDFFF +#define S_000054_CRT_ON(x) (((x) & 0x1) << 15) +#define G_000054_CRT_ON(x) (((x) >> 15) & 0x1) +#define C_000054_CRT_ON 0xFFFF7FFF +#define S_000054_VGA_CUR_B_TEST(x) (((x) & 0x1) << 17) +#define G_000054_VGA_CUR_B_TEST(x) (((x) >> 17) & 0x1) +#define C_000054_VGA_CUR_B_TEST 0xFFFDFFFF +#define S_000054_VGA_PACK_DIS(x) (((x) & 0x1) << 18) +#define G_000054_VGA_PACK_DIS(x) (((x) >> 18) & 0x1) +#define C_000054_VGA_PACK_DIS 0xFFFBFFFF +#define S_000054_VGA_MEM_PS_EN(x) (((x) & 0x1) << 19) +#define G_000054_VGA_MEM_PS_EN(x) (((x) >> 19) & 0x1) +#define C_000054_VGA_MEM_PS_EN 0xFFF7FFFF +#define S_000054_VCRTC_IDX_MASTER(x) (((x) & 0x7F) << 24) +#define G_000054_VCRTC_IDX_MASTER(x) (((x) >> 24) & 0x7F) +#define C_000054_VCRTC_IDX_MASTER 0x80FFFFFF +#define R_00023C_DISPLAY_BASE_ADDR 0x00023C +#define S_00023C_DISPLAY_BASE_ADDR(x) (((x) & 0xFFFFFFFF) << 0) +#define G_00023C_DISPLAY_BASE_ADDR(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_00023C_DISPLAY_BASE_ADDR 0x00000000 +#define R_000260_CUR_OFFSET 0x000260 +#define S_000260_CUR_OFFSET(x) (((x) & 0x7FFFFFF) << 0) +#define G_000260_CUR_OFFSET(x) (((x) >> 0) & 0x7FFFFFF) +#define C_000260_CUR_OFFSET 0xF8000000 +#define S_000260_CUR_LOCK(x) (((x) & 0x1) << 31) +#define G_000260_CUR_LOCK(x) (((x) >> 31) & 0x1) +#define C_000260_CUR_LOCK 0x7FFFFFFF +#define R_00033C_CRTC2_DISPLAY_BASE_ADDR 0x00033C +#define 
S_00033C_CRTC2_DISPLAY_BASE_ADDR(x) (((x) & 0xFFFFFFFF) << 0) +#define G_00033C_CRTC2_DISPLAY_BASE_ADDR(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_00033C_CRTC2_DISPLAY_BASE_ADDR 0x00000000 +#define R_000360_CUR2_OFFSET 0x000360 +#define S_000360_CUR2_OFFSET(x) (((x) & 0x7FFFFFF) << 0) +#define G_000360_CUR2_OFFSET(x) (((x) >> 0) & 0x7FFFFFF) +#define C_000360_CUR2_OFFSET 0xF8000000 +#define S_000360_CUR2_LOCK(x) (((x) & 0x1) << 31) +#define G_000360_CUR2_LOCK(x) (((x) >> 31) & 0x1) +#define C_000360_CUR2_LOCK 0x7FFFFFFF +#define R_0003C0_GENMO_WT 0x0003C0 +#define S_0003C0_GENMO_MONO_ADDRESS_B(x) (((x) & 0x1) << 0) +#define G_0003C0_GENMO_MONO_ADDRESS_B(x) (((x) >> 0) & 0x1) +#define C_0003C0_GENMO_MONO_ADDRESS_B 0xFFFFFFFE +#define S_0003C0_VGA_RAM_EN(x) (((x) & 0x1) << 1) +#define G_0003C0_VGA_RAM_EN(x) (((x) >> 1) & 0x1) +#define C_0003C0_VGA_RAM_EN 0xFFFFFFFD +#define S_0003C0_VGA_CKSEL(x) (((x) & 0x3) << 2) +#define G_0003C0_VGA_CKSEL(x) (((x) >> 2) & 0x3) +#define C_0003C0_VGA_CKSEL 0xFFFFFFF3 +#define S_0003C0_ODD_EVEN_MD_PGSEL(x) (((x) & 0x1) << 5) +#define G_0003C0_ODD_EVEN_MD_PGSEL(x) (((x) >> 5) & 0x1) +#define C_0003C0_ODD_EVEN_MD_PGSEL 0xFFFFFFDF +#define S_0003C0_VGA_HSYNC_POL(x) (((x) & 0x1) << 6) +#define G_0003C0_VGA_HSYNC_POL(x) (((x) >> 6) & 0x1) +#define C_0003C0_VGA_HSYNC_POL 0xFFFFFFBF +#define S_0003C0_VGA_VSYNC_POL(x) (((x) & 0x1) << 7) +#define G_0003C0_VGA_VSYNC_POL(x) (((x) >> 7) & 0x1) +#define C_0003C0_VGA_VSYNC_POL 0xFFFFFF7F +#define R_0003F8_CRTC2_GEN_CNTL 0x0003F8 +#define S_0003F8_CRTC2_DBL_SCAN_EN(x) (((x) & 0x1) << 0) +#define G_0003F8_CRTC2_DBL_SCAN_EN(x) (((x) >> 0) & 0x1) +#define C_0003F8_CRTC2_DBL_SCAN_EN 0xFFFFFFFE +#define S_0003F8_CRTC2_INTERLACE_EN(x) (((x) & 0x1) << 1) +#define G_0003F8_CRTC2_INTERLACE_EN(x) (((x) >> 1) & 0x1) +#define C_0003F8_CRTC2_INTERLACE_EN 0xFFFFFFFD +#define S_0003F8_CRTC2_SYNC_TRISTATE(x) (((x) & 0x1) << 4) +#define G_0003F8_CRTC2_SYNC_TRISTATE(x) (((x) >> 4) & 0x1) +#define 
C_0003F8_CRTC2_SYNC_TRISTATE 0xFFFFFFEF +#define S_0003F8_CRTC2_HSYNC_TRISTATE(x) (((x) & 0x1) << 5) +#define G_0003F8_CRTC2_HSYNC_TRISTATE(x) (((x) >> 5) & 0x1) +#define C_0003F8_CRTC2_HSYNC_TRISTATE 0xFFFFFFDF +#define S_0003F8_CRTC2_VSYNC_TRISTATE(x) (((x) & 0x1) << 6) +#define G_0003F8_CRTC2_VSYNC_TRISTATE(x) (((x) >> 6) & 0x1) +#define C_0003F8_CRTC2_VSYNC_TRISTATE 0xFFFFFFBF +#define S_0003F8_CRT2_ON(x) (((x) & 0x1) << 7) +#define G_0003F8_CRT2_ON(x) (((x) >> 7) & 0x1) +#define C_0003F8_CRT2_ON 0xFFFFFF7F +#define S_0003F8_CRTC2_PIX_WIDTH(x) (((x) & 0xF) << 8) +#define G_0003F8_CRTC2_PIX_WIDTH(x) (((x) >> 8) & 0xF) +#define C_0003F8_CRTC2_PIX_WIDTH 0xFFFFF0FF +#define S_0003F8_CRTC2_ICON_EN(x) (((x) & 0x1) << 15) +#define G_0003F8_CRTC2_ICON_EN(x) (((x) >> 15) & 0x1) +#define C_0003F8_CRTC2_ICON_EN 0xFFFF7FFF +#define S_0003F8_CRTC2_CUR_EN(x) (((x) & 0x1) << 16) +#define G_0003F8_CRTC2_CUR_EN(x) (((x) >> 16) & 0x1) +#define C_0003F8_CRTC2_CUR_EN 0xFFFEFFFF +#define S_0003F8_CRTC2_CUR_MODE(x) (((x) & 0x7) << 20) +#define G_0003F8_CRTC2_CUR_MODE(x) (((x) >> 20) & 0x7) +#define C_0003F8_CRTC2_CUR_MODE 0xFF8FFFFF +#define S_0003F8_CRTC2_DISPLAY_DIS(x) (((x) & 0x1) << 23) +#define G_0003F8_CRTC2_DISPLAY_DIS(x) (((x) >> 23) & 0x1) +#define C_0003F8_CRTC2_DISPLAY_DIS 0xFF7FFFFF +#define S_0003F8_CRTC2_EN(x) (((x) & 0x1) << 25) +#define G_0003F8_CRTC2_EN(x) (((x) >> 25) & 0x1) +#define C_0003F8_CRTC2_EN 0xFDFFFFFF +#define S_0003F8_CRTC2_DISP_REQ_EN_B(x) (((x) & 0x1) << 26) +#define G_0003F8_CRTC2_DISP_REQ_EN_B(x) (((x) >> 26) & 0x1) +#define C_0003F8_CRTC2_DISP_REQ_EN_B 0xFBFFFFFF +#define S_0003F8_CRTC2_C_SYNC_EN(x) (((x) & 0x1) << 27) +#define G_0003F8_CRTC2_C_SYNC_EN(x) (((x) >> 27) & 0x1) +#define C_0003F8_CRTC2_C_SYNC_EN 0xF7FFFFFF +#define S_0003F8_CRTC2_HSYNC_DIS(x) (((x) & 0x1) << 28) +#define G_0003F8_CRTC2_HSYNC_DIS(x) (((x) >> 28) & 0x1) +#define C_0003F8_CRTC2_HSYNC_DIS 0xEFFFFFFF +#define S_0003F8_CRTC2_VSYNC_DIS(x) (((x) & 0x1) << 29) +#define 
G_0003F8_CRTC2_VSYNC_DIS(x) (((x) >> 29) & 0x1) +#define C_0003F8_CRTC2_VSYNC_DIS 0xDFFFFFFF +#define R_000420_OV0_SCALE_CNTL 0x000420 +#define S_000420_OV0_NO_READ_BEHIND_SCAN(x) (((x) & 0x1) << 1) +#define G_000420_OV0_NO_READ_BEHIND_SCAN(x) (((x) >> 1) & 0x1) +#define C_000420_OV0_NO_READ_BEHIND_SCAN 0xFFFFFFFD +#define S_000420_OV0_HORZ_PICK_NEAREST(x) (((x) & 0x1) << 2) +#define G_000420_OV0_HORZ_PICK_NEAREST(x) (((x) >> 2) & 0x1) +#define C_000420_OV0_HORZ_PICK_NEAREST 0xFFFFFFFB +#define S_000420_OV0_VERT_PICK_NEAREST(x) (((x) & 0x1) << 3) +#define G_000420_OV0_VERT_PICK_NEAREST(x) (((x) >> 3) & 0x1) +#define C_000420_OV0_VERT_PICK_NEAREST 0xFFFFFFF7 +#define S_000420_OV0_SIGNED_UV(x) (((x) & 0x1) << 4) +#define G_000420_OV0_SIGNED_UV(x) (((x) >> 4) & 0x1) +#define C_000420_OV0_SIGNED_UV 0xFFFFFFEF +#define S_000420_OV0_GAMMA_SEL(x) (((x) & 0x7) << 5) +#define G_000420_OV0_GAMMA_SEL(x) (((x) >> 5) & 0x7) +#define C_000420_OV0_GAMMA_SEL 0xFFFFFF1F +#define S_000420_OV0_SURFACE_FORMAT(x) (((x) & 0xF) << 8) +#define G_000420_OV0_SURFACE_FORMAT(x) (((x) >> 8) & 0xF) +#define C_000420_OV0_SURFACE_FORMAT 0xFFFFF0FF +#define S_000420_OV0_ADAPTIVE_DEINT(x) (((x) & 0x1) << 12) +#define G_000420_OV0_ADAPTIVE_DEINT(x) (((x) >> 12) & 0x1) +#define C_000420_OV0_ADAPTIVE_DEINT 0xFFFFEFFF +#define S_000420_OV0_CRTC_SEL(x) (((x) & 0x1) << 14) +#define G_000420_OV0_CRTC_SEL(x) (((x) >> 14) & 0x1) +#define C_000420_OV0_CRTC_SEL 0xFFFFBFFF +#define S_000420_OV0_BURST_PER_PLANE(x) (((x) & 0x7F) << 16) +#define G_000420_OV0_BURST_PER_PLANE(x) (((x) >> 16) & 0x7F) +#define C_000420_OV0_BURST_PER_PLANE 0xFF80FFFF +#define S_000420_OV0_DOUBLE_BUFFER_REGS(x) (((x) & 0x1) << 24) +#define G_000420_OV0_DOUBLE_BUFFER_REGS(x) (((x) >> 24) & 0x1) +#define C_000420_OV0_DOUBLE_BUFFER_REGS 0xFEFFFFFF +#define S_000420_OV0_BANDWIDTH(x) (((x) & 0x1) << 26) +#define G_000420_OV0_BANDWIDTH(x) (((x) >> 26) & 0x1) +#define C_000420_OV0_BANDWIDTH 0xFBFFFFFF +#define S_000420_OV0_LIN_TRANS_BYPASS(x) 
(((x) & 0x1) << 28) +#define G_000420_OV0_LIN_TRANS_BYPASS(x) (((x) >> 28) & 0x1) +#define C_000420_OV0_LIN_TRANS_BYPASS 0xEFFFFFFF +#define S_000420_OV0_INT_EMU(x) (((x) & 0x1) << 29) +#define G_000420_OV0_INT_EMU(x) (((x) >> 29) & 0x1) +#define C_000420_OV0_INT_EMU 0xDFFFFFFF +#define S_000420_OV0_OVERLAY_EN(x) (((x) & 0x1) << 30) +#define G_000420_OV0_OVERLAY_EN(x) (((x) >> 30) & 0x1) +#define C_000420_OV0_OVERLAY_EN 0xBFFFFFFF +#define S_000420_OV0_SOFT_RESET(x) (((x) & 0x1) << 31) +#define G_000420_OV0_SOFT_RESET(x) (((x) >> 31) & 0x1) +#define C_000420_OV0_SOFT_RESET 0x7FFFFFFF +#define R_00070C_CP_RB_RPTR_ADDR 0x00070C +#define S_00070C_RB_RPTR_SWAP(x) (((x) & 0x3) << 0) +#define G_00070C_RB_RPTR_SWAP(x) (((x) >> 0) & 0x3) +#define C_00070C_RB_RPTR_SWAP 0xFFFFFFFC +#define S_00070C_RB_RPTR_ADDR(x) (((x) & 0x3FFFFFFF) << 2) +#define G_00070C_RB_RPTR_ADDR(x) (((x) >> 2) & 0x3FFFFFFF) +#define C_00070C_RB_RPTR_ADDR 0x00000003 +#define R_000740_CP_CSQ_CNTL 0x000740 +#define S_000740_CSQ_CNT_PRIMARY(x) (((x) & 0xFF) << 0) +#define G_000740_CSQ_CNT_PRIMARY(x) (((x) >> 0) & 0xFF) +#define C_000740_CSQ_CNT_PRIMARY 0xFFFFFF00 +#define S_000740_CSQ_CNT_INDIRECT(x) (((x) & 0xFF) << 8) +#define G_000740_CSQ_CNT_INDIRECT(x) (((x) >> 8) & 0xFF) +#define C_000740_CSQ_CNT_INDIRECT 0xFFFF00FF +#define S_000740_CSQ_MODE(x) (((x) & 0xF) << 28) +#define G_000740_CSQ_MODE(x) (((x) >> 28) & 0xF) +#define C_000740_CSQ_MODE 0x0FFFFFFF +#define R_000770_SCRATCH_UMSK 0x000770 +#define S_000770_SCRATCH_UMSK(x) (((x) & 0x3F) << 0) +#define G_000770_SCRATCH_UMSK(x) (((x) >> 0) & 0x3F) +#define C_000770_SCRATCH_UMSK 0xFFFFFFC0 +#define S_000770_SCRATCH_SWAP(x) (((x) & 0x3) << 16) +#define G_000770_SCRATCH_SWAP(x) (((x) >> 16) & 0x3) +#define C_000770_SCRATCH_SWAP 0xFFFCFFFF +#define R_000774_SCRATCH_ADDR 0x000774 +#define S_000774_SCRATCH_ADDR(x) (((x) & 0x7FFFFFF) << 5) +#define G_000774_SCRATCH_ADDR(x) (((x) >> 5) & 0x7FFFFFF) +#define C_000774_SCRATCH_ADDR 0x0000001F #define 
R_000E40_RBBM_STATUS 0x000E40 #define S_000E40_CMDFIFO_AVAIL(x) (((x) & 0x7F) << 0) #define G_000E40_CMDFIFO_AVAIL(x) (((x) >> 0) & 0x7F) diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 9c17b786982..92f9cb74a7d 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -1241,11 +1241,12 @@ static int r300_packet3_check(struct radeon_cs_parser *p, int r300_cs_parse(struct radeon_cs_parser *p) { struct radeon_cs_packet pkt; - struct r100_cs_track track; + struct r100_cs_track *track; int r; - r100_cs_track_clear(p->rdev, &track); - p->track = &track; + track = kzalloc(sizeof(*track), GFP_KERNEL); + r100_cs_track_clear(p->rdev, track); + p->track = track; do { r = r100_cs_packet_parse(p, &pkt, p->idx); if (r) { @@ -1275,9 +1276,50 @@ int r300_cs_parse(struct radeon_cs_parser *p) return 0; } -int r300_init(struct radeon_device *rdev) +void r300_set_reg_safe(struct radeon_device *rdev) { rdev->config.r300.reg_safe_bm = r300_reg_safe_bm; rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm); +} + +int r300_init(struct radeon_device *rdev) +{ + r300_set_reg_safe(rdev); return 0; } + +void r300_mc_program(struct radeon_device *rdev) +{ + struct r100_mc_save save; + int r; + + r = r100_debugfs_mc_info_init(rdev); + if (r) { + dev_err(rdev->dev, "Failed to create r100_mc debugfs file.\n"); + } + + /* Stops all mc clients */ + r100_mc_stop(rdev, &save); + /* Shutdown PCI/PCIE GART */ + radeon_gart_disable(rdev); + if (rdev->flags & RADEON_IS_AGP) { + WREG32(R_00014C_MC_AGP_LOCATION, + S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) | + S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16)); + WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base)); + WREG32(R_00015C_AGP_BASE_2, + upper_32_bits(rdev->mc.agp_base) & 0xff); + } else { + WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF); + WREG32(R_000170_AGP_BASE, 0); + WREG32(R_00015C_AGP_BASE_2, 0); + } + /* Wait for mc idle */ + if (r300_mc_wait_for_idle(rdev)) + 
DRM_INFO("Failed to wait MC idle before programming MC.\n"); + /* Program MC, should be a 32bits limited address space */ + WREG32(R_000148_MC_FB_LOCATION, + S_000148_MC_FB_START(rdev->mc.vram_start >> 16) | + S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16)); + r100_mc_resume(rdev, &save); +} diff --git a/drivers/gpu/drm/radeon/r300d.h b/drivers/gpu/drm/radeon/r300d.h index 63ec076f2cd..d4fa3eb1074 100644 --- a/drivers/gpu/drm/radeon/r300d.h +++ b/drivers/gpu/drm/radeon/r300d.h @@ -73,4 +73,29 @@ #define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) #define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) +/* Registers */ +#define R_000148_MC_FB_LOCATION 0x000148 +#define S_000148_MC_FB_START(x) (((x) & 0xFFFF) << 0) +#define G_000148_MC_FB_START(x) (((x) >> 0) & 0xFFFF) +#define C_000148_MC_FB_START 0xFFFF0000 +#define S_000148_MC_FB_TOP(x) (((x) & 0xFFFF) << 16) +#define G_000148_MC_FB_TOP(x) (((x) >> 16) & 0xFFFF) +#define C_000148_MC_FB_TOP 0x0000FFFF +#define R_00014C_MC_AGP_LOCATION 0x00014C +#define S_00014C_MC_AGP_START(x) (((x) & 0xFFFF) << 0) +#define G_00014C_MC_AGP_START(x) (((x) >> 0) & 0xFFFF) +#define C_00014C_MC_AGP_START 0xFFFF0000 +#define S_00014C_MC_AGP_TOP(x) (((x) & 0xFFFF) << 16) +#define G_00014C_MC_AGP_TOP(x) (((x) >> 16) & 0xFFFF) +#define C_00014C_MC_AGP_TOP 0x0000FFFF +#define R_00015C_AGP_BASE_2 0x00015C +#define S_00015C_AGP_BASE_ADDR_2(x) (((x) & 0xF) << 0) +#define G_00015C_AGP_BASE_ADDR_2(x) (((x) >> 0) & 0xF) +#define C_00015C_AGP_BASE_ADDR_2 0xFFFFFFF0 +#define R_000170_AGP_BASE 0x000170 +#define S_000170_AGP_BASE_ADDR(x) (((x) & 0xFFFFFFFF) << 0) +#define G_000170_AGP_BASE_ADDR(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_000170_AGP_BASE_ADDR 0x00000000 + + #endif diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 551d6996d3f..e57b9ba4aaf 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -29,48 +29,13 @@ #include "drmP.h" #include "radeon_reg.h" #include "radeon.h" +#include 
"atom.h" #include "r420d.h" -/* r420,r423,rv410 depends on : */ -void r100_pci_gart_disable(struct radeon_device *rdev); -void r100_hdp_reset(struct radeon_device *rdev); -void r100_mc_setup(struct radeon_device *rdev); -int r100_gui_wait_for_idle(struct radeon_device *rdev); -void r100_mc_disable_clients(struct radeon_device *rdev); -void r300_vram_info(struct radeon_device *rdev); -int r300_mc_wait_for_idle(struct radeon_device *rdev); -int rv370_pcie_gart_enable(struct radeon_device *rdev); -void rv370_pcie_gart_disable(struct radeon_device *rdev); - -/* This files gather functions specifics to : - * r420,r423,rv410 - * - * Some of these functions might be used by newer ASICs. - */ -void r420_gpu_init(struct radeon_device *rdev); -int r420_debugfs_pipes_info_init(struct radeon_device *rdev); - - -/* - * MC - */ int r420_mc_init(struct radeon_device *rdev) { int r; - if (r100_debugfs_rbbm_init(rdev)) { - DRM_ERROR("Failed to register debugfs file for RBBM !\n"); - } - if (r420_debugfs_pipes_info_init(rdev)) { - DRM_ERROR("Failed to register debugfs file for pipes !\n"); - } - - r420_gpu_init(rdev); - r100_pci_gart_disable(rdev); - if (rdev->flags & RADEON_IS_PCIE) { - rv370_pcie_gart_disable(rdev); - } - /* Setup GPU memory space */ rdev->mc.vram_location = 0xFFFFFFFFUL; rdev->mc.gtt_location = 0xFFFFFFFFUL; @@ -88,38 +53,9 @@ int r420_mc_init(struct radeon_device *rdev) if (r) { return r; } - - /* Program GPU memory space */ - r100_mc_disable_clients(rdev); - if (r300_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. 
Bad things might happen.\n"); - } - r100_mc_setup(rdev); return 0; } -void r420_mc_fini(struct radeon_device *rdev) -{ - if (rdev->flags & RADEON_IS_PCIE) { - rv370_pcie_gart_disable(rdev); - radeon_gart_table_vram_free(rdev); - } else { - r100_pci_gart_disable(rdev); - radeon_gart_table_ram_free(rdev); - } - radeon_gart_fini(rdev); -} - - -/* - * Global GPU functions - */ -void r420_errata(struct radeon_device *rdev) -{ - rdev->pll_errata = 0; -} - void r420_pipes_init(struct radeon_device *rdev) { unsigned tmp; @@ -185,25 +121,216 @@ void r420_pipes_init(struct radeon_device *rdev) rdev->num_gb_pipes, rdev->num_z_pipes); } -void r420_gpu_init(struct radeon_device *rdev) +u32 r420_mc_rreg(struct radeon_device *rdev, u32 reg) { - r100_hdp_reset(rdev); + u32 r; + + WREG32(R_0001F8_MC_IND_INDEX, S_0001F8_MC_IND_ADDR(reg)); + r = RREG32(R_0001FC_MC_IND_DATA); + return r; +} + +void r420_mc_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + WREG32(R_0001F8_MC_IND_INDEX, S_0001F8_MC_IND_ADDR(reg) | + S_0001F8_MC_IND_WR_EN(1)); + WREG32(R_0001FC_MC_IND_DATA, v); +} + +static void r420_debugfs(struct radeon_device *rdev) +{ + if (r100_debugfs_rbbm_init(rdev)) { + DRM_ERROR("Failed to register debugfs file for RBBM !\n"); + } + if (r420_debugfs_pipes_info_init(rdev)) { + DRM_ERROR("Failed to register debugfs file for pipes !\n"); + } +} + +static void r420_clock_resume(struct radeon_device *rdev) +{ + u32 sclk_cntl; + sclk_cntl = RREG32_PLL(R_00000D_SCLK_CNTL); + sclk_cntl |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1); + if (rdev->family == CHIP_R420) + sclk_cntl |= S_00000D_FORCE_PX(1) | S_00000D_FORCE_TX(1); + WREG32_PLL(R_00000D_SCLK_CNTL, sclk_cntl); +} + +int r420_resume(struct radeon_device *rdev) +{ + int r; + + /* Resume clock before doing reset */ + r420_clock_resume(rdev); + /* Reset gpu before posting otherwise ATOM will enter infinite loop */ + if (radeon_gpu_reset(rdev)) { + dev_warn(rdev->dev, "GPU reset failed ! 
(0xE40=0x%08X, 0x7C0=0x%08X)\n", + RREG32(R_000E40_RBBM_STATUS), + RREG32(R_0007C0_CP_STAT)); + } + /* check if cards are posted or not */ + if (rdev->is_atom_bios) { + atom_asic_init(rdev->mode_info.atom_context); + } else { + radeon_combios_asic_init(rdev->ddev); + } + /* Resume clock after posting */ + r420_clock_resume(rdev); + r300_mc_program(rdev); + /* Initialize GART (initialize after TTM so we can allocate + * memory through TTM but finalize after TTM) */ + r = radeon_gart_enable(rdev); + if (r) { + dev_err(rdev->dev, "failled initializing GART (%d).\n", r); + return r; + } r420_pipes_init(rdev); - if (r300_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. Bad things might happen.\n"); + /* Enable IRQ */ + rdev->irq.sw_int = true; + r100_irq_set(rdev); + /* 1M ring buffer */ + r = r100_cp_init(rdev, 1024 * 1024); + if (r) { + dev_err(rdev->dev, "failled initializing CP (%d).\n", r); + return r; + } + r = r100_wb_init(rdev); + if (r) { + dev_err(rdev->dev, "failled initializing WB (%d).\n", r); } + r = r100_ib_init(rdev); + if (r) { + dev_err(rdev->dev, "failled initializing IB (%d).\n", r); + return r; + } + return 0; } +int r420_suspend(struct radeon_device *rdev) +{ + r100_cp_disable(rdev); + r100_wb_disable(rdev); + r100_irq_disable(rdev); + radeon_gart_disable(rdev); + return 0; +} -/* - * r420,r423,rv410 VRAM info - */ -void r420_vram_info(struct radeon_device *rdev) +void r420_fini(struct radeon_device *rdev) { - r300_vram_info(rdev); + r100_cp_fini(rdev); + r100_wb_fini(rdev); + r100_ib_fini(rdev); + radeon_gem_fini(rdev); + if (rdev->flags & RADEON_IS_PCIE) { + rv370_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + } else { + r100_pci_gart_disable(rdev); + radeon_gart_table_ram_free(rdev); + } + radeon_gart_fini(rdev); + radeon_agp_fini(rdev); + radeon_irq_kms_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_object_fini(rdev); + if (rdev->is_atom_bios) { + 
radeon_atombios_fini(rdev); + } else { + radeon_combios_fini(rdev); + } + kfree(rdev->bios); + rdev->bios = NULL; } +int r420_init(struct radeon_device *rdev) +{ + int r; + + rdev->new_init_path = true; + /* Initialize scratch registers */ + radeon_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); + /* TODO: disable VGA need to use VGA request */ + /* BIOS*/ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; + } + if (rdev->is_atom_bios) { + r = radeon_atombios_init(rdev); + if (r) { + return r; + } + } else { + r = radeon_combios_init(rdev); + if (r) { + return r; + } + } + /* Reset gpu before posting otherwise ATOM will enter infinite loop */ + if (radeon_gpu_reset(rdev)) { + dev_warn(rdev->dev, + "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n", + RREG32(R_000E40_RBBM_STATUS), + RREG32(R_0007C0_CP_STAT)); + } + /* check if cards are posted or not */ + if (!radeon_card_posted(rdev) && rdev->bios) { + DRM_INFO("GPU not posted. 
posting now...\n"); + if (rdev->is_atom_bios) { + atom_asic_init(rdev->mode_info.atom_context); + } else { + radeon_combios_asic_init(rdev->ddev); + } + } + /* Initialize clocks */ + radeon_get_clock_info(rdev->ddev); + /* Get vram informations */ + r300_vram_info(rdev); + /* Initialize memory controller (also test AGP) */ + r = r420_mc_init(rdev); + if (r) { + return r; + } + r420_debugfs(rdev); + /* Fence driver */ + r = radeon_fence_driver_init(rdev); + if (r) { + return r; + } + r = radeon_irq_kms_init(rdev); + if (r) { + return r; + } + /* Memory manager */ + r = radeon_object_init(rdev); + if (r) { + return r; + } + r300_set_reg_safe(rdev); + r = r420_resume(rdev); + if (r) { + /* Somethings want wront with the accel init stop accel */ + dev_err(rdev->dev, "Disabling GPU acceleration\n"); + r420_suspend(rdev); + r100_cp_fini(rdev); + r100_wb_fini(rdev); + r100_ib_fini(rdev); + if (rdev->flags & RADEON_IS_PCIE) { + rv370_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + } else { + r100_pci_gart_disable(rdev); + radeon_gart_table_ram_free(rdev); + } + radeon_gart_fini(rdev); + radeon_agp_fini(rdev); + radeon_irq_kms_fini(rdev); + } + return 0; +} /* * Debugfs info @@ -238,19 +365,3 @@ int r420_debugfs_pipes_info_init(struct radeon_device *rdev) return 0; #endif } - -u32 r420_mc_rreg(struct radeon_device *rdev, u32 reg) -{ - u32 r; - - WREG32(R_0001F8_MC_IND_INDEX, S_0001F8_MC_IND_ADDR(reg)); - r = RREG32(R_0001FC_MC_IND_DATA); - return r; -} - -void r420_mc_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - WREG32(R_0001F8_MC_IND_INDEX, S_0001F8_MC_IND_ADDR(reg) | - S_0001F8_MC_IND_WR_EN(1)); - WREG32(R_0001FC_MC_IND_DATA, v); -} diff --git a/drivers/gpu/drm/radeon/r420d.h b/drivers/gpu/drm/radeon/r420d.h index 8b946c1883b..a48a7db1e2a 100644 --- a/drivers/gpu/drm/radeon/r420d.h +++ b/drivers/gpu/drm/radeon/r420d.h @@ -39,5 +39,211 @@ #define S_0001FC_MC_IND_DATA(x) (((x) & 0xFFFFFFFF) << 0) #define G_0001FC_MC_IND_DATA(x) (((x) >> 0) & 
0xFFFFFFFF) #define C_0001FC_MC_IND_DATA 0x00000000 +#define R_0007C0_CP_STAT 0x0007C0 +#define S_0007C0_MRU_BUSY(x) (((x) & 0x1) << 0) +#define G_0007C0_MRU_BUSY(x) (((x) >> 0) & 0x1) +#define C_0007C0_MRU_BUSY 0xFFFFFFFE +#define S_0007C0_MWU_BUSY(x) (((x) & 0x1) << 1) +#define G_0007C0_MWU_BUSY(x) (((x) >> 1) & 0x1) +#define C_0007C0_MWU_BUSY 0xFFFFFFFD +#define S_0007C0_RSIU_BUSY(x) (((x) & 0x1) << 2) +#define G_0007C0_RSIU_BUSY(x) (((x) >> 2) & 0x1) +#define C_0007C0_RSIU_BUSY 0xFFFFFFFB +#define S_0007C0_RCIU_BUSY(x) (((x) & 0x1) << 3) +#define G_0007C0_RCIU_BUSY(x) (((x) >> 3) & 0x1) +#define C_0007C0_RCIU_BUSY 0xFFFFFFF7 +#define S_0007C0_CSF_PRIMARY_BUSY(x) (((x) & 0x1) << 9) +#define G_0007C0_CSF_PRIMARY_BUSY(x) (((x) >> 9) & 0x1) +#define C_0007C0_CSF_PRIMARY_BUSY 0xFFFFFDFF +#define S_0007C0_CSF_INDIRECT_BUSY(x) (((x) & 0x1) << 10) +#define G_0007C0_CSF_INDIRECT_BUSY(x) (((x) >> 10) & 0x1) +#define C_0007C0_CSF_INDIRECT_BUSY 0xFFFFFBFF +#define S_0007C0_CSQ_PRIMARY_BUSY(x) (((x) & 0x1) << 11) +#define G_0007C0_CSQ_PRIMARY_BUSY(x) (((x) >> 11) & 0x1) +#define C_0007C0_CSQ_PRIMARY_BUSY 0xFFFFF7FF +#define S_0007C0_CSQ_INDIRECT_BUSY(x) (((x) & 0x1) << 12) +#define G_0007C0_CSQ_INDIRECT_BUSY(x) (((x) >> 12) & 0x1) +#define C_0007C0_CSQ_INDIRECT_BUSY 0xFFFFEFFF +#define S_0007C0_CSI_BUSY(x) (((x) & 0x1) << 13) +#define G_0007C0_CSI_BUSY(x) (((x) >> 13) & 0x1) +#define C_0007C0_CSI_BUSY 0xFFFFDFFF +#define S_0007C0_CSF_INDIRECT2_BUSY(x) (((x) & 0x1) << 14) +#define G_0007C0_CSF_INDIRECT2_BUSY(x) (((x) >> 14) & 0x1) +#define C_0007C0_CSF_INDIRECT2_BUSY 0xFFFFBFFF +#define S_0007C0_CSQ_INDIRECT2_BUSY(x) (((x) & 0x1) << 15) +#define G_0007C0_CSQ_INDIRECT2_BUSY(x) (((x) >> 15) & 0x1) +#define C_0007C0_CSQ_INDIRECT2_BUSY 0xFFFF7FFF +#define S_0007C0_GUIDMA_BUSY(x) (((x) & 0x1) << 28) +#define G_0007C0_GUIDMA_BUSY(x) (((x) >> 28) & 0x1) +#define C_0007C0_GUIDMA_BUSY 0xEFFFFFFF +#define S_0007C0_VIDDMA_BUSY(x) (((x) & 0x1) << 29) +#define G_0007C0_VIDDMA_BUSY(x) 
(((x) >> 29) & 0x1) +#define C_0007C0_VIDDMA_BUSY 0xDFFFFFFF +#define S_0007C0_CMDSTRM_BUSY(x) (((x) & 0x1) << 30) +#define G_0007C0_CMDSTRM_BUSY(x) (((x) >> 30) & 0x1) +#define C_0007C0_CMDSTRM_BUSY 0xBFFFFFFF +#define S_0007C0_CP_BUSY(x) (((x) & 0x1) << 31) +#define G_0007C0_CP_BUSY(x) (((x) >> 31) & 0x1) +#define C_0007C0_CP_BUSY 0x7FFFFFFF +#define R_000E40_RBBM_STATUS 0x000E40 +#define S_000E40_CMDFIFO_AVAIL(x) (((x) & 0x7F) << 0) +#define G_000E40_CMDFIFO_AVAIL(x) (((x) >> 0) & 0x7F) +#define C_000E40_CMDFIFO_AVAIL 0xFFFFFF80 +#define S_000E40_HIRQ_ON_RBB(x) (((x) & 0x1) << 8) +#define G_000E40_HIRQ_ON_RBB(x) (((x) >> 8) & 0x1) +#define C_000E40_HIRQ_ON_RBB 0xFFFFFEFF +#define S_000E40_CPRQ_ON_RBB(x) (((x) & 0x1) << 9) +#define G_000E40_CPRQ_ON_RBB(x) (((x) >> 9) & 0x1) +#define C_000E40_CPRQ_ON_RBB 0xFFFFFDFF +#define S_000E40_CFRQ_ON_RBB(x) (((x) & 0x1) << 10) +#define G_000E40_CFRQ_ON_RBB(x) (((x) >> 10) & 0x1) +#define C_000E40_CFRQ_ON_RBB 0xFFFFFBFF +#define S_000E40_HIRQ_IN_RTBUF(x) (((x) & 0x1) << 11) +#define G_000E40_HIRQ_IN_RTBUF(x) (((x) >> 11) & 0x1) +#define C_000E40_HIRQ_IN_RTBUF 0xFFFFF7FF +#define S_000E40_CPRQ_IN_RTBUF(x) (((x) & 0x1) << 12) +#define G_000E40_CPRQ_IN_RTBUF(x) (((x) >> 12) & 0x1) +#define C_000E40_CPRQ_IN_RTBUF 0xFFFFEFFF +#define S_000E40_CFRQ_IN_RTBUF(x) (((x) & 0x1) << 13) +#define G_000E40_CFRQ_IN_RTBUF(x) (((x) >> 13) & 0x1) +#define C_000E40_CFRQ_IN_RTBUF 0xFFFFDFFF +#define S_000E40_CF_PIPE_BUSY(x) (((x) & 0x1) << 14) +#define G_000E40_CF_PIPE_BUSY(x) (((x) >> 14) & 0x1) +#define C_000E40_CF_PIPE_BUSY 0xFFFFBFFF +#define S_000E40_ENG_EV_BUSY(x) (((x) & 0x1) << 15) +#define G_000E40_ENG_EV_BUSY(x) (((x) >> 15) & 0x1) +#define C_000E40_ENG_EV_BUSY 0xFFFF7FFF +#define S_000E40_CP_CMDSTRM_BUSY(x) (((x) & 0x1) << 16) +#define G_000E40_CP_CMDSTRM_BUSY(x) (((x) >> 16) & 0x1) +#define C_000E40_CP_CMDSTRM_BUSY 0xFFFEFFFF +#define S_000E40_E2_BUSY(x) (((x) & 0x1) << 17) +#define G_000E40_E2_BUSY(x) (((x) >> 17) & 0x1) +#define 
C_000E40_E2_BUSY 0xFFFDFFFF +#define S_000E40_RB2D_BUSY(x) (((x) & 0x1) << 18) +#define G_000E40_RB2D_BUSY(x) (((x) >> 18) & 0x1) +#define C_000E40_RB2D_BUSY 0xFFFBFFFF +#define S_000E40_RB3D_BUSY(x) (((x) & 0x1) << 19) +#define G_000E40_RB3D_BUSY(x) (((x) >> 19) & 0x1) +#define C_000E40_RB3D_BUSY 0xFFF7FFFF +#define S_000E40_VAP_BUSY(x) (((x) & 0x1) << 20) +#define G_000E40_VAP_BUSY(x) (((x) >> 20) & 0x1) +#define C_000E40_VAP_BUSY 0xFFEFFFFF +#define S_000E40_RE_BUSY(x) (((x) & 0x1) << 21) +#define G_000E40_RE_BUSY(x) (((x) >> 21) & 0x1) +#define C_000E40_RE_BUSY 0xFFDFFFFF +#define S_000E40_TAM_BUSY(x) (((x) & 0x1) << 22) +#define G_000E40_TAM_BUSY(x) (((x) >> 22) & 0x1) +#define C_000E40_TAM_BUSY 0xFFBFFFFF +#define S_000E40_TDM_BUSY(x) (((x) & 0x1) << 23) +#define G_000E40_TDM_BUSY(x) (((x) >> 23) & 0x1) +#define C_000E40_TDM_BUSY 0xFF7FFFFF +#define S_000E40_PB_BUSY(x) (((x) & 0x1) << 24) +#define G_000E40_PB_BUSY(x) (((x) >> 24) & 0x1) +#define C_000E40_PB_BUSY 0xFEFFFFFF +#define S_000E40_TIM_BUSY(x) (((x) & 0x1) << 25) +#define G_000E40_TIM_BUSY(x) (((x) >> 25) & 0x1) +#define C_000E40_TIM_BUSY 0xFDFFFFFF +#define S_000E40_GA_BUSY(x) (((x) & 0x1) << 26) +#define G_000E40_GA_BUSY(x) (((x) >> 26) & 0x1) +#define C_000E40_GA_BUSY 0xFBFFFFFF +#define S_000E40_CBA2D_BUSY(x) (((x) & 0x1) << 27) +#define G_000E40_CBA2D_BUSY(x) (((x) >> 27) & 0x1) +#define C_000E40_CBA2D_BUSY 0xF7FFFFFF +#define S_000E40_GUI_ACTIVE(x) (((x) & 0x1) << 31) +#define G_000E40_GUI_ACTIVE(x) (((x) >> 31) & 0x1) +#define C_000E40_GUI_ACTIVE 0x7FFFFFFF + +/* CLK registers */ +#define R_00000D_SCLK_CNTL 0x00000D +#define S_00000D_SCLK_SRC_SEL(x) (((x) & 0x7) << 0) +#define G_00000D_SCLK_SRC_SEL(x) (((x) >> 0) & 0x7) +#define C_00000D_SCLK_SRC_SEL 0xFFFFFFF8 +#define S_00000D_CP_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 3) +#define G_00000D_CP_MAX_DYN_STOP_LAT(x) (((x) >> 3) & 0x1) +#define C_00000D_CP_MAX_DYN_STOP_LAT 0xFFFFFFF7 +#define S_00000D_HDP_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 4) 
+#define G_00000D_HDP_MAX_DYN_STOP_LAT(x) (((x) >> 4) & 0x1) +#define C_00000D_HDP_MAX_DYN_STOP_LAT 0xFFFFFFEF +#define S_00000D_TV_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 5) +#define G_00000D_TV_MAX_DYN_STOP_LAT(x) (((x) >> 5) & 0x1) +#define C_00000D_TV_MAX_DYN_STOP_LAT 0xFFFFFFDF +#define S_00000D_E2_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 6) +#define G_00000D_E2_MAX_DYN_STOP_LAT(x) (((x) >> 6) & 0x1) +#define C_00000D_E2_MAX_DYN_STOP_LAT 0xFFFFFFBF +#define S_00000D_SE_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 7) +#define G_00000D_SE_MAX_DYN_STOP_LAT(x) (((x) >> 7) & 0x1) +#define C_00000D_SE_MAX_DYN_STOP_LAT 0xFFFFFF7F +#define S_00000D_IDCT_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 8) +#define G_00000D_IDCT_MAX_DYN_STOP_LAT(x) (((x) >> 8) & 0x1) +#define C_00000D_IDCT_MAX_DYN_STOP_LAT 0xFFFFFEFF +#define S_00000D_VIP_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 9) +#define G_00000D_VIP_MAX_DYN_STOP_LAT(x) (((x) >> 9) & 0x1) +#define C_00000D_VIP_MAX_DYN_STOP_LAT 0xFFFFFDFF +#define S_00000D_RE_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 10) +#define G_00000D_RE_MAX_DYN_STOP_LAT(x) (((x) >> 10) & 0x1) +#define C_00000D_RE_MAX_DYN_STOP_LAT 0xFFFFFBFF +#define S_00000D_PB_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 11) +#define G_00000D_PB_MAX_DYN_STOP_LAT(x) (((x) >> 11) & 0x1) +#define C_00000D_PB_MAX_DYN_STOP_LAT 0xFFFFF7FF +#define S_00000D_TAM_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 12) +#define G_00000D_TAM_MAX_DYN_STOP_LAT(x) (((x) >> 12) & 0x1) +#define C_00000D_TAM_MAX_DYN_STOP_LAT 0xFFFFEFFF +#define S_00000D_TDM_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 13) +#define G_00000D_TDM_MAX_DYN_STOP_LAT(x) (((x) >> 13) & 0x1) +#define C_00000D_TDM_MAX_DYN_STOP_LAT 0xFFFFDFFF +#define S_00000D_RB_MAX_DYN_STOP_LAT(x) (((x) & 0x1) << 14) +#define G_00000D_RB_MAX_DYN_STOP_LAT(x) (((x) >> 14) & 0x1) +#define C_00000D_RB_MAX_DYN_STOP_LAT 0xFFFFBFFF +#define S_00000D_FORCE_DISP2(x) (((x) & 0x1) << 15) +#define G_00000D_FORCE_DISP2(x) (((x) >> 15) & 0x1) +#define C_00000D_FORCE_DISP2 0xFFFF7FFF +#define 
S_00000D_FORCE_CP(x) (((x) & 0x1) << 16) +#define G_00000D_FORCE_CP(x) (((x) >> 16) & 0x1) +#define C_00000D_FORCE_CP 0xFFFEFFFF +#define S_00000D_FORCE_HDP(x) (((x) & 0x1) << 17) +#define G_00000D_FORCE_HDP(x) (((x) >> 17) & 0x1) +#define C_00000D_FORCE_HDP 0xFFFDFFFF +#define S_00000D_FORCE_DISP1(x) (((x) & 0x1) << 18) +#define G_00000D_FORCE_DISP1(x) (((x) >> 18) & 0x1) +#define C_00000D_FORCE_DISP1 0xFFFBFFFF +#define S_00000D_FORCE_TOP(x) (((x) & 0x1) << 19) +#define G_00000D_FORCE_TOP(x) (((x) >> 19) & 0x1) +#define C_00000D_FORCE_TOP 0xFFF7FFFF +#define S_00000D_FORCE_E2(x) (((x) & 0x1) << 20) +#define G_00000D_FORCE_E2(x) (((x) >> 20) & 0x1) +#define C_00000D_FORCE_E2 0xFFEFFFFF +#define S_00000D_FORCE_SE(x) (((x) & 0x1) << 21) +#define G_00000D_FORCE_SE(x) (((x) >> 21) & 0x1) +#define C_00000D_FORCE_SE 0xFFDFFFFF +#define S_00000D_FORCE_IDCT(x) (((x) & 0x1) << 22) +#define G_00000D_FORCE_IDCT(x) (((x) >> 22) & 0x1) +#define C_00000D_FORCE_IDCT 0xFFBFFFFF +#define S_00000D_FORCE_VIP(x) (((x) & 0x1) << 23) +#define G_00000D_FORCE_VIP(x) (((x) >> 23) & 0x1) +#define C_00000D_FORCE_VIP 0xFF7FFFFF +#define S_00000D_FORCE_RE(x) (((x) & 0x1) << 24) +#define G_00000D_FORCE_RE(x) (((x) >> 24) & 0x1) +#define C_00000D_FORCE_RE 0xFEFFFFFF +#define S_00000D_FORCE_PB(x) (((x) & 0x1) << 25) +#define G_00000D_FORCE_PB(x) (((x) >> 25) & 0x1) +#define C_00000D_FORCE_PB 0xFDFFFFFF +#define S_00000D_FORCE_PX(x) (((x) & 0x1) << 26) +#define G_00000D_FORCE_PX(x) (((x) >> 26) & 0x1) +#define C_00000D_FORCE_PX 0xFBFFFFFF +#define S_00000D_FORCE_TX(x) (((x) & 0x1) << 27) +#define G_00000D_FORCE_TX(x) (((x) >> 27) & 0x1) +#define C_00000D_FORCE_TX 0xF7FFFFFF +#define S_00000D_FORCE_RB(x) (((x) & 0x1) << 28) +#define G_00000D_FORCE_RB(x) (((x) >> 28) & 0x1) +#define C_00000D_FORCE_RB 0xEFFFFFFF +#define S_00000D_FORCE_TV_SCLK(x) (((x) & 0x1) << 29) +#define G_00000D_FORCE_TV_SCLK(x) (((x) >> 29) & 0x1) +#define C_00000D_FORCE_TV_SCLK 0xDFFFFFFF +#define S_00000D_FORCE_SUBPIC(x) 
(((x) & 0x1) << 30) +#define G_00000D_FORCE_SUBPIC(x) (((x) >> 30) & 0x1) +#define C_00000D_FORCE_SUBPIC 0xBFFFFFFF +#define S_00000D_FORCE_OV0(x) (((x) & 0x1) << 31) +#define G_00000D_FORCE_OV0(x) (((x) >> 31) & 0x1) +#define C_00000D_FORCE_OV0 0x7FFFFFFF #endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 23ede0e4783..e314756dacc 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -690,6 +690,7 @@ typedef uint32_t (*radeon_rreg_t)(struct radeon_device*, uint32_t); typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t); struct radeon_device { + struct device *dev; struct drm_device *ddev; struct pci_dev *pdev; /* ASIC */ @@ -936,16 +937,45 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) /* Common functions */ int radeon_modeset_init(struct radeon_device *rdev); void radeon_modeset_fini(struct radeon_device *rdev); +extern bool radeon_card_posted(struct radeon_device *rdev); /* r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */ -void r100_cp_disable(struct radeon_device *rdev); +struct r100_mc_save { + u32 GENMO_WT; + u32 CRTC_EXT_CNTL; + u32 CRTC_GEN_CNTL; + u32 CRTC2_GEN_CNTL; + u32 CUR_OFFSET; + u32 CUR2_OFFSET; +}; +extern void r100_cp_disable(struct radeon_device *rdev); +extern int r100_cp_init(struct radeon_device *rdev, unsigned ring_size); +extern void r100_cp_fini(struct radeon_device *rdev); void r100_pci_gart_tlb_flush(struct radeon_device *rdev); int r100_pci_gart_enable(struct radeon_device *rdev); void r100_pci_gart_disable(struct radeon_device *rdev); int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr); +extern int r100_debugfs_mc_info_init(struct radeon_device *rdev); +extern int r100_gui_wait_for_idle(struct radeon_device *rdev); +extern void r100_ib_fini(struct radeon_device *rdev); +extern int r100_ib_init(struct radeon_device *rdev); +extern void r100_irq_disable(struct radeon_device *rdev); 
+extern int r100_irq_set(struct radeon_device *rdev); +extern void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save); +extern void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save); +extern void r100_wb_disable(struct radeon_device *rdev); +extern void r100_wb_fini(struct radeon_device *rdev); +extern int r100_wb_init(struct radeon_device *rdev); + +/* r300,r350,rv350,rv370,rv380 */ +extern void r300_set_reg_safe(struct radeon_device *rdev); +extern void r300_mc_program(struct radeon_device *rdev); +extern void r300_vram_info(struct radeon_device *rdev); +extern void rv370_pcie_gart_disable(struct radeon_device *rdev); /* r420,r423,rv410 */ u32 r420_mc_rreg(struct radeon_device *rdev, u32 reg); void r420_mc_wreg(struct radeon_device *rdev, u32 reg, u32 v); +extern int r420_debugfs_pipes_info_init(struct radeon_device *rdev); #endif diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index f9c8f9a041d..8f27be31e09 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -189,31 +189,34 @@ static struct radeon_asic r300_asic = { /* * r420,r423,rv410 */ -void r420_errata(struct radeon_device *rdev); -void r420_vram_info(struct radeon_device *rdev); -int r420_mc_init(struct radeon_device *rdev); -void r420_mc_fini(struct radeon_device *rdev); +extern int r420_init(struct radeon_device *rdev); +extern void r420_fini(struct radeon_device *rdev); +extern int r420_suspend(struct radeon_device *rdev); +extern int r420_resume(struct radeon_device *rdev); static struct radeon_asic r420_asic = { - .init = &r300_init, - .errata = &r420_errata, - .vram_info = &r420_vram_info, + .init = &r420_init, + .fini = &r420_fini, + .suspend = &r420_suspend, + .resume = &r420_resume, + .errata = NULL, + .vram_info = NULL, .gpu_reset = &r300_gpu_reset, - .mc_init = &r420_mc_init, - .mc_fini = &r420_mc_fini, - .wb_init = &r100_wb_init, - .wb_fini = &r100_wb_fini, + .mc_init = 
NULL, + .mc_fini = NULL, + .wb_init = NULL, + .wb_fini = NULL, .gart_enable = &r300_gart_enable, .gart_disable = &rv370_pcie_gart_disable, .gart_tlb_flush = &rv370_pcie_gart_tlb_flush, .gart_set_page = &rv370_pcie_gart_set_page, - .cp_init = &r100_cp_init, - .cp_fini = &r100_cp_fini, - .cp_disable = &r100_cp_disable, + .cp_init = NULL, + .cp_fini = NULL, + .cp_disable = NULL, .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, .ring_ib_execute = &r100_ring_ib_execute, - .ib_test = &r100_ib_test, + .ib_test = NULL, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 72f6262ea73..633acf71400 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -156,6 +156,10 @@ int radeon_mc_setup(struct radeon_device *rdev) tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1); rdev->mc.gtt_location = tmp; } + rdev->mc.vram_start = rdev->mc.vram_location; + rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1; + rdev->mc.gtt_start = rdev->mc.gtt_location; + rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; DRM_INFO("radeon: VRAM %uM\n", (unsigned)(rdev->mc.mc_vram_size >> 20)); DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n", (unsigned)rdev->mc.vram_location, @@ -171,7 +175,7 @@ int radeon_mc_setup(struct radeon_device *rdev) /* * GPU helpers function. 
*/ -static bool radeon_card_posted(struct radeon_device *rdev) +bool radeon_card_posted(struct radeon_device *rdev) { uint32_t reg; @@ -483,6 +487,7 @@ int radeon_device_init(struct radeon_device *rdev, DRM_INFO("radeon: Initializing kernel modesetting.\n"); rdev->shutdown = false; + rdev->dev = &pdev->dev; rdev->ddev = ddev; rdev->pdev = pdev; rdev->flags = flags; @@ -497,6 +502,7 @@ int radeon_device_init(struct radeon_device *rdev, mutex_init(&rdev->ib_pool.mutex); mutex_init(&rdev->cp.mutex); rwlock_init(&rdev->fence_drv.lock); + INIT_LIST_HEAD(&rdev->gem.objects); if (radeon_agpmode == -1) { rdev->flags &= ~RADEON_IS_AGP; @@ -736,15 +742,14 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) if (!rdev->new_init_path) { radeon_cp_disable(rdev); radeon_gart_disable(rdev); + rdev->irq.sw_int = false; + radeon_irq_set(rdev); } else { radeon_suspend(rdev); } /* evict remaining vram memory */ radeon_object_evict_vram(rdev); - rdev->irq.sw_int = false; - radeon_irq_set(rdev); - pci_save_state(dev->pdev); if (state.event == PM_EVENT_SUSPEND) { /* Shut down the device */ @@ -771,10 +776,10 @@ int radeon_resume_kms(struct drm_device *dev) } pci_set_master(dev->pdev); /* Reset gpu before posting otherwise ATOM will enter infinite loop */ - if (radeon_gpu_reset(rdev)) { - /* FIXME: what do we want to do here ? */ - } if (!rdev->new_init_path) { + if (radeon_gpu_reset(rdev)) { + /* FIXME: what do we want to do here ? 
*/ + } /* post card */ if (rdev->is_atom_bios) { atom_asic_init(rdev->mode_info.atom_context); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index aa9837a6aa7..168a555d6fb 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -151,6 +151,8 @@ int radeon_ib_pool_init(struct radeon_device *rdev) int i; int r = 0; + if (rdev->ib_pool.robj) + return 0; /* Allocate 1M object buffer */ INIT_LIST_HEAD(&rdev->ib_pool.scheduled_ibs); r = radeon_object_create(rdev, NULL, RADEON_IB_POOL_SIZE*64*1024, -- cgit v1.2.3-70-g09d2 From 21f9a437222e92adb3abc68584a5f04801b92739 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 11 Sep 2009 15:55:33 +0200 Subject: drm/radeon/kms: cleanup - remove radeon_share.h radeon_share.h was beginning to cause problems with include order with respect to radeon.h. It's easier, and I think also cleaner, to move what was in radeon_share.h into radeon.h. At the same time, use the extern keyword for functions shared across the module.
Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r300.c | 1 - drivers/gpu/drm/radeon/r520.c | 1 - drivers/gpu/drm/radeon/r600.c | 1 - drivers/gpu/drm/radeon/r600_cs.c | 1 - drivers/gpu/drm/radeon/radeon.h | 93 ++++++++++++++++++++++++--- drivers/gpu/drm/radeon/radeon_share.h | 116 ---------------------------------- drivers/gpu/drm/radeon/rs400.c | 1 - drivers/gpu/drm/radeon/rv515.c | 1 - drivers/gpu/drm/radeon/rv770.c | 1 - 9 files changed, 84 insertions(+), 132 deletions(-) delete mode 100644 drivers/gpu/drm/radeon/radeon_share.h (limited to 'drivers/gpu/drm/radeon/r300.c') diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 92f9cb74a7d..ced3322bd5f 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -31,7 +31,6 @@ #include "radeon_reg.h" #include "radeon.h" #include "radeon_drm.h" -#include "radeon_share.h" #include "r100_track.h" #include "r300d.h" diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index ebd6b0f7bdf..0e1686d1c87 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -28,7 +28,6 @@ #include "drmP.h" #include "radeon_reg.h" #include "radeon.h" -#include "radeon_share.h" /* r520,rv530,rv560,rv570,r580 depends on : */ void r100_hdp_reset(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index d8fcef44a69..1bc25678986 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -32,7 +32,6 @@ #include "radeon_drm.h" #include "radeon.h" #include "radeon_mode.h" -#include "radeon_share.h" #include "r600d.h" #include "avivod.h" #include "atom.h" diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 39bf6349351..33b89cd8743 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -27,7 +27,6 @@ */ #include "drmP.h" #include "radeon.h" -#include "radeon_share.h" #include 
"r600d.h" #include "avivod.h" diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e314756dacc..8cec5bf2922 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -50,7 +50,6 @@ #include #include "radeon_mode.h" -#include "radeon_share.h" #include "radeon_reg.h" /* @@ -640,11 +639,55 @@ struct radeon_asic { void (*bandwidth_update)(struct radeon_device *rdev); }; +/* + * Asic structures + */ struct r100_asic { const unsigned *reg_safe_bm; unsigned reg_safe_bm_size; }; +struct r300_asic { + const unsigned *reg_safe_bm; + unsigned reg_safe_bm_size; +}; + +struct r600_asic { + unsigned max_pipes; + unsigned max_tile_pipes; + unsigned max_simds; + unsigned max_backends; + unsigned max_gprs; + unsigned max_threads; + unsigned max_stack_entries; + unsigned max_hw_contexts; + unsigned max_gs_threads; + unsigned sx_max_export_size; + unsigned sx_max_export_pos_size; + unsigned sx_max_export_smx_size; + unsigned sq_num_cf_insts; +}; + +struct rv770_asic { + unsigned max_pipes; + unsigned max_tile_pipes; + unsigned max_simds; + unsigned max_backends; + unsigned max_gprs; + unsigned max_threads; + unsigned max_stack_entries; + unsigned max_hw_contexts; + unsigned max_gs_threads; + unsigned sx_max_export_size; + unsigned sx_max_export_pos_size; + unsigned sx_max_export_smx_size; + unsigned sq_num_cf_insts; + unsigned sx_num_of_sets; + unsigned sc_prim_fifo_size; + unsigned sc_hiz_tile_fifo_size; + unsigned sc_earlyz_tile_fifo_fize; +}; + union radeon_asic_config { struct r300_asic r300; struct r100_asic r100; @@ -935,9 +978,14 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) #define radeon_bandwidth_update(rdev) (rdev)->asic->bandwidth_update((rdev)) /* Common functions */ -int radeon_modeset_init(struct radeon_device *rdev); -void radeon_modeset_fini(struct radeon_device *rdev); +extern int radeon_modeset_init(struct radeon_device *rdev); +extern void radeon_modeset_fini(struct 
radeon_device *rdev); extern bool radeon_card_posted(struct radeon_device *rdev); +extern int radeon_clocks_init(struct radeon_device *rdev); +extern void radeon_clocks_fini(struct radeon_device *rdev); +extern void radeon_scratch_init(struct radeon_device *rdev); +extern void radeon_surface_init(struct radeon_device *rdev); +extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data); /* r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */ struct r100_mc_save { @@ -951,10 +999,10 @@ struct r100_mc_save { extern void r100_cp_disable(struct radeon_device *rdev); extern int r100_cp_init(struct radeon_device *rdev, unsigned ring_size); extern void r100_cp_fini(struct radeon_device *rdev); -void r100_pci_gart_tlb_flush(struct radeon_device *rdev); -int r100_pci_gart_enable(struct radeon_device *rdev); -void r100_pci_gart_disable(struct radeon_device *rdev); -int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr); +extern void r100_pci_gart_tlb_flush(struct radeon_device *rdev); +extern int r100_pci_gart_enable(struct radeon_device *rdev); +extern void r100_pci_gart_disable(struct radeon_device *rdev); +extern int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr); extern int r100_debugfs_mc_info_init(struct radeon_device *rdev); extern int r100_gui_wait_for_idle(struct radeon_device *rdev); extern void r100_ib_fini(struct radeon_device *rdev); @@ -963,6 +1011,7 @@ extern void r100_irq_disable(struct radeon_device *rdev); extern int r100_irq_set(struct radeon_device *rdev); extern void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save); extern void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save); +extern void r100_vram_init_sizes(struct radeon_device *rdev); extern void r100_wb_disable(struct radeon_device *rdev); extern void r100_wb_fini(struct radeon_device *rdev); extern int r100_wb_init(struct radeon_device *rdev); @@ -974,8 +1023,34 @@ extern void r300_vram_info(struct 
radeon_device *rdev); extern void rv370_pcie_gart_disable(struct radeon_device *rdev); /* r420,r423,rv410 */ -u32 r420_mc_rreg(struct radeon_device *rdev, u32 reg); -void r420_mc_wreg(struct radeon_device *rdev, u32 reg, u32 v); +extern u32 r420_mc_rreg(struct radeon_device *rdev, u32 reg); +extern void r420_mc_wreg(struct radeon_device *rdev, u32 reg, u32 v); extern int r420_debugfs_pipes_info_init(struct radeon_device *rdev); +/* rv515 */ +extern void rv515_bandwidth_avivo_update(struct radeon_device *rdev); + +/* rs690, rs740 */ +extern void rs690_line_buffer_adjust(struct radeon_device *rdev, + struct drm_display_mode *mode1, + struct drm_display_mode *mode2); + +/* r600, rv610, rv630, rv620, rv635, rv670, rs780, rs880 */ +extern bool r600_card_posted(struct radeon_device *rdev); +extern void r600_cp_stop(struct radeon_device *rdev); +extern void r600_ring_init(struct radeon_device *rdev, unsigned ring_size); +extern int r600_cp_resume(struct radeon_device *rdev); +extern int r600_count_pipe_bits(uint32_t val); +extern int r600_gart_clear_page(struct radeon_device *rdev, int i); +extern int r600_mc_wait_for_idle(struct radeon_device *rdev); +extern void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); +extern int r600_ib_test(struct radeon_device *rdev); +extern int r600_ring_test(struct radeon_device *rdev); +extern int r600_wb_init(struct radeon_device *rdev); +extern void r600_wb_fini(struct radeon_device *rdev); +extern void r600_scratch_init(struct radeon_device *rdev); +extern int r600_blit_init(struct radeon_device *rdev); +extern void r600_blit_fini(struct radeon_device *rdev); +extern int r600_cp_init_microcode(struct radeon_device *rdev); + #endif diff --git a/drivers/gpu/drm/radeon/radeon_share.h b/drivers/gpu/drm/radeon/radeon_share.h deleted file mode 100644 index 5f9e358ab50..00000000000 --- a/drivers/gpu/drm/radeon/radeon_share.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright 2008 Advanced Micro Devices, Inc. 
- * Copyright 2008 Red Hat Inc. - * Copyright 2009 Jerome Glisse. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: Dave Airlie - * Alex Deucher - * Jerome Glisse - */ -#ifndef __RADEON_SHARE_H__ -#define __RADEON_SHARE_H__ - -/* Common */ -struct radeon_device; -struct radeon_cs_parser; -int radeon_clocks_init(struct radeon_device *rdev); -void radeon_clocks_fini(struct radeon_device *rdev); -void radeon_scratch_init(struct radeon_device *rdev); -void radeon_surface_init(struct radeon_device *rdev); -int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data); - - -/* R100, RV100, RS100, RV200, RS200, R200, RV250, RS300, RV280 */ -void r100_vram_init_sizes(struct radeon_device *rdev); - - -/* R300, R350, RV350, RV380 */ -struct r300_asic { - const unsigned *reg_safe_bm; - unsigned reg_safe_bm_size; -}; - - -/* RS690, RS740 */ -void rs690_line_buffer_adjust(struct radeon_device *rdev, - struct drm_display_mode *mode1, - struct drm_display_mode *mode2); - - -/* RV515 */ -void rv515_bandwidth_avivo_update(struct radeon_device *rdev); - - -/* R600, RV610, RV630, RV620, RV635, RV670, RS780, RS880 */ -bool r600_card_posted(struct radeon_device *rdev); -void r600_cp_stop(struct radeon_device *rdev); -void r600_ring_init(struct radeon_device *rdev, unsigned ring_size); -int r600_cp_resume(struct radeon_device *rdev); -int r600_count_pipe_bits(uint32_t val); -int r600_gart_clear_page(struct radeon_device *rdev, int i); -int r600_mc_wait_for_idle(struct radeon_device *rdev); -void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); -int r600_ib_test(struct radeon_device *rdev); -int r600_ring_test(struct radeon_device *rdev); -int r600_wb_init(struct radeon_device *rdev); -void r600_wb_fini(struct radeon_device *rdev); -void r600_scratch_init(struct radeon_device *rdev); -int r600_blit_init(struct radeon_device *rdev); -void r600_blit_fini(struct radeon_device *rdev); -int r600_cp_init_microcode(struct radeon_device *rdev); -struct r600_asic { - unsigned max_pipes; - unsigned max_tile_pipes; - unsigned max_simds; - unsigned max_backends; - unsigned max_gprs; - 
unsigned max_threads; - unsigned max_stack_entries; - unsigned max_hw_contexts; - unsigned max_gs_threads; - unsigned sx_max_export_size; - unsigned sx_max_export_pos_size; - unsigned sx_max_export_smx_size; - unsigned sq_num_cf_insts; -}; - -/* RV770, RV7300, RV710 */ -struct rv770_asic { - unsigned max_pipes; - unsigned max_tile_pipes; - unsigned max_simds; - unsigned max_backends; - unsigned max_gprs; - unsigned max_threads; - unsigned max_stack_entries; - unsigned max_hw_contexts; - unsigned max_gs_threads; - unsigned sx_max_export_size; - unsigned sx_max_export_pos_size; - unsigned sx_max_export_smx_size; - unsigned sq_num_cf_insts; - unsigned sx_num_of_sets; - unsigned sc_prim_fifo_size; - unsigned sc_hiz_tile_fifo_size; - unsigned sc_earlyz_tile_fifo_fize; -}; - -#endif diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index 8c3ea7e3606..e1e4ce42782 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -29,7 +29,6 @@ #include #include "radeon_reg.h" #include "radeon.h" -#include "radeon_share.h" /* rs400,rs480 depends on : */ void r100_hdp_reset(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 99e397f1638..03d490269ed 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -29,7 +29,6 @@ #include "drmP.h" #include "rv515d.h" #include "radeon.h" -#include "radeon_share.h" #include "rv515_reg_safe.h" /* rv515 depends on : */ diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 57765f6d5b2..5ba5204091e 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -29,7 +29,6 @@ #include #include "drmP.h" #include "radeon.h" -#include "radeon_share.h" #include "rv770d.h" #include "avivod.h" #include "atom.h" -- cgit v1.2.3-70-g09d2 From 4aac047323e3082d0866b8ad3784236632105af4 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Mon, 14 Sep 2009 18:29:49 +0200 
Subject: drm/radeon/kms: clear confusion in GART init/deinit path GART static one-time initialization was mixed up with GART enabling/disabling, which can happen several times, for instance during suspend/resume cycles. This patch splits all GART handling into 4 different functions. gart_init is for one-time initialization; gart_deinit is called upon module unload to free resources allocated by gart_init; gart_enable enables the GART and is intended to be called after first initialization and at each resume or reset cycle. Finally, gart_disable stops the GART and is intended to be called at suspend time or when unloading the module. Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r100.c | 40 ++++++------ drivers/gpu/drm/radeon/r300.c | 108 ++++++++++++++------------------- drivers/gpu/drm/radeon/r420.c | 57 ++++++++++------- drivers/gpu/drm/radeon/r520.c | 5 -- drivers/gpu/drm/radeon/r600.c | 53 +++++++++++----- drivers/gpu/drm/radeon/radeon.h | 11 ++++ drivers/gpu/drm/radeon/radeon_asic.h | 38 +++++++++--- drivers/gpu/drm/radeon/radeon_device.c | 36 +++++++++-- drivers/gpu/drm/radeon/radeon_gart.c | 9 ++- drivers/gpu/drm/radeon/rs400.c | 53 ++++++++++------ drivers/gpu/drm/radeon/rs600.c | 41 +++++++++---- drivers/gpu/drm/radeon/rs690.c | 3 - drivers/gpu/drm/radeon/rv515.c | 5 -- drivers/gpu/drm/radeon/rv770.c | 37 ++++++----- 14 files changed, 306 insertions(+), 190 deletions(-) (limited to 'drivers/gpu/drm/radeon/r300.c') diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 47263d3ede9..fa0fdc1e345 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -84,23 +84,28 @@ void r100_pci_gart_tlb_flush(struct radeon_device *rdev) * could end up in wrong address.
*/ } -int r100_pci_gart_enable(struct radeon_device *rdev) +int r100_pci_gart_init(struct radeon_device *rdev) { - uint32_t tmp; int r; + if (rdev->gart.table.ram.ptr) { + WARN(1, "R100 PCI GART already initialized.\n"); + return 0; + } /* Initialize common gart structure */ r = radeon_gart_init(rdev); - if (r) { + if (r) return r; - } - if (rdev->gart.table.ram.ptr == NULL) { - rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; - r = radeon_gart_table_ram_alloc(rdev); - if (r) { - return r; - } - } + rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; + rdev->asic->gart_tlb_flush = &r100_pci_gart_tlb_flush; + rdev->asic->gart_set_page = &r100_pci_gart_set_page; + return radeon_gart_table_ram_alloc(rdev); +} + +int r100_pci_gart_enable(struct radeon_device *rdev) +{ + uint32_t tmp; + /* discard memory request outside of configured range */ tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS; WREG32(RADEON_AIC_CNTL, tmp); @@ -140,13 +145,11 @@ int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr) return 0; } -int r100_gart_enable(struct radeon_device *rdev) +void r100_pci_gart_fini(struct radeon_device *rdev) { - if (rdev->flags & RADEON_IS_AGP) { - r100_pci_gart_disable(rdev); - return 0; - } - return r100_pci_gart_enable(rdev); + r100_pci_gart_disable(rdev); + radeon_gart_table_ram_free(rdev); + radeon_gart_fini(rdev); } @@ -273,9 +276,6 @@ int r100_mc_init(struct radeon_device *rdev) void r100_mc_fini(struct radeon_device *rdev) { - r100_pci_gart_disable(rdev); - radeon_gart_table_ram_free(rdev); - radeon_gart_fini(rdev); } diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index ced3322bd5f..bb151ecdf8f 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -42,7 +42,6 @@ int r100_cp_reset(struct radeon_device *rdev); int r100_rb2d_reset(struct radeon_device *rdev); int r100_cp_init(struct radeon_device *rdev, unsigned ring_size); int r100_pci_gart_enable(struct 
radeon_device *rdev); -void r100_pci_gart_disable(struct radeon_device *rdev); void r100_mc_setup(struct radeon_device *rdev); void r100_mc_disable_clients(struct radeon_device *rdev); int r100_gui_wait_for_idle(struct radeon_device *rdev); @@ -86,26 +85,57 @@ void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev) mb(); } -int rv370_pcie_gart_enable(struct radeon_device *rdev) +int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr) +{ + void __iomem *ptr = (void *)rdev->gart.table.vram.ptr; + + if (i < 0 || i > rdev->gart.num_gpu_pages) { + return -EINVAL; + } + addr = (lower_32_bits(addr) >> 8) | + ((upper_32_bits(addr) & 0xff) << 24) | + 0xc; + /* on x86 we want this to be CPU endian, on powerpc + * on powerpc without HW swappers, it'll get swapped on way + * into VRAM - so no need for cpu_to_le32 on VRAM tables */ + writel(addr, ((void __iomem *)ptr) + (i * 4)); + return 0; +} + +int rv370_pcie_gart_init(struct radeon_device *rdev) { - uint32_t table_addr; - uint32_t tmp; int r; + if (rdev->gart.table.vram.robj) { + WARN(1, "RV370 PCIE GART already initialized.\n"); + return 0; + } /* Initialize common gart structure */ r = radeon_gart_init(rdev); - if (r) { + if (r) return r; - } r = rv370_debugfs_pcie_gart_info_init(rdev); - if (r) { + if (r) DRM_ERROR("Failed to register debugfs file for PCIE gart !\n"); - } rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; - r = radeon_gart_table_vram_alloc(rdev); - if (r) { - return r; + rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush; + rdev->asic->gart_set_page = &rv370_pcie_gart_set_page; + return radeon_gart_table_vram_alloc(rdev); +} + +int rv370_pcie_gart_enable(struct radeon_device *rdev) +{ + uint32_t table_addr; + uint32_t tmp; + int r; + + if (rdev->gart.table.vram.robj == NULL) { + dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; } + r = radeon_gart_table_vram_pin(rdev); + if (r) + return r; /* discard memory request outside of configured range */ 
tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD; WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp); @@ -145,51 +175,13 @@ void rv370_pcie_gart_disable(struct radeon_device *rdev) } } -int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr) -{ - void __iomem *ptr = (void *)rdev->gart.table.vram.ptr; - - if (i < 0 || i > rdev->gart.num_gpu_pages) { - return -EINVAL; - } - addr = (lower_32_bits(addr) >> 8) | - ((upper_32_bits(addr) & 0xff) << 24) | - 0xc; - /* on x86 we want this to be CPU endian, on powerpc - * on powerpc without HW swappers, it'll get swapped on way - * into VRAM - so no need for cpu_to_le32 on VRAM tables */ - writel(addr, ((void __iomem *)ptr) + (i * 4)); - return 0; -} - -int r300_gart_enable(struct radeon_device *rdev) +void rv370_pcie_gart_fini(struct radeon_device *rdev) { -#if __OS_HAS_AGP - if (rdev->flags & RADEON_IS_AGP) { - if (rdev->family > CHIP_RV350) { - rv370_pcie_gart_disable(rdev); - } else { - r100_pci_gart_disable(rdev); - } - return 0; - } -#endif - if (rdev->flags & RADEON_IS_PCIE) { - rdev->asic->gart_disable = &rv370_pcie_gart_disable; - rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush; - rdev->asic->gart_set_page = &rv370_pcie_gart_set_page; - return rv370_pcie_gart_enable(rdev); - } - if (rdev->flags & RADEON_IS_PCI) { - rdev->asic->gart_disable = &r100_pci_gart_disable; - rdev->asic->gart_tlb_flush = &r100_pci_gart_tlb_flush; - rdev->asic->gart_set_page = &r100_pci_gart_set_page; - return r100_pci_gart_enable(rdev); - } - return r100_pci_gart_enable(rdev); + rv370_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); } - /* * MC */ @@ -237,14 +229,6 @@ int r300_mc_init(struct radeon_device *rdev) void r300_mc_fini(struct radeon_device *rdev) { - if (rdev->flags & RADEON_IS_PCIE) { - rv370_pcie_gart_disable(rdev); - radeon_gart_table_vram_free(rdev); - } else { - r100_pci_gart_disable(rdev); - radeon_gart_table_ram_free(rdev); - } - radeon_gart_fini(rdev); } @@ 
-1299,8 +1283,6 @@ void r300_mc_program(struct radeon_device *rdev) /* Stops all mc clients */ r100_mc_stop(rdev, &save); - /* Shutdown PCI/PCIE GART */ - radeon_gart_disable(rdev); if (rdev->flags & RADEON_IS_AGP) { WREG32(R_00014C_MC_AGP_LOCATION, S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) | diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index e57b9ba4aaf..33a25a4377b 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -161,6 +161,11 @@ int r420_resume(struct radeon_device *rdev) { int r; + /* Make sur GART are not working */ + if (rdev->flags & RADEON_IS_PCIE) + rv370_pcie_gart_disable(rdev); + if (rdev->flags & RADEON_IS_PCI) + r100_pci_gart_disable(rdev); /* Resume clock before doing reset */ r420_clock_resume(rdev); /* Reset gpu before posting otherwise ATOM will enter infinite loop */ @@ -180,10 +185,15 @@ int r420_resume(struct radeon_device *rdev) r300_mc_program(rdev); /* Initialize GART (initialize after TTM so we can allocate * memory through TTM but finalize after TTM) */ - r = radeon_gart_enable(rdev); - if (r) { - dev_err(rdev->dev, "failled initializing GART (%d).\n", r); - return r; + if (rdev->flags & RADEON_IS_PCIE) { + r = rv370_pcie_gart_enable(rdev); + if (r) + return r; + } + if (rdev->flags & RADEON_IS_PCI) { + r = r100_pci_gart_enable(rdev); + if (r) + return r; } r420_pipes_init(rdev); /* Enable IRQ */ @@ -212,7 +222,10 @@ int r420_suspend(struct radeon_device *rdev) r100_cp_disable(rdev); r100_wb_disable(rdev); r100_irq_disable(rdev); - radeon_gart_disable(rdev); + if (rdev->flags & RADEON_IS_PCIE) + rv370_pcie_gart_disable(rdev); + if (rdev->flags & RADEON_IS_PCI) + r100_pci_gart_disable(rdev); return 0; } @@ -222,14 +235,10 @@ void r420_fini(struct radeon_device *rdev) r100_wb_fini(rdev); r100_ib_fini(rdev); radeon_gem_fini(rdev); - if (rdev->flags & RADEON_IS_PCIE) { - rv370_pcie_gart_disable(rdev); - radeon_gart_table_vram_free(rdev); - } else { - 
r100_pci_gart_disable(rdev); - radeon_gart_table_ram_free(rdev); - } - radeon_gart_fini(rdev); + if (rdev->flags & RADEON_IS_PCIE) + rv370_pcie_gart_fini(rdev); + if (rdev->flags & RADEON_IS_PCI) + r100_pci_gart_fini(rdev); radeon_agp_fini(rdev); radeon_irq_kms_fini(rdev); radeon_fence_driver_fini(rdev); @@ -309,6 +318,16 @@ int r420_init(struct radeon_device *rdev) if (r) { return r; } + if (rdev->flags & RADEON_IS_PCIE) { + r = rv370_pcie_gart_init(rdev); + if (r) + return r; + } + if (rdev->flags & RADEON_IS_PCI) { + r = r100_pci_gart_init(rdev); + if (r) + return r; + } r300_set_reg_safe(rdev); r = r420_resume(rdev); if (r) { @@ -318,14 +337,10 @@ int r420_init(struct radeon_device *rdev) r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); - if (rdev->flags & RADEON_IS_PCIE) { - rv370_pcie_gart_disable(rdev); - radeon_gart_table_vram_free(rdev); - } else { - r100_pci_gart_disable(rdev); - radeon_gart_table_ram_free(rdev); - } - radeon_gart_fini(rdev); + if (rdev->flags & RADEON_IS_PCIE) + rv370_pcie_gart_fini(rdev); + if (rdev->flags & RADEON_IS_PCI) + r100_pci_gart_fini(rdev); radeon_agp_fini(rdev); radeon_irq_kms_fini(rdev); } diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index 0e1686d1c87..d4b0b9d2e39 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -31,8 +31,6 @@ /* r520,rv530,rv560,rv570,r580 depends on : */ void r100_hdp_reset(struct radeon_device *rdev); -int rv370_pcie_gart_enable(struct radeon_device *rdev); -void rv370_pcie_gart_disable(struct radeon_device *rdev); void r420_pipes_init(struct radeon_device *rdev); void rs600_mc_disable_clients(struct radeon_device *rdev); void rs600_disable_vga(struct radeon_device *rdev); @@ -118,9 +116,6 @@ int r520_mc_init(struct radeon_device *rdev) void r520_mc_fini(struct radeon_device *rdev) { - rv370_pcie_gart_disable(rdev); - radeon_gart_table_vram_free(rdev); - radeon_gart_fini(rdev); } diff --git a/drivers/gpu/drm/radeon/r600.c 
b/drivers/gpu/drm/radeon/r600.c index 1bc25678986..65699e9f202 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -113,21 +113,34 @@ void r600_pcie_gart_tlb_flush(struct radeon_device *rdev) } } -int r600_pcie_gart_enable(struct radeon_device *rdev) +int r600_pcie_gart_init(struct radeon_device *rdev) { - u32 tmp; - int r, i; + int r; + if (rdev->gart.table.vram.robj) { + WARN(1, "R600 PCIE GART already initialized.\n"); + return 0; + } /* Initialize common gart structure */ r = radeon_gart_init(rdev); - if (r) { + if (r) return r; - } rdev->gart.table_size = rdev->gart.num_gpu_pages * 8; - r = radeon_gart_table_vram_alloc(rdev); - if (r) { - return r; + return radeon_gart_table_vram_alloc(rdev); +} + +int r600_pcie_gart_enable(struct radeon_device *rdev) +{ + u32 tmp; + int r, i; + + if (rdev->gart.table.vram.robj == NULL) { + dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; } + r = radeon_gart_table_vram_pin(rdev); + if (r) + return r; for (i = 0; i < rdev->gart.num_gpu_pages; i++) r600_gart_clear_page(rdev, i); /* Setup L2 cache */ @@ -175,10 +188,6 @@ void r600_pcie_gart_disable(struct radeon_device *rdev) u32 tmp; int i; - /* Clear ptes*/ - for (i = 0; i < rdev->gart.num_gpu_pages; i++) - r600_gart_clear_page(rdev, i); - r600_pcie_gart_tlb_flush(rdev); /* Disable all tables */ for (i = 0; i < 7; i++) WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); @@ -204,6 +213,17 @@ void r600_pcie_gart_disable(struct radeon_device *rdev) WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp); + if (rdev->gart.table.vram.robj) { + radeon_object_kunmap(rdev->gart.table.vram.robj); + radeon_object_unpin(rdev->gart.table.vram.robj); + } +} + +void r600_pcie_gart_fini(struct radeon_device *rdev) +{ + r600_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); } int r600_mc_wait_for_idle(struct radeon_device *rdev) @@ -1472,6 
+1492,7 @@ int r600_suspend(struct radeon_device *rdev) { /* FIXME: we should wait for ring to be empty */ r600_cp_stop(rdev); + r600_pcie_gart_disable(rdev); return 0; } @@ -1548,6 +1569,10 @@ int r600_init(struct radeon_device *rdev) } } + r = r600_pcie_gart_init(rdev); + if (r) + return r; + r = r600_resume(rdev); if (r) { if (rdev->flags & RADEON_IS_AGP) { @@ -1583,9 +1608,7 @@ void r600_fini(struct radeon_device *rdev) r600_blit_fini(rdev); radeon_ring_fini(rdev); - r600_pcie_gart_disable(rdev); - radeon_gart_table_vram_free(rdev); - radeon_gart_fini(rdev); + r600_pcie_gart_fini(rdev); radeon_gem_fini(rdev); radeon_fence_driver_fini(rdev); radeon_clocks_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8cec5bf2922..99292be8bc9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -596,6 +596,8 @@ struct radeon_asic { void (*mc_fini)(struct radeon_device *rdev); int (*wb_init)(struct radeon_device *rdev); void (*wb_fini)(struct radeon_device *rdev); + int (*gart_init)(struct radeon_device *rdev); + void (*gart_fini)(struct radeon_device *rdev); int (*gart_enable)(struct radeon_device *rdev); void (*gart_disable)(struct radeon_device *rdev); void (*gart_tlb_flush)(struct radeon_device *rdev); @@ -950,6 +952,8 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) #define radeon_mc_fini(rdev) (rdev)->asic->mc_fini((rdev)) #define radeon_wb_init(rdev) (rdev)->asic->wb_init((rdev)) #define radeon_wb_fini(rdev) (rdev)->asic->wb_fini((rdev)) +#define radeon_gpu_gart_init(rdev) (rdev)->asic->gart_init((rdev)) +#define radeon_gpu_gart_fini(rdev) (rdev)->asic->gart_fini((rdev)) #define radeon_gart_enable(rdev) (rdev)->asic->gart_enable((rdev)) #define radeon_gart_disable(rdev) (rdev)->asic->gart_disable((rdev)) #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart_tlb_flush((rdev)) @@ -978,6 +982,7 @@ static inline void radeon_ring_write(struct radeon_device *rdev, 
uint32_t v) #define radeon_bandwidth_update(rdev) (rdev)->asic->bandwidth_update((rdev)) /* Common functions */ +extern int radeon_gart_table_vram_pin(struct radeon_device *rdev); extern int radeon_modeset_init(struct radeon_device *rdev); extern void radeon_modeset_fini(struct radeon_device *rdev); extern bool radeon_card_posted(struct radeon_device *rdev); @@ -1000,6 +1005,8 @@ extern void r100_cp_disable(struct radeon_device *rdev); extern int r100_cp_init(struct radeon_device *rdev, unsigned ring_size); extern void r100_cp_fini(struct radeon_device *rdev); extern void r100_pci_gart_tlb_flush(struct radeon_device *rdev); +extern int r100_pci_gart_init(struct radeon_device *rdev); +extern void r100_pci_gart_fini(struct radeon_device *rdev); extern int r100_pci_gart_enable(struct radeon_device *rdev); extern void r100_pci_gart_disable(struct radeon_device *rdev); extern int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr); @@ -1020,6 +1027,9 @@ extern int r100_wb_init(struct radeon_device *rdev); extern void r300_set_reg_safe(struct radeon_device *rdev); extern void r300_mc_program(struct radeon_device *rdev); extern void r300_vram_info(struct radeon_device *rdev); +extern int rv370_pcie_gart_init(struct radeon_device *rdev); +extern void rv370_pcie_gart_fini(struct radeon_device *rdev); +extern int rv370_pcie_gart_enable(struct radeon_device *rdev); extern void rv370_pcie_gart_disable(struct radeon_device *rdev); /* r420,r423,rv410 */ @@ -1043,6 +1053,7 @@ extern int r600_cp_resume(struct radeon_device *rdev); extern int r600_count_pipe_bits(uint32_t val); extern int r600_gart_clear_page(struct radeon_device *rdev, int i); extern int r600_mc_wait_for_idle(struct radeon_device *rdev); +extern int r600_pcie_gart_init(struct radeon_device *rdev); extern void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); extern int r600_ib_test(struct radeon_device *rdev); extern int r600_ring_test(struct radeon_device *rdev); diff --git 
a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 8f27be31e09..5f2a9e6f12c 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -53,7 +53,9 @@ void r100_mc_fini(struct radeon_device *rdev); u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc); int r100_wb_init(struct radeon_device *rdev); void r100_wb_fini(struct radeon_device *rdev); -int r100_gart_enable(struct radeon_device *rdev); +int r100_pci_gart_init(struct radeon_device *rdev); +void r100_pci_gart_fini(struct radeon_device *rdev); +int r100_pci_gart_enable(struct radeon_device *rdev); void r100_pci_gart_disable(struct radeon_device *rdev); void r100_pci_gart_tlb_flush(struct radeon_device *rdev); int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr); @@ -92,7 +94,9 @@ static struct radeon_asic r100_asic = { .mc_fini = &r100_mc_fini, .wb_init = &r100_wb_init, .wb_fini = &r100_wb_fini, - .gart_enable = &r100_gart_enable, + .gart_init = &r100_pci_gart_init, + .gart_fini = &r100_pci_gart_fini, + .gart_enable = &r100_pci_gart_enable, .gart_disable = &r100_pci_gart_disable, .gart_tlb_flush = &r100_pci_gart_tlb_flush, .gart_set_page = &r100_pci_gart_set_page, @@ -135,7 +139,9 @@ void r300_ring_start(struct radeon_device *rdev); void r300_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); int r300_cs_parse(struct radeon_cs_parser *p); -int r300_gart_enable(struct radeon_device *rdev); +int rv370_pcie_gart_init(struct radeon_device *rdev); +void rv370_pcie_gart_fini(struct radeon_device *rdev); +int rv370_pcie_gart_enable(struct radeon_device *rdev); void rv370_pcie_gart_disable(struct radeon_device *rdev); void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev); int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr); @@ -157,7 +163,9 @@ static struct radeon_asic r300_asic = { .mc_fini = &r300_mc_fini, .wb_init = &r100_wb_init, .wb_fini = &r100_wb_fini, - 
.gart_enable = &r300_gart_enable, + .gart_init = &r100_pci_gart_init, + .gart_fini = &r100_pci_gart_fini, + .gart_enable = &r100_pci_gart_enable, .gart_disable = &r100_pci_gart_disable, .gart_tlb_flush = &r100_pci_gart_tlb_flush, .gart_set_page = &r100_pci_gart_set_page, @@ -205,8 +213,8 @@ static struct radeon_asic r420_asic = { .mc_fini = NULL, .wb_init = NULL, .wb_fini = NULL, - .gart_enable = &r300_gart_enable, - .gart_disable = &rv370_pcie_gart_disable, + .gart_enable = NULL, + .gart_disable = NULL, .gart_tlb_flush = &rv370_pcie_gart_tlb_flush, .gart_set_page = &rv370_pcie_gart_set_page, .cp_init = NULL, @@ -242,6 +250,8 @@ void rs400_errata(struct radeon_device *rdev); void rs400_vram_info(struct radeon_device *rdev); int rs400_mc_init(struct radeon_device *rdev); void rs400_mc_fini(struct radeon_device *rdev); +int rs400_gart_init(struct radeon_device *rdev); +void rs400_gart_fini(struct radeon_device *rdev); int rs400_gart_enable(struct radeon_device *rdev); void rs400_gart_disable(struct radeon_device *rdev); void rs400_gart_tlb_flush(struct radeon_device *rdev); @@ -257,6 +267,8 @@ static struct radeon_asic rs400_asic = { .mc_fini = &rs400_mc_fini, .wb_init = &r100_wb_init, .wb_fini = &r100_wb_fini, + .gart_init = &rs400_gart_init, + .gart_fini = &rs400_gart_fini, .gart_enable = &rs400_gart_enable, .gart_disable = &rs400_gart_disable, .gart_tlb_flush = &rs400_gart_tlb_flush, @@ -298,6 +310,8 @@ void rs600_mc_fini(struct radeon_device *rdev); int rs600_irq_set(struct radeon_device *rdev); int rs600_irq_process(struct radeon_device *rdev); u32 rs600_get_vblank_counter(struct radeon_device *rdev, int crtc); +int rs600_gart_init(struct radeon_device *rdev); +void rs600_gart_fini(struct radeon_device *rdev); int rs600_gart_enable(struct radeon_device *rdev); void rs600_gart_disable(struct radeon_device *rdev); void rs600_gart_tlb_flush(struct radeon_device *rdev); @@ -314,6 +328,8 @@ static struct radeon_asic rs600_asic = { .mc_fini = &rs600_mc_fini, .wb_init 
= &r100_wb_init, .wb_fini = &r100_wb_fini, + .gart_init = &rs600_gart_init, + .gart_fini = &rs600_gart_fini, .gart_enable = &rs600_gart_enable, .gart_disable = &rs600_gart_disable, .gart_tlb_flush = &rs600_gart_tlb_flush, @@ -361,6 +377,8 @@ static struct radeon_asic rs690_asic = { .mc_fini = &rs690_mc_fini, .wb_init = &r100_wb_init, .wb_fini = &r100_wb_fini, + .gart_init = &rs400_gart_init, + .gart_fini = &rs400_gart_fini, .gart_enable = &rs400_gart_enable, .gart_disable = &rs400_gart_disable, .gart_tlb_flush = &rs400_gart_tlb_flush, @@ -415,7 +433,9 @@ static struct radeon_asic rv515_asic = { .mc_fini = &rv515_mc_fini, .wb_init = &r100_wb_init, .wb_fini = &r100_wb_fini, - .gart_enable = &r300_gart_enable, + .gart_init = &rv370_pcie_gart_init, + .gart_fini = &rv370_pcie_gart_fini, + .gart_enable = &rv370_pcie_gart_enable, .gart_disable = &rv370_pcie_gart_disable, .gart_tlb_flush = &rv370_pcie_gart_tlb_flush, .gart_set_page = &rv370_pcie_gart_set_page, @@ -462,7 +482,9 @@ static struct radeon_asic r520_asic = { .mc_fini = &r520_mc_fini, .wb_init = &r100_wb_init, .wb_fini = &r100_wb_fini, - .gart_enable = &r300_gart_enable, + .gart_init = &rv370_pcie_gart_init, + .gart_fini = &rv370_pcie_gart_fini, + .gart_enable = &rv370_pcie_gart_enable, .gart_disable = &rv370_pcie_gart_disable, .gart_tlb_flush = &rv370_pcie_gart_tlb_flush, .gart_set_page = &rv370_pcie_gart_set_page, diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index ece097c3e07..7b6d0b1a596 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -320,6 +320,14 @@ int radeon_asic_init(struct radeon_device *rdev) case CHIP_RV350: case CHIP_RV380: rdev->asic = &r300_asic; + if (rdev->flags & RADEON_IS_PCIE) { + rdev->asic->gart_init = &rv370_pcie_gart_init; + rdev->asic->gart_fini = &rv370_pcie_gart_fini; + rdev->asic->gart_enable = &rv370_pcie_gart_enable; + rdev->asic->gart_disable = &rv370_pcie_gart_disable; + 
rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush; + rdev->asic->gart_set_page = &rv370_pcie_gart_set_page; + } break; case CHIP_R420: case CHIP_R423: @@ -504,6 +512,12 @@ int radeon_device_init(struct radeon_device *rdev, rwlock_init(&rdev->fence_drv.lock); INIT_LIST_HEAD(&rdev->gem.objects); + /* Set asic functions */ + r = radeon_asic_init(rdev); + if (r) { + return r; + } + if (radeon_agpmode == -1) { rdev->flags &= ~RADEON_IS_AGP; if (rdev->family >= CHIP_RV515 || @@ -512,18 +526,24 @@ int radeon_device_init(struct radeon_device *rdev, rdev->family == CHIP_R423) { DRM_INFO("Forcing AGP to PCIE mode\n"); rdev->flags |= RADEON_IS_PCIE; + rdev->asic->gart_init = &rv370_pcie_gart_init; + rdev->asic->gart_fini = &rv370_pcie_gart_fini; + rdev->asic->gart_enable = &rv370_pcie_gart_enable; + rdev->asic->gart_disable = &rv370_pcie_gart_disable; + rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush; + rdev->asic->gart_set_page = &rv370_pcie_gart_set_page; } else { DRM_INFO("Forcing AGP to PCI mode\n"); rdev->flags |= RADEON_IS_PCI; + rdev->asic->gart_init = &r100_pci_gart_init; + rdev->asic->gart_fini = &r100_pci_gart_fini; + rdev->asic->gart_enable = &r100_pci_gart_enable; + rdev->asic->gart_disable = &r100_pci_gart_disable; + rdev->asic->gart_tlb_flush = &r100_pci_gart_tlb_flush; + rdev->asic->gart_set_page = &r100_pci_gart_set_page; } } - /* Set asic functions */ - r = radeon_asic_init(rdev); - if (r) { - return r; - } - /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. 
* IGP - can handle 40-bits (in theory) @@ -623,6 +643,9 @@ int radeon_device_init(struct radeon_device *rdev, if (r) { return r; } + r = radeon_gpu_gart_init(rdev); + if (r) + return r; /* Initialize GART (initialize after TTM so we can allocate * memory through TTM but finalize after TTM) */ r = radeon_gart_enable(rdev); @@ -675,6 +698,7 @@ void radeon_device_fini(struct radeon_device *rdev) radeon_ib_pool_fini(rdev); radeon_cp_fini(rdev); radeon_wb_fini(rdev); + radeon_gpu_gart_fini(rdev); radeon_gem_fini(rdev); radeon_mc_fini(rdev); #if __OS_HAS_AGP diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 2977539880f..a931af065dd 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -75,7 +75,6 @@ void radeon_gart_table_ram_free(struct radeon_device *rdev) int radeon_gart_table_vram_alloc(struct radeon_device *rdev) { - uint64_t gpu_addr; int r; if (rdev->gart.table.vram.robj == NULL) { @@ -88,6 +87,14 @@ int radeon_gart_table_vram_alloc(struct radeon_device *rdev) return r; } } + return 0; +} + +int radeon_gart_table_vram_pin(struct radeon_device *rdev) +{ + uint64_t gpu_addr; + int r; + r = radeon_object_pin(rdev->gart.table.vram.robj, RADEON_GEM_DOMAIN_VRAM, &gpu_addr); if (r) { diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index e1e4ce42782..a3fbdad938c 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -92,20 +92,41 @@ void rs400_gart_tlb_flush(struct radeon_device *rdev) WREG32_MC(RS480_GART_CACHE_CNTRL, 0); } -int rs400_gart_enable(struct radeon_device *rdev) +int rs400_gart_init(struct radeon_device *rdev) { - uint32_t size_reg; - uint32_t tmp; int r; + if (rdev->gart.table.ram.ptr) { + WARN(1, "RS400 GART already initialized.\n"); + return 0; + } + /* Check gart size */ + switch(rdev->mc.gtt_size / (1024 * 1024)) { + case 32: + case 64: + case 128: + case 256: + case 512: + case 1024: + case 2048: + break; + 
default: + return -EINVAL; + } /* Initialize common gart structure */ r = radeon_gart_init(rdev); - if (r) { + if (r) return r; - } - if (rs400_debugfs_pcie_gart_info_init(rdev)) { + if (rs400_debugfs_pcie_gart_info_init(rdev)) DRM_ERROR("Failed to register debugfs file for RS400 GART !\n"); - } + rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; + return radeon_gart_table_ram_alloc(rdev); +} + +int rs400_gart_enable(struct radeon_device *rdev) +{ + uint32_t size_reg; + uint32_t tmp; tmp = RREG32_MC(RS690_AIC_CTRL_SCRATCH); tmp |= RS690_DIS_OUT_OF_PCI_GART_ACCESS; @@ -136,13 +157,6 @@ int rs400_gart_enable(struct radeon_device *rdev) default: return -EINVAL; } - if (rdev->gart.table.ram.ptr == NULL) { - rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; - r = radeon_gart_table_ram_alloc(rdev); - if (r) { - return r; - } - } /* It should be fine to program it to max value */ if (rdev->family == CHIP_RS690 || (rdev->family == CHIP_RS740)) { WREG32_MC(RS690_MCCFG_AGP_BASE, 0xFFFFFFFF); @@ -201,6 +215,13 @@ void rs400_gart_disable(struct radeon_device *rdev) WREG32_MC(RS480_AGP_ADDRESS_SPACE_SIZE, 0); } +void rs400_gart_fini(struct radeon_device *rdev) +{ + rs400_gart_disable(rdev); + radeon_gart_table_ram_free(rdev); + radeon_gart_fini(rdev); +} + int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr) { uint32_t entry; @@ -255,14 +276,12 @@ int rs400_mc_init(struct radeon_device *rdev) (void)RREG32(RADEON_HOST_PATH_CNTL); WREG32(RADEON_HOST_PATH_CNTL, tmp); (void)RREG32(RADEON_HOST_PATH_CNTL); + return 0; } void rs400_mc_fini(struct radeon_device *rdev) { - rs400_gart_disable(rdev); - radeon_gart_table_ram_free(rdev); - radeon_gart_fini(rdev); } diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 1b8d62f5e73..c31bd843925 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -68,22 +68,35 @@ void rs600_gart_tlb_flush(struct radeon_device *rdev) tmp = RREG32_MC(RS600_MC_PT0_CNTL); } 
-int rs600_gart_enable(struct radeon_device *rdev) +int rs600_gart_init(struct radeon_device *rdev) { - uint32_t tmp; - int i; int r; + if (rdev->gart.table.vram.robj) { + WARN(1, "RS600 GART already initialized.\n"); + return 0; + } /* Initialize common gart structure */ r = radeon_gart_init(rdev); if (r) { return r; } rdev->gart.table_size = rdev->gart.num_gpu_pages * 8; - r = radeon_gart_table_vram_alloc(rdev); - if (r) { - return r; + return radeon_gart_table_vram_alloc(rdev); +} + +int rs600_gart_enable(struct radeon_device *rdev) +{ + uint32_t tmp; + int r, i; + + if (rdev->gart.table.vram.robj == NULL) { + dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; } + r = radeon_gart_table_vram_pin(rdev); + if (r) + return r; /* FIXME: setup default page */ WREG32_MC(RS600_MC_PT0_CNTL, (RS600_EFFECTIVE_L2_CACHE_SIZE(6) | @@ -138,8 +151,17 @@ void rs600_gart_disable(struct radeon_device *rdev) tmp = RREG32_MC(RS600_MC_CNTL1); tmp &= ~RS600_ENABLE_PAGE_TABLES; WREG32_MC(RS600_MC_CNTL1, tmp); - radeon_object_kunmap(rdev->gart.table.vram.robj); - radeon_object_unpin(rdev->gart.table.vram.robj); + if (rdev->gart.table.vram.robj) { + radeon_object_kunmap(rdev->gart.table.vram.robj); + radeon_object_unpin(rdev->gart.table.vram.robj); + } +} + +void rs600_gart_fini(struct radeon_device *rdev) +{ + rs600_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); } #define R600_PTE_VALID (1 << 0) @@ -235,9 +257,6 @@ int rs600_mc_init(struct radeon_device *rdev) void rs600_mc_fini(struct radeon_device *rdev) { - rs600_gart_disable(rdev); - radeon_gart_table_vram_free(rdev); - radeon_gart_fini(rdev); } diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 839595b0072..0f585ca8276 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -94,9 +94,6 @@ int rs690_mc_init(struct radeon_device *rdev) void rs690_mc_fini(struct radeon_device *rdev) { - rs400_gart_disable(rdev); - 
radeon_gart_table_ram_free(rdev); - radeon_gart_fini(rdev); } diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 03d490269ed..fd799748e7d 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -37,8 +37,6 @@ int r100_cp_reset(struct radeon_device *rdev); int r100_rb2d_reset(struct radeon_device *rdev); int r100_gui_wait_for_idle(struct radeon_device *rdev); int r100_cp_init(struct radeon_device *rdev, unsigned ring_size); -int rv370_pcie_gart_enable(struct radeon_device *rdev); -void rv370_pcie_gart_disable(struct radeon_device *rdev); void r420_pipes_init(struct radeon_device *rdev); void rs600_mc_disable_clients(struct radeon_device *rdev); void rs600_disable_vga(struct radeon_device *rdev); @@ -126,9 +124,6 @@ int rv515_mc_init(struct radeon_device *rdev) void rv515_mc_fini(struct radeon_device *rdev) { - rv370_pcie_gart_disable(rdev); - radeon_gart_table_vram_free(rdev); - radeon_gart_fini(rdev); } diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 5ba5204091e..4f2098bc797 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -48,16 +48,13 @@ int rv770_pcie_gart_enable(struct radeon_device *rdev) u32 tmp; int r, i; - /* Initialize common gart structure */ - r = radeon_gart_init(rdev); - if (r) { - return r; + if (rdev->gart.table.vram.robj == NULL) { + dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; } - rdev->gart.table_size = rdev->gart.num_gpu_pages * 8; - r = radeon_gart_table_vram_alloc(rdev); - if (r) { + r = radeon_gart_table_vram_pin(rdev); + if (r) return r; - } for (i = 0; i < rdev->gart.num_gpu_pages; i++) r600_gart_clear_page(rdev, i); /* Setup L2 cache */ @@ -98,10 +95,6 @@ void rv770_pcie_gart_disable(struct radeon_device *rdev) u32 tmp; int i; - /* Clear ptes*/ - for (i = 0; i < rdev->gart.num_gpu_pages; i++) - r600_gart_clear_page(rdev, i); - r600_pcie_gart_tlb_flush(rdev); /* Disable all tables */ 
for (i = 0; i < 7; i++) WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); @@ -120,6 +113,17 @@ void rv770_pcie_gart_disable(struct radeon_device *rdev) WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); + if (rdev->gart.table.vram.robj) { + radeon_object_kunmap(rdev->gart.table.vram.robj); + radeon_object_unpin(rdev->gart.table.vram.robj); + } +} + +void rv770_pcie_gart_fini(struct radeon_device *rdev) +{ + rv770_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); } @@ -871,6 +875,7 @@ int rv770_suspend(struct radeon_device *rdev) { /* FIXME: we should wait for ring to be empty */ r700_cp_stop(rdev); + rv770_pcie_gart_disable(rdev); return 0; } @@ -944,6 +949,10 @@ int rv770_init(struct radeon_device *rdev) } } + r = r600_pcie_gart_init(rdev); + if (r) + return r; + r = rv770_resume(rdev); if (r) { if (rdev->flags & RADEON_IS_AGP) { @@ -976,9 +985,7 @@ void rv770_fini(struct radeon_device *rdev) { r600_blit_fini(rdev); radeon_ring_fini(rdev); - rv770_pcie_gart_disable(rdev); - radeon_gart_table_vram_free(rdev); - radeon_gart_fini(rdev); + rv770_pcie_gart_fini(rdev); radeon_gem_fini(rdev); radeon_fence_driver_fini(rdev); radeon_clocks_fini(rdev); -- cgit v1.2.3-70-g09d2 From 513bcb4655e68706594e45dfa1d4b181500110ba Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 23 Sep 2009 16:56:27 +1000 Subject: drm/radeon/kms: don't require up to 64k allocations. (v2) This avoids needing to do a kmalloc > PAGE_SIZE for the main indirect buffer chunk, it adds an accessor for all reads from the chunk and caches a single page at a time for subsequent reads. changes since v1: Use a two page pool which should be the most common case a single packet spanning > PAGE_SIZE will be hit, but I'm having trouble seeing anywhere we currently generate anything like that. hopefully proper short page copying at end added parser_error flag to set deep errors instead of having to test every ib value fetch. 
fixed bug in patch that went to list. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r100.c | 188 ++++++++++++++---------------------- drivers/gpu/drm/radeon/r100_track.h | 69 ++++++++++++- drivers/gpu/drm/radeon/r200.c | 79 ++++++++------- drivers/gpu/drm/radeon/r300.c | 137 ++++++++++---------------- drivers/gpu/drm/radeon/r600_cs.c | 26 ++--- drivers/gpu/drm/radeon/radeon.h | 37 ++++++- drivers/gpu/drm/radeon/radeon_cs.c | 105 ++++++++++++++++++-- 7 files changed, 370 insertions(+), 271 deletions(-) (limited to 'drivers/gpu/drm/radeon/r300.c') diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 737970b43ae..9ab976d97e9 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -863,13 +863,11 @@ int r100_cs_parse_packet0(struct radeon_cs_parser *p, void r100_cs_dump_packet(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt) { - struct radeon_cs_chunk *ib_chunk; volatile uint32_t *ib; unsigned i; unsigned idx; ib = p->ib->ptr; - ib_chunk = &p->chunks[p->chunk_ib_idx]; idx = pkt->idx; for (i = 0; i <= (pkt->count + 1); i++, idx++) { DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]); @@ -896,7 +894,7 @@ int r100_cs_packet_parse(struct radeon_cs_parser *p, idx, ib_chunk->length_dw); return -EINVAL; } - header = ib_chunk->kdata[idx]; + header = radeon_get_ib_value(p, idx); pkt->idx = idx; pkt->type = CP_PACKET_GET_TYPE(header); pkt->count = CP_PACKET_GET_COUNT(header); @@ -939,7 +937,6 @@ int r100_cs_packet_parse(struct radeon_cs_parser *p, */ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) { - struct radeon_cs_chunk *ib_chunk; struct drm_mode_object *obj; struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; @@ -947,8 +944,9 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) int crtc_id; int r; uint32_t header, h_idx, reg; + volatile uint32_t *ib; - ib_chunk = &p->chunks[p->chunk_ib_idx]; + ib = p->ib->ptr; /* parse the wait until */ r = r100_cs_packet_parse(p, &waitreloc, p->idx); @@ 
-963,7 +961,7 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) return r; } - if (ib_chunk->kdata[waitreloc.idx + 1] != RADEON_WAIT_CRTC_VLINE) { + if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) { DRM_ERROR("vline wait had illegal wait until\n"); r = -EINVAL; return r; @@ -978,9 +976,9 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) p->idx += waitreloc.count; p->idx += p3reloc.count; - header = ib_chunk->kdata[h_idx]; - crtc_id = ib_chunk->kdata[h_idx + 5]; - reg = ib_chunk->kdata[h_idx] >> 2; + header = radeon_get_ib_value(p, h_idx); + crtc_id = radeon_get_ib_value(p, h_idx + 5); + reg = header >> 2; mutex_lock(&p->rdev->ddev->mode_config.mutex); obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC); if (!obj) { @@ -994,8 +992,9 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) if (!crtc->enabled) { /* if the CRTC isn't enabled - we need to nop out the wait until */ - ib_chunk->kdata[h_idx + 2] = PACKET2(0); - ib_chunk->kdata[h_idx + 3] = PACKET2(0); + + ib[h_idx + 2] = PACKET2(0); + ib[h_idx + 3] = PACKET2(0); } else if (crtc_id == 1) { switch (reg) { case AVIVO_D1MODE_VLINE_START_END: @@ -1011,8 +1010,8 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) r = -EINVAL; goto out; } - ib_chunk->kdata[h_idx] = header; - ib_chunk->kdata[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1; + ib[h_idx] = header; + ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1; } out: mutex_unlock(&p->rdev->ddev->mode_config.mutex); @@ -1033,7 +1032,6 @@ out: int r100_cs_packet_next_reloc(struct radeon_cs_parser *p, struct radeon_cs_reloc **cs_reloc) { - struct radeon_cs_chunk *ib_chunk; struct radeon_cs_chunk *relocs_chunk; struct radeon_cs_packet p3reloc; unsigned idx; @@ -1044,7 +1042,6 @@ int r100_cs_packet_next_reloc(struct radeon_cs_parser *p, return -EINVAL; } *cs_reloc = NULL; - ib_chunk = &p->chunks[p->chunk_ib_idx]; relocs_chunk = &p->chunks[p->chunk_relocs_idx]; r = 
r100_cs_packet_parse(p, &p3reloc, p->idx); if (r) { @@ -1057,7 +1054,7 @@ int r100_cs_packet_next_reloc(struct radeon_cs_parser *p, r100_cs_dump_packet(p, &p3reloc); return -EINVAL; } - idx = ib_chunk->kdata[p3reloc.idx + 1]; + idx = radeon_get_ib_value(p, p3reloc.idx + 1); if (idx >= relocs_chunk->length_dw) { DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", idx, relocs_chunk->length_dw); @@ -1126,7 +1123,6 @@ static int r100_packet0_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, unsigned idx, unsigned reg) { - struct radeon_cs_chunk *ib_chunk; struct radeon_cs_reloc *reloc; struct r100_cs_track *track; volatile uint32_t *ib; @@ -1134,11 +1130,13 @@ static int r100_packet0_check(struct radeon_cs_parser *p, int r; int i, face; u32 tile_flags = 0; + u32 idx_value; ib = p->ib->ptr; - ib_chunk = &p->chunks[p->chunk_ib_idx]; track = (struct r100_cs_track *)p->track; + idx_value = radeon_get_ib_value(p, idx); + switch (reg) { case RADEON_CRTC_GUI_TRIG_VLINE: r = r100_cs_packet_parse_vline(p); @@ -1166,8 +1164,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } track->zb.robj = reloc->robj; - track->zb.offset = ib_chunk->kdata[idx]; - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->zb.offset = idx_value; + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); break; case RADEON_RB3D_COLOROFFSET: r = r100_cs_packet_next_reloc(p, &reloc); @@ -1178,8 +1176,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } track->cb[0].robj = reloc->robj; - track->cb[0].offset = ib_chunk->kdata[idx]; - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->cb[0].offset = idx_value; + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); break; case RADEON_PP_TXOFFSET_0: case RADEON_PP_TXOFFSET_1: @@ -1192,7 +1190,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - ib[idx] = ib_chunk->kdata[idx] + 
((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); track->textures[i].robj = reloc->robj; break; case RADEON_PP_CUBIC_OFFSET_T0_0: @@ -1208,8 +1206,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - track->textures[0].cube_info[i].offset = ib_chunk->kdata[idx]; - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->textures[0].cube_info[i].offset = idx_value; + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); track->textures[0].cube_info[i].robj = reloc->robj; break; case RADEON_PP_CUBIC_OFFSET_T1_0: @@ -1225,8 +1223,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - track->textures[1].cube_info[i].offset = ib_chunk->kdata[idx]; - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->textures[1].cube_info[i].offset = idx_value; + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); track->textures[1].cube_info[i].robj = reloc->robj; break; case RADEON_PP_CUBIC_OFFSET_T2_0: @@ -1242,12 +1240,12 @@ static int r100_packet0_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - track->textures[2].cube_info[i].offset = ib_chunk->kdata[idx]; - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->textures[2].cube_info[i].offset = idx_value; + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); track->textures[2].cube_info[i].robj = reloc->robj; break; case RADEON_RE_WIDTH_HEIGHT: - track->maxy = ((ib_chunk->kdata[idx] >> 16) & 0x7FF); + track->maxy = ((idx_value >> 16) & 0x7FF); break; case RADEON_RB3D_COLORPITCH: r = r100_cs_packet_next_reloc(p, &reloc); @@ -1263,17 +1261,17 @@ static int r100_packet0_check(struct radeon_cs_parser *p, if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; - tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); + tmp = idx_value & ~(0x7 << 16); tmp |= tile_flags; ib[idx] = tmp; - 
track->cb[0].pitch = ib_chunk->kdata[idx] & RADEON_COLORPITCH_MASK; + track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK; break; case RADEON_RB3D_DEPTHPITCH: - track->zb.pitch = ib_chunk->kdata[idx] & RADEON_DEPTHPITCH_MASK; + track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK; break; case RADEON_RB3D_CNTL: - switch ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { + switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { case 7: case 8: case 9: @@ -1291,13 +1289,13 @@ static int r100_packet0_check(struct radeon_cs_parser *p, break; default: DRM_ERROR("Invalid color buffer format (%d) !\n", - ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); + ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); return -EINVAL; } - track->z_enabled = !!(ib_chunk->kdata[idx] & RADEON_Z_ENABLE); + track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); break; case RADEON_RB3D_ZSTENCILCNTL: - switch (ib_chunk->kdata[idx] & 0xf) { + switch (idx_value & 0xf) { case 0: track->zb.cpp = 2; break; @@ -1321,44 +1319,44 @@ static int r100_packet0_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); break; case RADEON_PP_CNTL: { - uint32_t temp = ib_chunk->kdata[idx] >> 4; + uint32_t temp = idx_value >> 4; for (i = 0; i < track->num_texture; i++) track->textures[i].enabled = !!(temp & (1 << i)); } break; case RADEON_SE_VF_CNTL: - track->vap_vf_cntl = ib_chunk->kdata[idx]; + track->vap_vf_cntl = idx_value; break; case RADEON_SE_VTX_FMT: - track->vtx_size = r100_get_vtx_size(ib_chunk->kdata[idx]); + track->vtx_size = r100_get_vtx_size(idx_value); break; case RADEON_PP_TEX_SIZE_0: case RADEON_PP_TEX_SIZE_1: case RADEON_PP_TEX_SIZE_2: i = (reg - RADEON_PP_TEX_SIZE_0) / 8; - track->textures[i].width = (ib_chunk->kdata[idx] & RADEON_TEX_USIZE_MASK) + 1; - track->textures[i].height = ((ib_chunk->kdata[idx] & 
RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; + track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1; + track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; break; case RADEON_PP_TEX_PITCH_0: case RADEON_PP_TEX_PITCH_1: case RADEON_PP_TEX_PITCH_2: i = (reg - RADEON_PP_TEX_PITCH_0) / 8; - track->textures[i].pitch = ib_chunk->kdata[idx] + 32; + track->textures[i].pitch = idx_value + 32; break; case RADEON_PP_TXFILTER_0: case RADEON_PP_TXFILTER_1: case RADEON_PP_TXFILTER_2: i = (reg - RADEON_PP_TXFILTER_0) / 24; - track->textures[i].num_levels = ((ib_chunk->kdata[idx] & RADEON_MAX_MIP_LEVEL_MASK) + track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK) >> RADEON_MAX_MIP_LEVEL_SHIFT); - tmp = (ib_chunk->kdata[idx] >> 23) & 0x7; + tmp = (idx_value >> 23) & 0x7; if (tmp == 2 || tmp == 6) track->textures[i].roundup_w = false; - tmp = (ib_chunk->kdata[idx] >> 27) & 0x7; + tmp = (idx_value >> 27) & 0x7; if (tmp == 2 || tmp == 6) track->textures[i].roundup_h = false; break; @@ -1366,16 +1364,16 @@ static int r100_packet0_check(struct radeon_cs_parser *p, case RADEON_PP_TXFORMAT_1: case RADEON_PP_TXFORMAT_2: i = (reg - RADEON_PP_TXFORMAT_0) / 24; - if (ib_chunk->kdata[idx] & RADEON_TXFORMAT_NON_POWER2) { + if (idx_value & RADEON_TXFORMAT_NON_POWER2) { track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); + track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); } - if (ib_chunk->kdata[idx] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) + if (idx_value & 
RADEON_TXFORMAT_CUBIC_MAP_ENABLE) track->textures[i].tex_coord_type = 2; - switch ((ib_chunk->kdata[idx] & RADEON_TXFORMAT_FORMAT_MASK)) { + switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) { case RADEON_TXFORMAT_I8: case RADEON_TXFORMAT_RGB332: case RADEON_TXFORMAT_Y8: @@ -1402,13 +1400,13 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->textures[i].cpp = 4; break; } - track->textures[i].cube_info[4].width = 1 << ((ib_chunk->kdata[idx] >> 16) & 0xf); - track->textures[i].cube_info[4].height = 1 << ((ib_chunk->kdata[idx] >> 20) & 0xf); + track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf); + track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf); break; case RADEON_PP_CUBIC_FACES_0: case RADEON_PP_CUBIC_FACES_1: case RADEON_PP_CUBIC_FACES_2: - tmp = ib_chunk->kdata[idx]; + tmp = idx_value; i = (reg - RADEON_PP_CUBIC_FACES_0) / 4; for (face = 0; face < 4; face++) { track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); @@ -1427,15 +1425,14 @@ int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, struct radeon_object *robj) { - struct radeon_cs_chunk *ib_chunk; unsigned idx; - - ib_chunk = &p->chunks[p->chunk_ib_idx]; + u32 value; idx = pkt->idx + 1; - if ((ib_chunk->kdata[idx+2] + 1) > radeon_object_size(robj)) { + value = radeon_get_ib_value(p, idx + 2); + if ((value + 1) > radeon_object_size(robj)) { DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER " "(need %u have %lu) !\n", - ib_chunk->kdata[idx+2] + 1, + value + 1, radeon_object_size(robj)); return -EINVAL; } @@ -1445,59 +1442,20 @@ int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p, static int r100_packet3_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt) { - struct radeon_cs_chunk *ib_chunk; struct radeon_cs_reloc *reloc; struct r100_cs_track *track; unsigned idx; - unsigned i, c; volatile uint32_t *ib; int r; ib = p->ib->ptr; - ib_chunk = 
&p->chunks[p->chunk_ib_idx]; idx = pkt->idx + 1; track = (struct r100_cs_track *)p->track; switch (pkt->opcode) { case PACKET3_3D_LOAD_VBPNTR: - c = ib_chunk->kdata[idx++]; - track->num_arrays = c; - for (i = 0; i < (c - 1); i += 2, idx += 3) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8; - track->arrays[i + 0].esize &= 0x7F; - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 1].robj = reloc->robj; - track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24; - track->arrays[i + 1].esize &= 0x7F; - } - if (c & 1) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8; - track->arrays[i + 0].esize &= 0x7F; - } + r = r100_packet3_load_vbpntr(p, pkt, idx); + if (r) + return r; break; case PACKET3_INDX_BUFFER: r = r100_cs_packet_next_reloc(p, &reloc); @@ -1506,7 +1464,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); + ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset); r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); if (r) { return r; @@ -1520,27 +1478,27 @@ static int r100_packet3_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - ib[idx] = 
ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset); track->num_arrays = 1; - track->vtx_size = r100_get_vtx_size(ib_chunk->kdata[idx+2]); + track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2)); track->arrays[0].robj = reloc->robj; track->arrays[0].esize = track->vtx_size; - track->max_indx = ib_chunk->kdata[idx+1]; + track->max_indx = radeon_get_ib_value(p, idx+1); - track->vap_vf_cntl = ib_chunk->kdata[idx+3]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx+3); track->immd_dwords = pkt->count - 1; r = r100_cs_track_check(p->rdev, track); if (r) return r; break; case PACKET3_3D_DRAW_IMMD: - if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) { + if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) { DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } - track->vap_vf_cntl = ib_chunk->kdata[idx+1]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); track->immd_dwords = pkt->count - 1; r = r100_cs_track_check(p->rdev, track); if (r) @@ -1548,11 +1506,11 @@ static int r100_packet3_check(struct radeon_cs_parser *p, break; /* triggers drawing using in-packet vertex data */ case PACKET3_3D_DRAW_IMMD_2: - if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) { + if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) { DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } - track->vap_vf_cntl = ib_chunk->kdata[idx]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx); track->immd_dwords = pkt->count; r = r100_cs_track_check(p->rdev, track); if (r) @@ -1560,28 +1518,28 @@ static int r100_packet3_check(struct radeon_cs_parser *p, break; /* triggers drawing using in-packet vertex data */ case PACKET3_3D_DRAW_VBUF_2: - track->vap_vf_cntl = ib_chunk->kdata[idx]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx); r = r100_cs_track_check(p->rdev, track); if (r) return r; break; /* triggers drawing of vertex buffers setup elsewhere */ case PACKET3_3D_DRAW_INDX_2: - 
track->vap_vf_cntl = ib_chunk->kdata[idx]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx); r = r100_cs_track_check(p->rdev, track); if (r) return r; break; /* triggers drawing using indices to vertex buffer */ case PACKET3_3D_DRAW_VBUF: - track->vap_vf_cntl = ib_chunk->kdata[idx + 1]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); r = r100_cs_track_check(p->rdev, track); if (r) return r; break; /* triggers drawing of vertex buffers setup elsewhere */ case PACKET3_3D_DRAW_INDX: - track->vap_vf_cntl = ib_chunk->kdata[idx + 1]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); r = r100_cs_track_check(p->rdev, track); if (r) return r; diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h index 70a82eda394..0daf0d76a89 100644 --- a/drivers/gpu/drm/radeon/r100_track.h +++ b/drivers/gpu/drm/radeon/r100_track.h @@ -84,6 +84,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, unsigned idx, unsigned reg); + + static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, unsigned idx, @@ -93,9 +95,7 @@ static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p, u32 tile_flags = 0; u32 tmp; struct radeon_cs_reloc *reloc; - struct radeon_cs_chunk *ib_chunk; - - ib_chunk = &p->chunks[p->chunk_ib_idx]; + u32 value; r = r100_cs_packet_next_reloc(p, &reloc); if (r) { @@ -104,7 +104,8 @@ static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - tmp = ib_chunk->kdata[idx] & 0x003fffff; + value = radeon_get_ib_value(p, idx); + tmp = value & 0x003fffff; tmp += (((u32)reloc->lobj.gpu_offset) >> 10); if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) @@ -119,6 +120,64 @@ static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p, } tmp |= tile_flags; - p->ib->ptr[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp; + p->ib->ptr[idx] = (value & 0x3fc00000) | tmp; return 0; } + +static inline int 
r100_packet3_load_vbpntr(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + int idx) +{ + unsigned c, i; + struct radeon_cs_reloc *reloc; + struct r100_cs_track *track; + int r = 0; + volatile uint32_t *ib; + u32 idx_value; + + ib = p->ib->ptr; + track = (struct r100_cs_track *)p->track; + c = radeon_get_ib_value(p, idx++) & 0x1F; + track->num_arrays = c; + for (i = 0; i < (c - 1); i+=2, idx+=3) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize &= 0x7F; + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 1].robj = reloc->robj; + track->arrays[i + 1].esize = idx_value >> 24; + track->arrays[i + 1].esize &= 0x7F; + } + if (c & 1) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].esize &= 0x7F; + } + return r; +} diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index 568c74bfba3..cf7fea5ff2e 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -96,7 +96,6 @@ int r200_packet0_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, unsigned idx, unsigned reg) { - struct radeon_cs_chunk *ib_chunk; struct 
radeon_cs_reloc *reloc; struct r100_cs_track *track; volatile uint32_t *ib; @@ -105,11 +104,11 @@ int r200_packet0_check(struct radeon_cs_parser *p, int i; int face; u32 tile_flags = 0; + u32 idx_value; ib = p->ib->ptr; - ib_chunk = &p->chunks[p->chunk_ib_idx]; track = (struct r100_cs_track *)p->track; - + idx_value = radeon_get_ib_value(p, idx); switch (reg) { case RADEON_CRTC_GUI_TRIG_VLINE: r = r100_cs_packet_parse_vline(p); @@ -137,8 +136,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, return r; } track->zb.robj = reloc->robj; - track->zb.offset = ib_chunk->kdata[idx]; - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->zb.offset = idx_value; + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); break; case RADEON_RB3D_COLOROFFSET: r = r100_cs_packet_next_reloc(p, &reloc); @@ -149,8 +148,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, return r; } track->cb[0].robj = reloc->robj; - track->cb[0].offset = ib_chunk->kdata[idx]; - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->cb[0].offset = idx_value; + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); break; case R200_PP_TXOFFSET_0: case R200_PP_TXOFFSET_1: @@ -166,7 +165,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); track->textures[i].robj = reloc->robj; break; case R200_PP_CUBIC_OFFSET_F1_0: @@ -208,12 +207,12 @@ int r200_packet0_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - track->textures[i].cube_info[face - 1].offset = ib_chunk->kdata[idx]; - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->textures[i].cube_info[face - 1].offset = idx_value; + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); track->textures[i].cube_info[face - 1].robj = reloc->robj; break; case RADEON_RE_WIDTH_HEIGHT: - track->maxy = 
((ib_chunk->kdata[idx] >> 16) & 0x7FF); + track->maxy = ((idx_value >> 16) & 0x7FF); break; case RADEON_RB3D_COLORPITCH: r = r100_cs_packet_next_reloc(p, &reloc); @@ -229,17 +228,17 @@ int r200_packet0_check(struct radeon_cs_parser *p, if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; - tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); + tmp = idx_value & ~(0x7 << 16); tmp |= tile_flags; ib[idx] = tmp; - track->cb[0].pitch = ib_chunk->kdata[idx] & RADEON_COLORPITCH_MASK; + track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK; break; case RADEON_RB3D_DEPTHPITCH: - track->zb.pitch = ib_chunk->kdata[idx] & RADEON_DEPTHPITCH_MASK; + track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK; break; case RADEON_RB3D_CNTL: - switch ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { + switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { case 7: case 8: case 9: @@ -257,18 +256,18 @@ int r200_packet0_check(struct radeon_cs_parser *p, break; default: DRM_ERROR("Invalid color buffer format (%d) !\n", - ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); + ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); return -EINVAL; } - if (ib_chunk->kdata[idx] & RADEON_DEPTHXY_OFFSET_ENABLE) { + if (idx_value & RADEON_DEPTHXY_OFFSET_ENABLE) { DRM_ERROR("No support for depth xy offset in kms\n"); return -EINVAL; } - track->z_enabled = !!(ib_chunk->kdata[idx] & RADEON_Z_ENABLE); + track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); break; case RADEON_RB3D_ZSTENCILCNTL: - switch (ib_chunk->kdata[idx] & 0xf) { + switch (idx_value & 0xf) { case 0: track->zb.cpp = 2; break; @@ -292,27 +291,27 @@ int r200_packet0_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); break; case RADEON_PP_CNTL: { - uint32_t temp = ib_chunk->kdata[idx] >> 4; + uint32_t temp = idx_value 
>> 4; for (i = 0; i < track->num_texture; i++) track->textures[i].enabled = !!(temp & (1 << i)); } break; case RADEON_SE_VF_CNTL: - track->vap_vf_cntl = ib_chunk->kdata[idx]; + track->vap_vf_cntl = idx_value; break; case 0x210c: /* VAP_VF_MAX_VTX_INDX */ - track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL; + track->max_indx = idx_value & 0x00FFFFFFUL; break; case R200_SE_VTX_FMT_0: - track->vtx_size = r200_get_vtx_size_0(ib_chunk->kdata[idx]); + track->vtx_size = r200_get_vtx_size_0(idx_value); break; case R200_SE_VTX_FMT_1: - track->vtx_size += r200_get_vtx_size_1(ib_chunk->kdata[idx]); + track->vtx_size += r200_get_vtx_size_1(idx_value); break; case R200_PP_TXSIZE_0: case R200_PP_TXSIZE_1: @@ -321,8 +320,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case R200_PP_TXSIZE_4: case R200_PP_TXSIZE_5: i = (reg - R200_PP_TXSIZE_0) / 32; - track->textures[i].width = (ib_chunk->kdata[idx] & RADEON_TEX_USIZE_MASK) + 1; - track->textures[i].height = ((ib_chunk->kdata[idx] & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; + track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1; + track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; break; case R200_PP_TXPITCH_0: case R200_PP_TXPITCH_1: @@ -331,7 +330,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, case R200_PP_TXPITCH_4: case R200_PP_TXPITCH_5: i = (reg - R200_PP_TXPITCH_0) / 32; - track->textures[i].pitch = ib_chunk->kdata[idx] + 32; + track->textures[i].pitch = idx_value + 32; break; case R200_PP_TXFILTER_0: case R200_PP_TXFILTER_1: @@ -340,12 +339,12 @@ int r200_packet0_check(struct radeon_cs_parser *p, case R200_PP_TXFILTER_4: case R200_PP_TXFILTER_5: i = (reg - R200_PP_TXFILTER_0) / 32; - track->textures[i].num_levels = ((ib_chunk->kdata[idx] & R200_MAX_MIP_LEVEL_MASK) + track->textures[i].num_levels = ((idx_value & R200_MAX_MIP_LEVEL_MASK) >> R200_MAX_MIP_LEVEL_SHIFT); - tmp = (ib_chunk->kdata[idx] >> 23) & 0x7; + tmp = (idx_value >> 
23) & 0x7; if (tmp == 2 || tmp == 6) track->textures[i].roundup_w = false; - tmp = (ib_chunk->kdata[idx] >> 27) & 0x7; + tmp = (idx_value >> 27) & 0x7; if (tmp == 2 || tmp == 6) track->textures[i].roundup_h = false; break; @@ -364,8 +363,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case R200_PP_TXFORMAT_X_4: case R200_PP_TXFORMAT_X_5: i = (reg - R200_PP_TXFORMAT_X_0) / 32; - track->textures[i].txdepth = ib_chunk->kdata[idx] & 0x7; - tmp = (ib_chunk->kdata[idx] >> 16) & 0x3; + track->textures[i].txdepth = idx_value & 0x7; + tmp = (idx_value >> 16) & 0x3; /* 2D, 3D, CUBE */ switch (tmp) { case 0: @@ -389,14 +388,14 @@ int r200_packet0_check(struct radeon_cs_parser *p, case R200_PP_TXFORMAT_4: case R200_PP_TXFORMAT_5: i = (reg - R200_PP_TXFORMAT_0) / 32; - if (ib_chunk->kdata[idx] & R200_TXFORMAT_NON_POWER2) { + if (idx_value & R200_TXFORMAT_NON_POWER2) { track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); + track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); } - switch ((ib_chunk->kdata[idx] & RADEON_TXFORMAT_FORMAT_MASK)) { + switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) { case R200_TXFORMAT_I8: case R200_TXFORMAT_RGB332: case R200_TXFORMAT_Y8: @@ -424,8 +423,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->textures[i].cpp = 4; break; } - track->textures[i].cube_info[4].width = 1 << ((ib_chunk->kdata[idx] >> 16) & 0xf); - track->textures[i].cube_info[4].height = 1 << ((ib_chunk->kdata[idx] >> 20) & 0xf); + track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf); + 
track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf); break; case R200_PP_CUBIC_FACES_0: case R200_PP_CUBIC_FACES_1: @@ -433,7 +432,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, case R200_PP_CUBIC_FACES_3: case R200_PP_CUBIC_FACES_4: case R200_PP_CUBIC_FACES_5: - tmp = ib_chunk->kdata[idx]; + tmp = idx_value; i = (reg - R200_PP_CUBIC_FACES_0) / 32; for (face = 0; face < 4; face++) { track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index bb151ecdf8f..1ebea8cc8c9 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -697,17 +697,18 @@ static int r300_packet0_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, unsigned idx, unsigned reg) { - struct radeon_cs_chunk *ib_chunk; struct radeon_cs_reloc *reloc; struct r100_cs_track *track; volatile uint32_t *ib; uint32_t tmp, tile_flags = 0; unsigned i; int r; + u32 idx_value; ib = p->ib->ptr; - ib_chunk = &p->chunks[p->chunk_ib_idx]; track = (struct r100_cs_track *)p->track; + idx_value = radeon_get_ib_value(p, idx); + switch(reg) { case AVIVO_D1MODE_VLINE_START_END: case RADEON_CRTC_GUI_TRIG_VLINE: @@ -738,8 +739,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, return r; } track->cb[i].robj = reloc->robj; - track->cb[i].offset = ib_chunk->kdata[idx]; - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->cb[i].offset = idx_value; + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); break; case R300_ZB_DEPTHOFFSET: r = r100_cs_packet_next_reloc(p, &reloc); @@ -750,8 +751,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, return r; } track->zb.robj = reloc->robj; - track->zb.offset = ib_chunk->kdata[idx]; - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + track->zb.offset = idx_value; + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); break; case R300_TX_OFFSET_0: case 
R300_TX_OFFSET_0+4: @@ -777,32 +778,32 @@ static int r300_packet0_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); track->textures[i].robj = reloc->robj; break; /* Tracked registers */ case 0x2084: /* VAP_VF_CNTL */ - track->vap_vf_cntl = ib_chunk->kdata[idx]; + track->vap_vf_cntl = idx_value; break; case 0x20B4: /* VAP_VTX_SIZE */ - track->vtx_size = ib_chunk->kdata[idx] & 0x7F; + track->vtx_size = idx_value & 0x7F; break; case 0x2134: /* VAP_VF_MAX_VTX_INDX */ - track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL; + track->max_indx = idx_value & 0x00FFFFFFUL; break; case 0x43E4: /* SC_SCISSOR1 */ - track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1; + track->maxy = ((idx_value >> 13) & 0x1FFF) + 1; if (p->rdev->family < CHIP_RV515) { track->maxy -= 1440; } break; case 0x4E00: /* RB3D_CCTL */ - track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1; + track->num_cb = ((idx_value >> 5) & 0x3) + 1; break; case 0x4E38: case 0x4E3C: @@ -825,13 +826,13 @@ static int r300_packet0_check(struct radeon_cs_parser *p, if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) tile_flags |= R300_COLOR_MICROTILE_ENABLE; - tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); + tmp = idx_value & ~(0x7 << 16); tmp |= tile_flags; ib[idx] = tmp; i = (reg - 0x4E38) >> 2; - track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE; - switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) { + track->cb[i].pitch = idx_value & 0x3FFE; + switch (((idx_value >> 21) & 0xF)) { case 9: case 11: case 12: @@ -854,13 +855,13 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; default: DRM_ERROR("Invalid color buffer format (%d) !\n", - ((ib_chunk->kdata[idx] >> 21) & 0xF)); + ((idx_value >> 21) & 0xF)); return -EINVAL; } break; case 0x4F00: /* ZB_CNTL */ - if (ib_chunk->kdata[idx] & 2) { + if (idx_value & 2) { track->z_enabled = true; } else { 
track->z_enabled = false; @@ -868,7 +869,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; case 0x4F10: /* ZB_FORMAT */ - switch ((ib_chunk->kdata[idx] & 0xF)) { + switch ((idx_value & 0xF)) { case 0: case 1: track->zb.cpp = 2; @@ -878,7 +879,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; default: DRM_ERROR("Invalid z buffer format (%d) !\n", - (ib_chunk->kdata[idx] & 0xF)); + (idx_value & 0xF)); return -EINVAL; } break; @@ -897,17 +898,17 @@ static int r300_packet0_check(struct radeon_cs_parser *p, if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) tile_flags |= R300_DEPTHMICROTILE_TILED;; - tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); + tmp = idx_value & ~(0x7 << 16); tmp |= tile_flags; ib[idx] = tmp; - track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC; + track->zb.pitch = idx_value & 0x3FFC; break; case 0x4104: for (i = 0; i < 16; i++) { bool enabled; - enabled = !!(ib_chunk->kdata[idx] & (1 << i)); + enabled = !!(idx_value & (1 << i)); track->textures[i].enabled = enabled; } break; @@ -929,9 +930,9 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case 0x44FC: /* TX_FORMAT1_[0-15] */ i = (reg - 0x44C0) >> 2; - tmp = (ib_chunk->kdata[idx] >> 25) & 0x3; + tmp = (idx_value >> 25) & 0x3; track->textures[i].tex_coord_type = tmp; - switch ((ib_chunk->kdata[idx] & 0x1F)) { + switch ((idx_value & 0x1F)) { case R300_TX_FORMAT_X8: case R300_TX_FORMAT_Y4X4: case R300_TX_FORMAT_Z3Y3X2: @@ -971,7 +972,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; default: DRM_ERROR("Invalid texture format %u\n", - (ib_chunk->kdata[idx] & 0x1F)); + (idx_value & 0x1F)); return -EINVAL; break; } @@ -994,11 +995,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case 0x443C: /* TX_FILTER0_[0-15] */ i = (reg - 0x4400) >> 2; - tmp = ib_chunk->kdata[idx] & 0x7; + tmp = idx_value & 0x7; if (tmp == 2 || tmp == 4 || tmp == 6) { track->textures[i].roundup_w = false; } - tmp = (ib_chunk->kdata[idx] >> 3) & 0x7; + 
tmp = (idx_value >> 3) & 0x7; if (tmp == 2 || tmp == 4 || tmp == 6) { track->textures[i].roundup_h = false; } @@ -1021,12 +1022,12 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case 0x453C: /* TX_FORMAT2_[0-15] */ i = (reg - 0x4500) >> 2; - tmp = ib_chunk->kdata[idx] & 0x3FFF; + tmp = idx_value & 0x3FFF; track->textures[i].pitch = tmp + 1; if (p->rdev->family >= CHIP_RV515) { - tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11; + tmp = ((idx_value >> 15) & 1) << 11; track->textures[i].width_11 = tmp; - tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11; + tmp = ((idx_value >> 16) & 1) << 11; track->textures[i].height_11 = tmp; } break; @@ -1048,15 +1049,15 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case 0x44BC: /* TX_FORMAT0_[0-15] */ i = (reg - 0x4480) >> 2; - tmp = ib_chunk->kdata[idx] & 0x7FF; + tmp = idx_value & 0x7FF; track->textures[i].width = tmp + 1; - tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF; + tmp = (idx_value >> 11) & 0x7FF; track->textures[i].height = tmp + 1; - tmp = (ib_chunk->kdata[idx] >> 26) & 0xF; + tmp = (idx_value >> 26) & 0xF; track->textures[i].num_levels = tmp; - tmp = ib_chunk->kdata[idx] & (1 << 31); + tmp = idx_value & (1 << 31); track->textures[i].use_pitch = !!tmp; - tmp = (ib_chunk->kdata[idx] >> 22) & 0xF; + tmp = (idx_value >> 22) & 0xF; track->textures[i].txdepth = tmp; break; case R300_ZB_ZPASS_ADDR: @@ -1067,7 +1068,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); break; case 0x4be8: /* valid register only on RV530 */ @@ -1085,60 +1086,20 @@ static int r300_packet0_check(struct radeon_cs_parser *p, static int r300_packet3_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt) { - struct radeon_cs_chunk *ib_chunk; - struct radeon_cs_reloc *reloc; struct r100_cs_track *track; volatile uint32_t *ib; unsigned idx; - unsigned 
i, c; int r; ib = p->ib->ptr; - ib_chunk = &p->chunks[p->chunk_ib_idx]; idx = pkt->idx + 1; track = (struct r100_cs_track *)p->track; switch(pkt->opcode) { case PACKET3_3D_LOAD_VBPNTR: - c = ib_chunk->kdata[idx++] & 0x1F; - track->num_arrays = c; - for (i = 0; i < (c - 1); i+=2, idx+=3) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8; - track->arrays[i + 0].esize &= 0x7F; - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 1].robj = reloc->robj; - track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24; - track->arrays[i + 1].esize &= 0x7F; - } - if (c & 1) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8; - track->arrays[i + 0].esize &= 0x7F; - } + r = r100_packet3_load_vbpntr(p, pkt, idx); + if (r) + return r; break; case PACKET3_INDX_BUFFER: r = r100_cs_packet_next_reloc(p, &reloc); @@ -1147,7 +1108,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, r100_cs_dump_packet(p, pkt); return r; } - ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); if (r) { return r; @@ -1158,11 +1119,11 @@ static int r300_packet3_check(struct radeon_cs_parser *p, /* 
Number of dwords is vtx_size * (num_vertices - 1) * PRIM_WALK must be equal to 3 vertex data in embedded * in cmd stream */ - if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) { + if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) { DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } - track->vap_vf_cntl = ib_chunk->kdata[idx+1]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); track->immd_dwords = pkt->count - 1; r = r100_cs_track_check(p->rdev, track); if (r) { @@ -1173,11 +1134,11 @@ static int r300_packet3_check(struct radeon_cs_parser *p, /* Number of dwords is vtx_size * (num_vertices - 1) * PRIM_WALK must be equal to 3 vertex data in embedded * in cmd stream */ - if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) { + if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) { DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } - track->vap_vf_cntl = ib_chunk->kdata[idx]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx); track->immd_dwords = pkt->count; r = r100_cs_track_check(p->rdev, track); if (r) { @@ -1185,28 +1146,28 @@ static int r300_packet3_check(struct radeon_cs_parser *p, } break; case PACKET3_3D_DRAW_VBUF: - track->vap_vf_cntl = ib_chunk->kdata[idx + 1]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); r = r100_cs_track_check(p->rdev, track); if (r) { return r; } break; case PACKET3_3D_DRAW_VBUF_2: - track->vap_vf_cntl = ib_chunk->kdata[idx]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx); r = r100_cs_track_check(p->rdev, track); if (r) { return r; } break; case PACKET3_3D_DRAW_INDX: - track->vap_vf_cntl = ib_chunk->kdata[idx + 1]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); r = r100_cs_track_check(p->rdev, track); if (r) { return r; } break; case PACKET3_3D_DRAW_INDX_2: - track->vap_vf_cntl = ib_chunk->kdata[idx]; + track->vap_vf_cntl = radeon_get_ib_value(p, idx); r = r100_cs_track_check(p->rdev, track); if (r) { return r; diff --git a/drivers/gpu/drm/radeon/r600_cs.c 
b/drivers/gpu/drm/radeon/r600_cs.c index 33b89cd8743..c629b5aa4a3 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -57,7 +57,7 @@ int r600_cs_packet_parse(struct radeon_cs_parser *p, idx, ib_chunk->length_dw); return -EINVAL; } - header = ib_chunk->kdata[idx]; + header = radeon_get_ib_value(p, idx); pkt->idx = idx; pkt->type = CP_PACKET_GET_TYPE(header); pkt->count = CP_PACKET_GET_COUNT(header); @@ -98,7 +98,6 @@ int r600_cs_packet_parse(struct radeon_cs_parser *p, static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p, struct radeon_cs_reloc **cs_reloc) { - struct radeon_cs_chunk *ib_chunk; struct radeon_cs_chunk *relocs_chunk; struct radeon_cs_packet p3reloc; unsigned idx; @@ -109,7 +108,6 @@ static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p, return -EINVAL; } *cs_reloc = NULL; - ib_chunk = &p->chunks[p->chunk_ib_idx]; relocs_chunk = &p->chunks[p->chunk_relocs_idx]; r = r600_cs_packet_parse(p, &p3reloc, p->idx); if (r) { @@ -121,7 +119,7 @@ static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p, p3reloc.idx); return -EINVAL; } - idx = ib_chunk->kdata[p3reloc.idx + 1]; + idx = radeon_get_ib_value(p, p3reloc.idx + 1); if (idx >= relocs_chunk->length_dw) { DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", idx, relocs_chunk->length_dw); @@ -146,7 +144,6 @@ static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p, static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p, struct radeon_cs_reloc **cs_reloc) { - struct radeon_cs_chunk *ib_chunk; struct radeon_cs_chunk *relocs_chunk; struct radeon_cs_packet p3reloc; unsigned idx; @@ -157,7 +154,6 @@ static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p, return -EINVAL; } *cs_reloc = NULL; - ib_chunk = &p->chunks[p->chunk_ib_idx]; relocs_chunk = &p->chunks[p->chunk_relocs_idx]; r = r600_cs_packet_parse(p, &p3reloc, p->idx); if (r) { @@ -169,7 +165,7 @@ static int 
r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p, p3reloc.idx); return -EINVAL; } - idx = ib_chunk->kdata[p3reloc.idx + 1]; + idx = radeon_get_ib_value(p, p3reloc.idx + 1); if (idx >= relocs_chunk->length_dw) { DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", idx, relocs_chunk->length_dw); @@ -218,7 +214,6 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p, static int r600_packet3_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt) { - struct radeon_cs_chunk *ib_chunk; struct radeon_cs_reloc *reloc; volatile u32 *ib; unsigned idx; @@ -227,8 +222,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p, int r; ib = p->ib->ptr; - ib_chunk = &p->chunks[p->chunk_ib_idx]; idx = pkt->idx + 1; + switch (pkt->opcode) { case PACKET3_START_3D_CMDBUF: if (p->family >= CHIP_RV770 || pkt->count) { @@ -281,7 +276,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } /* bit 4 is reg (0) or mem (1) */ - if (ib_chunk->kdata[idx+0] & 0x10) { + if (radeon_get_ib_value(p, idx) & 0x10) { r = r600_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("bad WAIT_REG_MEM\n"); @@ -297,8 +292,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } /* 0xffffffff/0x0 is flush all cache flag */ - if (ib_chunk->kdata[idx+1] != 0xffffffff || - ib_chunk->kdata[idx+2] != 0) { + if (radeon_get_ib_value(p, idx + 1) != 0xffffffff || + radeon_get_ib_value(p, idx + 2) != 0) { r = r600_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("bad SURFACE_SYNC\n"); @@ -639,7 +634,6 @@ int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp, * uncached). 
*/ ib_chunk = &parser.chunks[parser.chunk_ib_idx]; parser.ib->length_dw = ib_chunk->length_dw; - memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4); *l = parser.ib->length_dw; r = r600_cs_parse(&parser); if (r) { @@ -647,6 +641,12 @@ int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp, r600_cs_parser_fini(&parser, r); return r; } + r = radeon_cs_finish_pages(&parser); + if (r) { + DRM_ERROR("Invalid command stream !\n"); + r600_cs_parser_fini(&parser, r); + return r; + } r600_cs_parser_fini(&parser, r); return r; } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index d5de53e06ce..7e34e4376f9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -342,7 +342,7 @@ struct radeon_ib { unsigned long idx; uint64_t gpu_addr; struct radeon_fence *fence; - volatile uint32_t *ptr; + uint32_t *ptr; uint32_t length_dw; }; @@ -415,7 +415,12 @@ struct radeon_cs_reloc { struct radeon_cs_chunk { uint32_t chunk_id; uint32_t length_dw; + int kpage_idx[2]; + uint32_t *kpage[2]; uint32_t *kdata; + void __user *user_ptr; + int last_copied_page; + int last_page_index; }; struct radeon_cs_parser { @@ -438,8 +443,38 @@ struct radeon_cs_parser { struct radeon_ib *ib; void *track; unsigned family; + int parser_error; }; +extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx); +extern int radeon_cs_finish_pages(struct radeon_cs_parser *p); + + +static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) +{ + struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; + u32 pg_idx, pg_offset; + u32 idx_value = 0; + int new_page; + + pg_idx = (idx * 4) / PAGE_SIZE; + pg_offset = (idx * 4) % PAGE_SIZE; + + if (ibc->kpage_idx[0] == pg_idx) + return ibc->kpage[0][pg_offset/4]; + if (ibc->kpage_idx[1] == pg_idx) + return ibc->kpage[1][pg_offset/4]; + + new_page = radeon_cs_update_pages(p, pg_idx); + if (new_page < 0) { + p->parser_error = new_page; + return 
0; + } + + idx_value = ibc->kpage[new_page][pg_offset/4]; + return idx_value; +} + struct radeon_cs_packet { unsigned idx; unsigned type; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 12f5990c2d2..dea8acf8886 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -142,15 +142,31 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) } p->chunks[i].length_dw = user_chunk.length_dw; - cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data; + p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data; - size = p->chunks[i].length_dw * sizeof(uint32_t); - p->chunks[i].kdata = kmalloc(size, GFP_KERNEL); - if (p->chunks[i].kdata == NULL) { - return -ENOMEM; - } - if (DRM_COPY_FROM_USER(p->chunks[i].kdata, cdata, size)) { - return -EFAULT; + cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data; + if (p->chunks[i].chunk_id != RADEON_CHUNK_ID_IB) { + size = p->chunks[i].length_dw * sizeof(uint32_t); + p->chunks[i].kdata = kmalloc(size, GFP_KERNEL); + if (p->chunks[i].kdata == NULL) { + return -ENOMEM; + } + if (DRM_COPY_FROM_USER(p->chunks[i].kdata, + p->chunks[i].user_ptr, size)) { + return -EFAULT; + } + } else { + p->chunks[i].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL); + p->chunks[i].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (p->chunks[i].kpage[0] == NULL || p->chunks[i].kpage[1] == NULL) { + kfree(p->chunks[i].kpage[0]); + kfree(p->chunks[i].kpage[1]); + return -ENOMEM; + } + p->chunks[i].kpage_idx[0] = -1; + p->chunks[i].kpage_idx[1] = -1; + p->chunks[i].last_copied_page = -1; + p->chunks[i].last_page_index = ((p->chunks[i].length_dw * 4) - 1) / PAGE_SIZE; } } if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) { @@ -190,6 +206,8 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) kfree(parser->relocs_ptr); for (i = 0; i < parser->nchunks; i++) { kfree(parser->chunks[i].kdata); + kfree(parser->chunks[i].kpage[0]); 
+ kfree(parser->chunks[i].kpage[1]); } kfree(parser->chunks); kfree(parser->chunks_array); @@ -238,8 +256,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * uncached). */ ib_chunk = &parser.chunks[parser.chunk_ib_idx]; parser.ib->length_dw = ib_chunk->length_dw; - memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4); r = radeon_cs_parse(&parser); + if (r || parser.parser_error) { + DRM_ERROR("Invalid command stream !\n"); + radeon_cs_parser_fini(&parser, r); + mutex_unlock(&rdev->cs_mutex); + return r; + } + r = radeon_cs_finish_pages(&parser); if (r) { DRM_ERROR("Invalid command stream !\n"); radeon_cs_parser_fini(&parser, r); @@ -254,3 +278,66 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) mutex_unlock(&rdev->cs_mutex); return r; } + +int radeon_cs_finish_pages(struct radeon_cs_parser *p) +{ + struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; + int i; + int size = PAGE_SIZE; + + for (i = ibc->last_copied_page + 1; i <= ibc->last_page_index; i++) { + if (i == ibc->last_page_index) { + size = (ibc->length_dw * 4) % PAGE_SIZE; + if (size == 0) + size = PAGE_SIZE; + } + + if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)), + ibc->user_ptr + (i * PAGE_SIZE), + size)) + return -EFAULT; + } + return 0; +} + +int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx) +{ + int new_page; + int num_extra_pages; + struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; + int i; + int size = PAGE_SIZE; + + num_extra_pages = (pg_idx - ibc->last_copied_page - 1); + for (i = ibc->last_copied_page + 1; i < ibc->last_copied_page + num_extra_pages; i++) { + if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)), + ibc->user_ptr + (i * PAGE_SIZE), + PAGE_SIZE)) { + p->parser_error = -EFAULT; + return 0; + } + } + + new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 
0 : 1; + + if (pg_idx == ibc->last_page_index) { + size = (ibc->length_dw * 4) % PAGE_SIZE; + if (size == 0) + size = PAGE_SIZE; + } + + if (DRM_COPY_FROM_USER(ibc->kpage[new_page], + ibc->user_ptr + (pg_idx * PAGE_SIZE), + size)) { + p->parser_error = -EFAULT; + return 0; + } + + /* copy to IB here */ + memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size); + + ibc->last_copied_page = pg_idx; + ibc->kpage_idx[new_page] = pg_idx; + + return new_page; +} -- cgit v1.2.3-70-g09d2