From 8410ea3b95d105a5be5db501656f44bbb91197c1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 15 Dec 2010 03:16:38 +1000 Subject: drm: rework PCI/platform driver interface. This abstracts the pci/platform interface out a step further, we can go further but this is far enough for now to allow USB to be plugged in. The drivers now just call the init code directly for their device type. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_kms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/radeon/radeon_kms.c') diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 8387d32caaa..4057ebf5195 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -58,9 +58,9 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) dev->dev_private = (void *)rdev; /* update BUS flag */ - if (drm_device_is_agp(dev)) { + if (drm_pci_device_is_agp(dev)) { flags |= RADEON_IS_AGP; - } else if (drm_device_is_pcie(dev)) { + } else if (drm_pci_device_is_pcie(dev)) { flags |= RADEON_IS_PCIE; } else { flags |= RADEON_IS_PCI; -- cgit v1.2.3-70-g09d2 From 486af1896f3a4a388410215c5a2014b9d09a79f5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 1 Mar 2011 14:32:27 +1000 Subject: drm/radeon: add new getparam for number of backends. This allows userspace to work out how many DBs there are for conditional rendering to work. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_drv.c | 2 +- drivers/gpu/drm/radeon/radeon_kms.c | 11 +++++++++++ include/drm/radeon_drm.h | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon/radeon_kms.c') diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 6c116150558..63d2de8771d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -49,7 +49,7 @@ * - 2.6.0 - add tiling config query (r6xx+), add initial HiZ support (r300->r500) * 2.7.0 - fixups for r600 2D tiling support. (no external ABI change), add eg dyn gpr regs * 2.8.0 - pageflip support, r500 US_FORMAT regs. r500 ARGB2101010 colorbuf, r300->r500 CMASK, clock crystal query - * 2.9.0 - r600 tiling (s3tc,rgtc) working, SET_PREDICATION packet 3 on r600 + eg + * 2.9.0 - r600 tiling (s3tc,rgtc) working, SET_PREDICATION packet 3 on r600 + eg, backend query */ #define KMS_DRIVER_MAJOR 2 #define KMS_DRIVER_MINOR 9 diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 4057ebf5195..68a7b22c1fe 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -205,6 +205,17 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) /* return clock value in KHz */ value = rdev->clock.spll.reference_freq * 10; break; + case RADEON_INFO_NUM_BACKENDS: + if (rdev->family >= CHIP_CEDAR) + value = rdev->config.evergreen.max_backends; + else if (rdev->family >= CHIP_RV770) + value = rdev->config.rv770.max_backends; + else if (rdev->family >= CHIP_R600) + value = rdev->config.r600.max_backends; + else { + return -EINVAL; + } + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index e5c607a02d5..3dec41cf834 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -908,6 +908,7 @@ struct drm_radeon_cs { #define RADEON_INFO_WANT_HYPERZ 0x07 #define RADEON_INFO_WANT_CMASK 0x08 /* get access to CMASK on r300 */ #define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x09 /* clock crystal frequency */ +#define RADEON_INFO_NUM_BACKENDS 0x0a /* DB/backends for r600+ - need for OQ */ struct drm_radeon_info { uint32_t request; -- cgit v1.2.3-70-g09d2 From fecf1d072f96114266ed3aae8c4fb93f9c179b00 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 2 Mar 2011 20:07:29 -0500 Subject: drm/radeon/kms: add gpu_init function for cayman This may some work to get accel going. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/ni.c | 550 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/nid.h | 269 ++++++++++++++++++ drivers/gpu/drm/radeon/radeon.h | 40 +++ drivers/gpu/drm/radeon/radeon_kms.c | 9 +- 4 files changed, 866 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/radeon/radeon_kms.c') diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 058aa4f84d1..eaefb5b066d 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -381,3 +381,553 @@ out: return err; } +/* + * Core functions + */ +static u32 cayman_get_tile_pipe_to_backend_map(struct radeon_device *rdev, + u32 num_tile_pipes, + u32 num_backends_per_asic, + u32 *backend_disable_mask_per_asic, + u32 num_shader_engines) +{ + u32 backend_map = 0; + u32 enabled_backends_mask = 0; + u32 enabled_backends_count = 0; + u32 num_backends_per_se; + u32 cur_pipe; + u32 swizzle_pipe[CAYMAN_MAX_PIPES]; + u32 cur_backend = 0; + u32 i; + bool force_no_swizzle; + + /* force legal values */ + if (num_tile_pipes < 1) + num_tile_pipes = 1; + if (num_tile_pipes > rdev->config.cayman.max_tile_pipes) + num_tile_pipes = rdev->config.cayman.max_tile_pipes; + if (num_shader_engines < 1) + num_shader_engines = 1; + if (num_shader_engines > rdev->config.cayman.max_shader_engines) + num_shader_engines = rdev->config.cayman.max_shader_engines; + if (num_backends_per_asic > num_shader_engines) + num_backends_per_asic = num_shader_engines; + if (num_backends_per_asic > (rdev->config.cayman.max_backends_per_se * num_shader_engines)) + num_backends_per_asic = rdev->config.cayman.max_backends_per_se * num_shader_engines; + + /* make sure we have the same number of backends per se */ + num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines); + /* set up the number of backends per se */ + num_backends_per_se = num_backends_per_asic / num_shader_engines; + if (num_backends_per_se > rdev->config.cayman.max_backends_per_se) { + num_backends_per_se = rdev->config.cayman.max_backends_per_se; + num_backends_per_asic = num_backends_per_se * num_shader_engines; + } + + /* create enable mask and count for enabled backends */ + for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) { + if (((*backend_disable_mask_per_asic >> i) & 1) == 0) { + enabled_backends_mask |= (1 << i); + ++enabled_backends_count; + } + if (enabled_backends_count == num_backends_per_asic) + break; + } + + /* force the backends mask to match the current number of backends */ + if (enabled_backends_count != num_backends_per_asic) { + u32 this_backend_enabled; + u32 shader_engine; + u32 backend_per_se; + + enabled_backends_mask = 0; + enabled_backends_count = 0; + *backend_disable_mask_per_asic = CAYMAN_MAX_BACKENDS_MASK; + for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) { + /* calc the current se */ + shader_engine = i / rdev->config.cayman.max_backends_per_se; + /* calc the backend per se */ + backend_per_se = i % rdev->config.cayman.max_backends_per_se; + /* default to not enabled */ + this_backend_enabled = 0; + if ((shader_engine < num_shader_engines) && + (backend_per_se < num_backends_per_se)) + this_backend_enabled = 1; + if (this_backend_enabled) { + enabled_backends_mask |= (1 << i); + *backend_disable_mask_per_asic &= ~(1 << i); + ++enabled_backends_count; + } + } + } + + + memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * CAYMAN_MAX_PIPES); + switch (rdev->family) { + case CHIP_CAYMAN: + force_no_swizzle = true; + break; + default: + force_no_swizzle = false; + break; + } + if (force_no_swizzle) { + bool last_backend_enabled = false; + + force_no_swizzle = false; + for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) { + if (((enabled_backends_mask >> i) & 1) == 1) { + if (last_backend_enabled) + force_no_swizzle = true; + last_backend_enabled = true; + } else + last_backend_enabled = false; + } + } + + switch (num_tile_pipes) { + case 1: + case 3: + case 5: + case 7: + DRM_ERROR("odd number of pipes!\n"); + break; + case 2: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + break; + case 4: + if (force_no_swizzle) { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + } else { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 1; + swizzle_pipe[3] = 3; + } + break; + case 6: + if (force_no_swizzle) { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + swizzle_pipe[4] = 4; + swizzle_pipe[5] = 5; + } else { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 1; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 5; + } + break; + case 8: + if (force_no_swizzle) { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + swizzle_pipe[4] = 4; + swizzle_pipe[5] = 5; + swizzle_pipe[6] = 6; + swizzle_pipe[7] = 7; + } else { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + swizzle_pipe[6] = 5; + swizzle_pipe[7] = 7; + } + break; + } + + for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { + while (((1 << cur_backend) & enabled_backends_mask) == 0) + cur_backend = (cur_backend + 1) % CAYMAN_MAX_BACKENDS; + + backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4))); + + cur_backend = (cur_backend + 1) % CAYMAN_MAX_BACKENDS; + } + + return backend_map; +} + +static void cayman_program_channel_remap(struct radeon_device *rdev) +{ + u32 tcp_chan_steer_lo, tcp_chan_steer_hi, mc_shared_chremap, tmp; + + tmp = RREG32(MC_SHARED_CHMAP); + switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { + case 0: + case 1: + case 2: + case 3: + default: + /* default mapping */ + mc_shared_chremap = 0x00fac688; + break; + } + + switch (rdev->family) { + case CHIP_CAYMAN: + default: + //tcp_chan_steer_lo = 0x54763210 + tcp_chan_steer_lo = 0x76543210; + tcp_chan_steer_hi = 0x0000ba98; + break; + } + + WREG32(TCP_CHAN_STEER_LO, tcp_chan_steer_lo); + WREG32(TCP_CHAN_STEER_HI, tcp_chan_steer_hi); + WREG32(MC_SHARED_CHREMAP, mc_shared_chremap); +} + +static u32 cayman_get_disable_mask_per_asic(struct radeon_device *rdev, + u32 disable_mask_per_se, + u32 max_disable_mask_per_se, + u32 num_shader_engines) +{ + u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se); + u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se; + + if (num_shader_engines == 1) + return disable_mask_per_asic; + else if (num_shader_engines == 2) + return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se); + else + return 0xffffffff; +} + +static void cayman_gpu_init(struct radeon_device *rdev) +{ + u32 cc_rb_backend_disable = 0; + u32 cc_gc_shader_pipe_config; + u32 gb_addr_config = 0; + u32 mc_shared_chmap, mc_arb_ramcfg; + u32 gb_backend_map; + u32 cgts_tcc_disable; + u32 sx_debug_1; + u32 smx_dc_ctl0; + u32 gc_user_shader_pipe_config; + u32 gc_user_rb_backend_disable; + u32 cgts_user_tcc_disable; + u32 cgts_sm_ctrl_reg; + u32 hdp_host_path_cntl; + u32 tmp; + int i, j; + + switch (rdev->family) { + case CHIP_CAYMAN: + default: + rdev->config.cayman.max_shader_engines = 2; + rdev->config.cayman.max_pipes_per_simd = 4; + rdev->config.cayman.max_tile_pipes = 8; + rdev->config.cayman.max_simds_per_se = 12; + rdev->config.cayman.max_backends_per_se = 4; + rdev->config.cayman.max_texture_channel_caches = 8; + rdev->config.cayman.max_gprs = 256; + rdev->config.cayman.max_threads = 256; + rdev->config.cayman.max_gs_threads = 32; + rdev->config.cayman.max_stack_entries = 512; + rdev->config.cayman.sx_num_of_sets = 8; + rdev->config.cayman.sx_max_export_size = 256; + rdev->config.cayman.sx_max_export_pos_size = 64; + rdev->config.cayman.sx_max_export_smx_size = 192; + rdev->config.cayman.max_hw_contexts = 8; + rdev->config.cayman.sq_num_cf_insts = 2; + + rdev->config.cayman.sc_prim_fifo_size = 0x100; + rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30; + rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130; + break; + } + + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + + WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); + + mc_shared_chmap = RREG32(MC_SHARED_CHMAP); + mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); + + cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE); + cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG); + cgts_tcc_disable = RREG32(CGTS_TCC_DISABLE); + gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE); + gc_user_shader_pipe_config = RREG32(GC_USER_SHADER_PIPE_CONFIG); + cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE); + + rdev->config.cayman.num_shader_engines = rdev->config.cayman.max_shader_engines; + tmp = ((~gc_user_shader_pipe_config) & INACTIVE_QD_PIPES_MASK) >> INACTIVE_QD_PIPES_SHIFT; + rdev->config.cayman.num_shader_pipes_per_simd = r600_count_pipe_bits(tmp); + rdev->config.cayman.num_tile_pipes = rdev->config.cayman.max_tile_pipes; + tmp = ((~gc_user_shader_pipe_config) & INACTIVE_SIMDS_MASK) >> INACTIVE_SIMDS_SHIFT; + rdev->config.cayman.num_simds_per_se = r600_count_pipe_bits(tmp); + tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT; + rdev->config.cayman.num_backends_per_se = r600_count_pipe_bits(tmp); + tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT; + rdev->config.cayman.backend_disable_mask_per_asic = + cayman_get_disable_mask_per_asic(rdev, tmp, CAYMAN_MAX_BACKENDS_PER_SE_MASK, + rdev->config.cayman.num_shader_engines); + rdev->config.cayman.backend_map = + cayman_get_tile_pipe_to_backend_map(rdev, rdev->config.cayman.num_tile_pipes, + rdev->config.cayman.num_backends_per_se * + rdev->config.cayman.num_shader_engines, + &rdev->config.cayman.backend_disable_mask_per_asic, + rdev->config.cayman.num_shader_engines); + tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT; + rdev->config.cayman.num_texture_channel_caches = r600_count_pipe_bits(tmp); + tmp = (mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT; + rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256; + if (rdev->config.cayman.mem_max_burst_length_bytes > 512) + rdev->config.cayman.mem_max_burst_length_bytes = 512; + tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; + rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; + if (rdev->config.cayman.mem_row_size_in_kb > 4) + rdev->config.cayman.mem_row_size_in_kb = 4; + /* XXX use MC settings? */ + rdev->config.cayman.shader_engine_tile_size = 32; + rdev->config.cayman.num_gpus = 1; + rdev->config.cayman.multi_gpu_tile_size = 64; + + //gb_addr_config = 0x02011003 +#if 0 + gb_addr_config = RREG32(GB_ADDR_CONFIG); +#else + gb_addr_config = 0; + switch (rdev->config.cayman.num_tile_pipes) { + case 1: + default: + gb_addr_config |= NUM_PIPES(0); + break; + case 2: + gb_addr_config |= NUM_PIPES(1); + break; + case 4: + gb_addr_config |= NUM_PIPES(2); + break; + case 8: + gb_addr_config |= NUM_PIPES(3); + break; + } + + tmp = (rdev->config.cayman.mem_max_burst_length_bytes / 256) - 1; + gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp); + gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.cayman.num_shader_engines - 1); + tmp = (rdev->config.cayman.shader_engine_tile_size / 16) - 1; + gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp); + switch (rdev->config.cayman.num_gpus) { + case 1: + default: + gb_addr_config |= NUM_GPUS(0); + break; + case 2: + gb_addr_config |= NUM_GPUS(1); + break; + case 4: + gb_addr_config |= NUM_GPUS(2); + break; + } + switch (rdev->config.cayman.multi_gpu_tile_size) { + case 16: + gb_addr_config |= MULTI_GPU_TILE_SIZE(0); + break; + case 32: + default: + gb_addr_config |= MULTI_GPU_TILE_SIZE(1); + break; + case 64: + gb_addr_config |= MULTI_GPU_TILE_SIZE(2); + break; + case 128: + gb_addr_config |= MULTI_GPU_TILE_SIZE(3); + break; + } + switch (rdev->config.cayman.mem_row_size_in_kb) { + case 1: + default: + gb_addr_config |= ROW_SIZE(0); + break; + case 2: + gb_addr_config |= ROW_SIZE(1); + break; + case 4: + gb_addr_config |= ROW_SIZE(2); + break; + } +#endif + + tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT; + rdev->config.cayman.num_tile_pipes = (1 << tmp); + tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT; + rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256; + tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT; + rdev->config.cayman.num_shader_engines = tmp + 1; + tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT; + rdev->config.cayman.num_gpus = tmp + 1; + tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT; + rdev->config.cayman.multi_gpu_tile_size = 1 << tmp; + tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT; + rdev->config.cayman.mem_row_size_in_kb = 1 << tmp; + + //gb_backend_map = 0x76541032; +#if 0 + gb_backend_map = RREG32(GB_BACKEND_MAP); +#else + gb_backend_map = + cayman_get_tile_pipe_to_backend_map(rdev, rdev->config.cayman.num_tile_pipes, + rdev->config.cayman.num_backends_per_se * + rdev->config.cayman.num_shader_engines, + &rdev->config.cayman.backend_disable_mask_per_asic, + rdev->config.cayman.num_shader_engines); +#endif + /* setup tiling info dword. gb_addr_config is not adequate since it does + * not have bank info, so create a custom tiling dword. + * bits 3:0 num_pipes + * bits 7:4 num_banks + * bits 11:8 group_size + * bits 15:12 row_size + */ + rdev->config.cayman.tile_config = 0; + switch (rdev->config.cayman.num_tile_pipes) { + case 1: + default: + rdev->config.cayman.tile_config |= (0 << 0); + break; + case 2: + rdev->config.cayman.tile_config |= (1 << 0); + break; + case 4: + rdev->config.cayman.tile_config |= (2 << 0); + break; + case 8: + rdev->config.cayman.tile_config |= (3 << 0); + break; + } + rdev->config.cayman.tile_config |= + ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; + rdev->config.cayman.tile_config |= + (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT; + rdev->config.cayman.tile_config |= + ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; + + WREG32(GB_BACKEND_MAP, gb_backend_map); + WREG32(GB_ADDR_CONFIG, gb_addr_config); + WREG32(DMIF_ADDR_CONFIG, gb_addr_config); + WREG32(HDP_ADDR_CONFIG, gb_addr_config); + + cayman_program_channel_remap(rdev); + + /* primary versions */ + WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); + + WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable); + WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable); + + /* user versions */ + WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); + + WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable); + WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable); + + /* reprogram the shader complex */ + cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG); + for (i = 0; i < 16; i++) + WREG32(CGTS_SM_CTRL_REG, OVERRIDE); + WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg); + + /* set HW defaults for 3D engine */ + WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); + + sx_debug_1 = RREG32(SX_DEBUG_1); + sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS; + WREG32(SX_DEBUG_1, sx_debug_1); + + smx_dc_ctl0 = RREG32(SMX_DC_CTL0); + smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff); + smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.evergreen.sx_num_of_sets); + WREG32(SMX_DC_CTL0, smx_dc_ctl0); + + WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE); + + /* need to be explicitly zero-ed */ + WREG32(VGT_OFFCHIP_LDS_BASE, 0); + WREG32(SQ_LSTMP_RING_BASE, 0); + WREG32(SQ_HSTMP_RING_BASE, 0); + WREG32(SQ_ESTMP_RING_BASE, 0); + WREG32(SQ_GSTMP_RING_BASE, 0); + WREG32(SQ_VSTMP_RING_BASE, 0); + WREG32(SQ_PSTMP_RING_BASE, 0); + + WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO); + + WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_size / 4) - 1) | + POSITION_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_pos_size / 4) - 1) | + SMX_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_smx_size / 4) - 1))); + + WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.evergreen.sc_prim_fifo_size) | + SC_HIZ_TILE_FIFO_SIZE(rdev->config.evergreen.sc_hiz_tile_fifo_size) | + SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.evergreen.sc_earlyz_tile_fifo_size))); + + + WREG32(VGT_NUM_INSTANCES, 1); + + WREG32(CP_PERFMON_CNTL, 0); + + WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.evergreen.sq_num_cf_insts) | + FETCH_FIFO_HIWATER(0x4) | + DONE_FIFO_HIWATER(0xe0) | + ALU_UPDATE_FIFO_HIWATER(0x8))); + + WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4)); + WREG32(SQ_CONFIG, (VC_ENABLE | + EXPORT_SRC_C | + GFX_PRIO(0) | + CS1_PRIO(0) | + CS2_PRIO(1))); + WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE); + + WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | + FORCE_EOV_MAX_REZ_CNT(255))); + + WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) | + AUTO_INVLD_EN(ES_AND_GS_AUTO)); + + WREG32(VGT_GS_VERTEX_REUSE, 16); + WREG32(PA_SC_LINE_STIPPLE_STATE, 0); + + WREG32(CB_PERF_CTR0_SEL_0, 0); + WREG32(CB_PERF_CTR0_SEL_1, 0); + WREG32(CB_PERF_CTR1_SEL_0, 0); + WREG32(CB_PERF_CTR1_SEL_1, 0); + WREG32(CB_PERF_CTR2_SEL_0, 0); + WREG32(CB_PERF_CTR2_SEL_1, 0); + WREG32(CB_PERF_CTR3_SEL_0, 0); + WREG32(CB_PERF_CTR3_SEL_1, 0); + + hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); + WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); + + WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3)); + + udelay(50); +} + diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index f7b445390e0..b4ba1b013cc 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -24,7 +24,44 @@ #ifndef NI_H #define NI_H +#define CAYMAN_MAX_SH_GPRS 256 +#define CAYMAN_MAX_TEMP_GPRS 16 +#define CAYMAN_MAX_SH_THREADS 256 +#define CAYMAN_MAX_SH_STACK_ENTRIES 4096 +#define CAYMAN_MAX_FRC_EOV_CNT 16384 +#define CAYMAN_MAX_BACKENDS 8 +#define CAYMAN_MAX_BACKENDS_MASK 0xFF +#define CAYMAN_MAX_BACKENDS_PER_SE_MASK 0xF +#define CAYMAN_MAX_SIMDS 16 +#define CAYMAN_MAX_SIMDS_MASK 0xFFFF +#define CAYMAN_MAX_SIMDS_PER_SE_MASK 0xFFF +#define CAYMAN_MAX_PIPES 8 +#define CAYMAN_MAX_PIPES_MASK 0xFF +#define CAYMAN_MAX_LDS_NUM 0xFFFF +#define CAYMAN_MAX_TCC 16 +#define CAYMAN_MAX_TCC_MASK 0xFF + +#define DMIF_ADDR_CONFIG 0xBD4 + +#define MC_SHARED_CHMAP 0x2004 +#define NOOFCHAN_SHIFT 12 +#define NOOFCHAN_MASK 0x00003000 +#define MC_SHARED_CHREMAP 0x2008 #define MC_SHARED_BLACKOUT_CNTL 0x20ac +#define MC_ARB_RAMCFG 0x2760 +#define NOOFBANK_SHIFT 0 +#define NOOFBANK_MASK 0x00000003 +#define NOOFRANK_SHIFT 2 +#define NOOFRANK_MASK 0x00000004 +#define NOOFROWS_SHIFT 3 +#define NOOFROWS_MASK 0x00000038 +#define NOOFCOLS_SHIFT 6 +#define NOOFCOLS_MASK 0x000000C0 +#define CHANSIZE_SHIFT 8 +#define CHANSIZE_MASK 0x00000100 +#define BURSTLENGTH_SHIFT 9 +#define BURSTLENGTH_MASK 0x00000200 +#define CHANSIZE_OVERRIDE (1 << 11) #define MC_SEQ_SUP_CNTL 0x28c8 #define RUN_MASK (1 << 0) #define MC_SEQ_SUP_PGM 0x28cc @@ -37,5 +74,237 @@ #define MC_SEQ_IO_DEBUG_INDEX 0x2a44 #define MC_SEQ_IO_DEBUG_DATA 0x2a48 +#define HDP_HOST_PATH_CNTL 0x2C00 +#define HDP_NONSURFACE_BASE 0x2C04 +#define HDP_NONSURFACE_INFO 0x2C08 +#define HDP_NONSURFACE_SIZE 0x2C0C +#define HDP_ADDR_CONFIG 0x2F48 + +#define CC_SYS_RB_BACKEND_DISABLE 0x3F88 +#define GC_USER_SYS_RB_BACKEND_DISABLE 0x3F8C +#define CGTS_SYS_TCC_DISABLE 0x3F90 +#define CGTS_USER_SYS_TCC_DISABLE 0x3F94 + +#define CONFIG_MEMSIZE 0x5428 + +#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 + +#define GRBM_CNTL 0x8000 +#define GRBM_READ_TIMEOUT(x) ((x) << 0) +#define GRBM_STATUS 0x8010 +#define CMDFIFO_AVAIL_MASK 0x0000000F +#define RING2_RQ_PENDING (1 << 4) +#define SRBM_RQ_PENDING (1 << 5) +#define RING1_RQ_PENDING (1 << 6) +#define CF_RQ_PENDING (1 << 7) +#define PF_RQ_PENDING (1 << 8) +#define GDS_DMA_RQ_PENDING (1 << 9) +#define GRBM_EE_BUSY (1 << 10) +#define SX_CLEAN (1 << 11) +#define DB_CLEAN (1 << 12) +#define CB_CLEAN (1 << 13) +#define TA_BUSY (1 << 14) +#define GDS_BUSY (1 << 15) +#define VGT_BUSY_NO_DMA (1 << 16) +#define VGT_BUSY (1 << 17) +#define IA_BUSY_NO_DMA (1 << 18) +#define IA_BUSY (1 << 19) +#define SX_BUSY (1 << 20) +#define SH_BUSY (1 << 21) +#define SPI_BUSY (1 << 22) +#define SC_BUSY (1 << 24) +#define PA_BUSY (1 << 25) +#define DB_BUSY (1 << 26) +#define CP_COHERENCY_BUSY (1 << 28) +#define CP_BUSY (1 << 29) +#define CB_BUSY (1 << 30) +#define GUI_ACTIVE (1 << 31) +#define GRBM_STATUS_SE0 0x8014 +#define GRBM_STATUS_SE1 0x8018 +#define SE_SX_CLEAN (1 << 0) +#define SE_DB_CLEAN (1 << 1) +#define SE_CB_CLEAN (1 << 2) +#define SE_VGT_BUSY (1 << 23) +#define SE_PA_BUSY (1 << 24) +#define SE_TA_BUSY (1 << 25) +#define SE_SX_BUSY (1 << 26) +#define SE_SPI_BUSY (1 << 27) +#define SE_SH_BUSY (1 << 28) +#define SE_SC_BUSY (1 << 29) +#define SE_DB_BUSY (1 << 30) +#define SE_CB_BUSY (1 << 31) +#define GRBM_SOFT_RESET 0x8020 +#define SOFT_RESET_CP (1 << 0) +#define SOFT_RESET_CB (1 << 1) +#define SOFT_RESET_DB (1 << 3) +#define SOFT_RESET_GDS (1 << 4) +#define SOFT_RESET_PA (1 << 5) +#define SOFT_RESET_SC (1 << 6) +#define SOFT_RESET_SPI (1 << 8) +#define SOFT_RESET_SH (1 << 9) +#define SOFT_RESET_SX (1 << 10) +#define SOFT_RESET_TC (1 << 11) +#define SOFT_RESET_TA (1 << 12) +#define SOFT_RESET_VGT (1 << 14) +#define SOFT_RESET_IA (1 << 15) + +#define CP_MEQ_THRESHOLDS 0x8764 +#define MEQ1_START(x) ((x) << 0) +#define MEQ2_START(x) ((x) << 8) +#define CP_PERFMON_CNTL 0x87FC + +#define VGT_CACHE_INVALIDATION 0x88C4 +#define CACHE_INVALIDATION(x) ((x) << 0) +#define VC_ONLY 0 +#define TC_ONLY 1 +#define VC_AND_TC 2 +#define AUTO_INVLD_EN(x) ((x) << 6) +#define NO_AUTO 0 +#define ES_AUTO 1 +#define GS_AUTO 2 +#define ES_AND_GS_AUTO 3 +#define VGT_GS_VERTEX_REUSE 0x88D4 + +#define CC_GC_SHADER_PIPE_CONFIG 0x8950 +#define GC_USER_SHADER_PIPE_CONFIG 0x8954 +#define INACTIVE_QD_PIPES(x) ((x) << 8) +#define INACTIVE_QD_PIPES_MASK 0x0000FF00 +#define INACTIVE_QD_PIPES_SHIFT 8 +#define INACTIVE_SIMDS(x) ((x) << 16) +#define INACTIVE_SIMDS_MASK 0xFFFF0000 +#define INACTIVE_SIMDS_SHIFT 16 + +#define VGT_PRIMITIVE_TYPE 0x8958 +#define VGT_NUM_INSTANCES 0x8974 +#define VGT_TF_RING_SIZE 0x8988 +#define VGT_OFFCHIP_LDS_BASE 0x89b4 + +#define PA_SC_LINE_STIPPLE_STATE 0x8B10 +#define PA_CL_ENHANCE 0x8A14 +#define CLIP_VTX_REORDER_ENA (1 << 0) +#define NUM_CLIP_SEQ(x) ((x) << 1) +#define PA_SC_FIFO_SIZE 0x8BCC +#define SC_PRIM_FIFO_SIZE(x) ((x) << 0) +#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 12) +#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 20) +#define PA_SC_FORCE_EOV_MAX_CNTS 0x8B24 +#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0) +#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16) + +#define SQ_CONFIG 0x8C00 +#define VC_ENABLE (1 << 0) +#define EXPORT_SRC_C (1 << 1) +#define GFX_PRIO(x) ((x) << 2) +#define CS1_PRIO(x) ((x) << 4) +#define CS2_PRIO(x) ((x) << 6) +#define SQ_GPR_RESOURCE_MGMT_1 0x8C04 +#define NUM_PS_GPRS(x) ((x) << 0) +#define NUM_VS_GPRS(x) ((x) << 16) +#define NUM_CLAUSE_TEMP_GPRS(x) ((x) << 28) +#define SQ_ESGS_RING_SIZE 0x8c44 +#define SQ_GSVS_RING_SIZE 0x8c4c +#define SQ_ESTMP_RING_BASE 0x8c50 +#define SQ_ESTMP_RING_SIZE 0x8c54 +#define SQ_GSTMP_RING_BASE 0x8c58 +#define SQ_GSTMP_RING_SIZE 0x8c5c +#define SQ_VSTMP_RING_BASE 0x8c60 +#define SQ_VSTMP_RING_SIZE 0x8c64 +#define SQ_PSTMP_RING_BASE 0x8c68 +#define SQ_PSTMP_RING_SIZE 0x8c6c +#define SQ_MS_FIFO_SIZES 0x8CF0 +#define CACHE_FIFO_SIZE(x) ((x) << 0) +#define FETCH_FIFO_HIWATER(x) ((x) << 8) +#define DONE_FIFO_HIWATER(x) ((x) << 16) +#define ALU_UPDATE_FIFO_HIWATER(x) ((x) << 24) +#define SQ_LSTMP_RING_BASE 0x8e10 +#define SQ_LSTMP_RING_SIZE 0x8e14 +#define SQ_HSTMP_RING_BASE 0x8e18 +#define SQ_HSTMP_RING_SIZE 0x8e1c +#define SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x8D8C +#define DYN_GPR_ENABLE (1 << 8) +#define SQ_CONST_MEM_BASE 0x8df8 + +#define SX_EXPORT_BUFFER_SIZES 0x900C +#define COLOR_BUFFER_SIZE(x) ((x) << 0) +#define POSITION_BUFFER_SIZE(x) ((x) << 8) +#define SMX_BUFFER_SIZE(x) ((x) << 16) +#define SX_DEBUG_1 0x9058 +#define ENABLE_NEW_SMX_ADDRESS (1 << 16) + +#define SPI_CONFIG_CNTL 0x9100 +#define GPR_WRITE_PRIORITY(x) ((x) << 0) +#define SPI_CONFIG_CNTL_1 0x913C +#define VTX_DONE_DELAY(x) ((x) << 0) +#define INTERP_ONE_PRIM_PER_ROW (1 << 4) +#define CRC_SIMD_ID_WADDR_DISABLE (1 << 8) + +#define CGTS_TCC_DISABLE 0x9148 +#define CGTS_USER_TCC_DISABLE 0x914C +#define TCC_DISABLE_MASK 0xFFFF0000 +#define TCC_DISABLE_SHIFT 16 +#define CGTS_SM_CTRL_REG 0x915C +#define OVERRIDE (1 << 21) + +#define TA_CNTL_AUX 0x9508 +#define DISABLE_CUBE_WRAP (1 << 0) +#define DISABLE_CUBE_ANISO (1 << 1) + +#define TCP_CHAN_STEER_LO 0x960c +#define TCP_CHAN_STEER_HI 0x9610 + +#define CC_RB_BACKEND_DISABLE 0x98F4 +#define BACKEND_DISABLE(x) ((x) << 16) +#define GB_ADDR_CONFIG 0x98F8 +#define NUM_PIPES(x) ((x) << 0) +#define NUM_PIPES_MASK 0x00000007 +#define NUM_PIPES_SHIFT 0 +#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4) +#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070 +#define PIPE_INTERLEAVE_SIZE_SHIFT 4 +#define BANK_INTERLEAVE_SIZE(x) ((x) << 8) +#define NUM_SHADER_ENGINES(x) ((x) << 12) +#define NUM_SHADER_ENGINES_MASK 0x00003000 +#define NUM_SHADER_ENGINES_SHIFT 12 +#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16) +#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000 +#define SHADER_ENGINE_TILE_SIZE_SHIFT 16 +#define NUM_GPUS(x) ((x) << 20) +#define NUM_GPUS_MASK 0x00700000 +#define NUM_GPUS_SHIFT 20 +#define MULTI_GPU_TILE_SIZE(x) ((x) << 24) +#define MULTI_GPU_TILE_SIZE_MASK 0x03000000 +#define MULTI_GPU_TILE_SIZE_SHIFT 24 +#define ROW_SIZE(x) ((x) << 28) +#define ROW_SIZE_MASK 0x30000007 +#define ROW_SIZE_SHIFT 28 +#define NUM_LOWER_PIPES(x) ((x) << 30) +#define NUM_LOWER_PIPES_MASK 0x40000000 +#define NUM_LOWER_PIPES_SHIFT 30 +#define GB_BACKEND_MAP 0x98FC + +#define CB_PERF_CTR0_SEL_0 0x9A20 +#define CB_PERF_CTR0_SEL_1 0x9A24 +#define CB_PERF_CTR1_SEL_0 0x9A28 +#define CB_PERF_CTR1_SEL_1 0x9A2C +#define CB_PERF_CTR2_SEL_0 0x9A30 +#define CB_PERF_CTR2_SEL_1 0x9A34 +#define CB_PERF_CTR3_SEL_0 0x9A38 +#define CB_PERF_CTR3_SEL_1 0x9A3C + +#define GC_USER_RB_BACKEND_DISABLE 0x9B7C +#define BACKEND_DISABLE_MASK 0x00FF0000 +#define BACKEND_DISABLE_SHIFT 16 + +#define SMX_DC_CTL0 0xA020 +#define USE_HASH_FUNCTION (1 << 0) +#define NUMBER_OF_SETS(x) ((x) << 1) +#define FLUSH_ALL_ON_EVENT (1 << 10) +#define STALL_ON_EVENT (1 << 11) +#define SMX_EVENT_CTL 0xA02C +#define ES_FLUSH_CTL(x) ((x) << 0) +#define GS_FLUSH_CTL(x) ((x) << 3) +#define ACK_FLUSH_CTL(x) ((x) << 6) +#define SYNC_FLUSH_CTL (1 << 8) + #endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 55fefe76396..4b77b79fbbc 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1050,12 +1050,52 @@ struct evergreen_asic { struct r100_gpu_lockup lockup; }; +struct cayman_asic { + unsigned max_shader_engines; + unsigned max_pipes_per_simd; + unsigned max_tile_pipes; + unsigned max_simds_per_se; + unsigned max_backends_per_se; + unsigned max_texture_channel_caches; + unsigned max_gprs; + unsigned max_threads; + unsigned max_gs_threads; + unsigned max_stack_entries; + unsigned sx_num_of_sets; + unsigned sx_max_export_size; + unsigned sx_max_export_pos_size; + unsigned sx_max_export_smx_size; + unsigned max_hw_contexts; + unsigned sq_num_cf_insts; + unsigned sc_prim_fifo_size; + unsigned sc_hiz_tile_fifo_size; + unsigned sc_earlyz_tile_fifo_size; + + unsigned num_shader_engines; + unsigned num_shader_pipes_per_simd; + unsigned num_tile_pipes; + unsigned num_simds_per_se; + unsigned num_backends_per_se; + unsigned backend_disable_mask_per_asic; + unsigned backend_map; + unsigned num_texture_channel_caches; + unsigned mem_max_burst_length_bytes; + unsigned mem_row_size_in_kb; + unsigned shader_engine_tile_size; + unsigned num_gpus; + unsigned multi_gpu_tile_size; + + unsigned tile_config; + struct r100_gpu_lockup lockup; +}; + union radeon_asic_config { struct r300_asic r300; struct r100_asic r100; struct r600_asic r600; struct rv770_asic rv770; struct evergreen_asic evergreen; + struct cayman_asic cayman; }; /* diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 68a7b22c1fe..bf7d4c06145 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -169,7 +169,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) value = rdev->accel_working; break; case RADEON_INFO_TILING_CONFIG: - if (rdev->family >= CHIP_CEDAR) + if (rdev->family >= CHIP_CAYMAN) + value = rdev->config.cayman.tile_config; + else if (rdev->family >= CHIP_CEDAR) value = rdev->config.evergreen.tile_config; else if (rdev->family >= CHIP_RV770) value = rdev->config.rv770.tile_config; @@ -206,7 +208,10 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) value = rdev->clock.spll.reference_freq * 10; break; case RADEON_INFO_NUM_BACKENDS: - if (rdev->family >= CHIP_CEDAR) + if (rdev->family >= CHIP_CAYMAN) + value = rdev->config.cayman.max_backends_per_se * + rdev->config.cayman.max_shader_engines; + else if (rdev->family >= CHIP_CEDAR) value = rdev->config.evergreen.max_backends; else if (rdev->family >= CHIP_RV770) value = rdev->config.rv770.max_backends; -- cgit v1.2.3-70-g09d2