diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/evergreen_cs.c')
-rw-r--r-- | drivers/gpu/drm/radeon/evergreen_cs.c | 426 |
1 files changed, 302 insertions, 124 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 49203b67b81..70089d32b80 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -43,48 +43,46 @@ struct evergreen_cs_track { u32 npipes; u32 row_size; /* value we track */ - u32 nsamples; - u32 cb_color_base_last[12]; + u32 nsamples; /* unused */ struct radeon_bo *cb_color_bo[12]; u32 cb_color_bo_offset[12]; - struct radeon_bo *cb_color_fmask_bo[8]; - struct radeon_bo *cb_color_cmask_bo[8]; + struct radeon_bo *cb_color_fmask_bo[8]; /* unused */ + struct radeon_bo *cb_color_cmask_bo[8]; /* unused */ u32 cb_color_info[12]; u32 cb_color_view[12]; - u32 cb_color_pitch_idx[12]; - u32 cb_color_slice_idx[12]; - u32 cb_color_dim_idx[12]; - u32 cb_color_dim[12]; u32 cb_color_pitch[12]; u32 cb_color_slice[12]; u32 cb_color_attrib[12]; - u32 cb_color_cmask_slice[8]; - u32 cb_color_fmask_slice[8]; + u32 cb_color_cmask_slice[8];/* unused */ + u32 cb_color_fmask_slice[8];/* unused */ u32 cb_target_mask; - u32 cb_shader_mask; + u32 cb_shader_mask; /* unused */ u32 vgt_strmout_config; u32 vgt_strmout_buffer_config; struct radeon_bo *vgt_strmout_bo[4]; - u64 vgt_strmout_bo_mc[4]; u32 vgt_strmout_bo_offset[4]; u32 vgt_strmout_size[4]; u32 db_depth_control; u32 db_depth_view; u32 db_depth_slice; u32 db_depth_size; - u32 db_depth_size_idx; u32 db_z_info; - u32 db_z_idx; u32 db_z_read_offset; u32 db_z_write_offset; struct radeon_bo *db_z_read_bo; struct radeon_bo *db_z_write_bo; u32 db_s_info; - u32 db_s_idx; u32 db_s_read_offset; u32 db_s_write_offset; struct radeon_bo *db_s_read_bo; struct radeon_bo *db_s_write_bo; + bool sx_misc_kill_all_prims; + bool cb_dirty; + bool db_dirty; + bool streamout_dirty; + u32 htile_offset; + u32 htile_surface; + struct radeon_bo *htile_bo; }; static u32 evergreen_cs_get_aray_mode(u32 tiling_flags) @@ -124,44 +122,42 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track) } for (i = 0; i < 12; i++) { - track->cb_color_base_last[i] = 0; track->cb_color_bo[i] = NULL; track->cb_color_bo_offset[i] = 0xFFFFFFFF; track->cb_color_info[i] = 0; track->cb_color_view[i] = 0xFFFFFFFF; - track->cb_color_pitch_idx[i] = 0; - track->cb_color_slice_idx[i] = 0; - track->cb_color_dim[i] = 0; track->cb_color_pitch[i] = 0; track->cb_color_slice[i] = 0; - track->cb_color_dim[i] = 0; } track->cb_target_mask = 0xFFFFFFFF; track->cb_shader_mask = 0xFFFFFFFF; + track->cb_dirty = true; track->db_depth_view = 0xFFFFC000; track->db_depth_size = 0xFFFFFFFF; - track->db_depth_size_idx = 0; track->db_depth_control = 0xFFFFFFFF; track->db_z_info = 0xFFFFFFFF; - track->db_z_idx = 0xFFFFFFFF; track->db_z_read_offset = 0xFFFFFFFF; track->db_z_write_offset = 0xFFFFFFFF; track->db_z_read_bo = NULL; track->db_z_write_bo = NULL; track->db_s_info = 0xFFFFFFFF; - track->db_s_idx = 0xFFFFFFFF; track->db_s_read_offset = 0xFFFFFFFF; track->db_s_write_offset = 0xFFFFFFFF; track->db_s_read_bo = NULL; track->db_s_write_bo = NULL; + track->db_dirty = true; + track->htile_bo = NULL; + track->htile_offset = 0xFFFFFFFF; + track->htile_surface = 0; for (i = 0; i < 4; i++) { track->vgt_strmout_size[i] = 0; track->vgt_strmout_bo[i] = NULL; track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF; - track->vgt_strmout_bo_mc[i] = 0xFFFFFFFF; } + track->streamout_dirty = true; + track->sx_misc_kill_all_prims = false; } struct eg_surface { @@ -306,8 +302,8 @@ static int evergreen_surface_check(struct radeon_cs_parser *p, case ARRAY_2D_TILED_THIN1: return evergreen_surface_check_2d(p, surf, prefix); default: - dev_warn(p->dev, "%s:%d invalid array mode %d\n", - __func__, __LINE__, surf->mode); + dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", + __func__, __LINE__, prefix, surf->mode); return -EINVAL; } return -EINVAL; @@ -325,8 +321,8 @@ static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p, case ARRAY_1D_TILED_THIN1: return 0; default: - dev_warn(p->dev, "%s:%d invalid array mode %d\n", - __func__, __LINE__, surf->mode); + dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", + __func__, __LINE__, prefix, surf->mode); return -EINVAL; } @@ -454,6 +450,62 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i return 0; } +static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p, + unsigned nbx, unsigned nby) +{ + struct evergreen_cs_track *track = p->track; + unsigned long size; + + if (track->htile_bo == NULL) { + dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n", + __func__, __LINE__, track->db_z_info); + return -EINVAL; + } + + if (G_028ABC_LINEAR(track->htile_surface)) { + /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */ + nbx = round_up(nbx, 16 * 8); + /* height is npipes htiles aligned == npipes * 8 pixel aligned */ + nby = round_up(nby, track->npipes * 8); + } else { + switch (track->npipes) { + case 8: + nbx = round_up(nbx, 64 * 8); + nby = round_up(nby, 64 * 8); + break; + case 4: + nbx = round_up(nbx, 64 * 8); + nby = round_up(nby, 32 * 8); + break; + case 2: + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 32 * 8); + break; + case 1: + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 16 * 8); + break; + default: + dev_warn(p->dev, "%s:%d invalid num pipes %d\n", + __func__, __LINE__, track->npipes); + return -EINVAL; + } + } + /* compute number of htile */ + nbx = nbx / 8; + nby = nby / 8; + size = nbx * nby * 4; + size += track->htile_offset; + + if (size > radeon_bo_size(track->htile_bo)) { + dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n", + __func__, __LINE__, radeon_bo_size(track->htile_bo), + size, nbx, nby); + return -EINVAL; + } + return 0; +} + static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) { struct evergreen_cs_track *track = p->track; @@ -540,6 +592,14 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return -EINVAL; } + /* hyperz */ + if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) { + r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby); + if (r) { + return r; + } + } + return 0; } @@ -627,6 +687,14 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) return -EINVAL; } + /* hyperz */ + if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) { + r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby); + if (r) { + return r; + } + } + return 0; } @@ -795,62 +863,77 @@ static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p, static int evergreen_cs_track_check(struct radeon_cs_parser *p) { struct evergreen_cs_track *track = p->track; - unsigned tmp, i, j; + unsigned tmp, i; int r; + unsigned buffer_mask = 0; /* check streamout */ - for (i = 0; i < 4; i++) { - if (track->vgt_strmout_config & (1 << i)) { - for (j = 0; j < 4; j++) { - if ((track->vgt_strmout_buffer_config >> (i * 4)) & (1 << j)) { - if (track->vgt_strmout_bo[j]) { - u64 offset = (u64)track->vgt_strmout_bo_offset[j] + - (u64)track->vgt_strmout_size[j]; - if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { - DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n", - j, offset, - radeon_bo_size(track->vgt_strmout_bo[j])); - return -EINVAL; - } - } else { - dev_warn(p->dev, "No buffer for streamout %d\n", j); + if (track->streamout_dirty && track->vgt_strmout_config) { + for (i = 0; i < 4; i++) { + if (track->vgt_strmout_config & (1 << i)) { + buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf; + } + } + + for (i = 0; i < 4; i++) { + if (buffer_mask & (1 << i)) { + if (track->vgt_strmout_bo[i]) { + u64 offset = (u64)track->vgt_strmout_bo_offset[i] + + (u64)track->vgt_strmout_size[i]; + if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { + DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n", + i, offset, + radeon_bo_size(track->vgt_strmout_bo[i])); return -EINVAL; } + } else { + dev_warn(p->dev, "No buffer for streamout %d\n", i); + return -EINVAL; } } } + track->streamout_dirty = false; } + if (track->sx_misc_kill_all_prims) + return 0; + /* check that we have a cb for each enabled target */ - tmp = track->cb_target_mask; - for (i = 0; i < 8; i++) { - if ((tmp >> (i * 4)) & 0xF) { - /* at least one component is enabled */ - if (track->cb_color_bo[i] == NULL) { - dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", - __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); - return -EINVAL; - } - /* check cb */ - r = evergreen_cs_track_validate_cb(p, i); - if (r) { - return r; + if (track->cb_dirty) { + tmp = track->cb_target_mask; + for (i = 0; i < 8; i++) { + if ((tmp >> (i * 4)) & 0xF) { + /* at least one component is enabled */ + if (track->cb_color_bo[i] == NULL) { + dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", + __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); + return -EINVAL; + } + /* check cb */ + r = evergreen_cs_track_validate_cb(p, i); + if (r) { + return r; + } } } + track->cb_dirty = false; } - /* Check stencil buffer */ - if (G_028800_STENCIL_ENABLE(track->db_depth_control)) { - r = evergreen_cs_track_validate_stencil(p); - if (r) - return r; - } - /* Check depth buffer */ - if (G_028800_Z_WRITE_ENABLE(track->db_depth_control)) { - r = evergreen_cs_track_validate_depth(p); - if (r) - return r; + if (track->db_dirty) { + /* Check stencil buffer */ + if (G_028800_STENCIL_ENABLE(track->db_depth_control)) { + r = evergreen_cs_track_validate_stencil(p); + if (r) + return r; + } + /* Check depth buffer */ + if (G_028800_Z_ENABLE(track->db_depth_control)) { + r = evergreen_cs_track_validate_depth(p); + if (r) + return r; + } + track->db_dirty = false; } return 0; @@ -1184,6 +1267,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case DB_DEPTH_CONTROL: track->db_depth_control = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case CAYMAN_DB_EQAA: if (p->rdev->family < CHIP_CAYMAN) { @@ -1225,19 +1309,23 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) DB_MACRO_TILE_ASPECT(mtaspect); } } + track->db_dirty = true; break; case DB_STENCIL_INFO: track->db_s_info = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case DB_DEPTH_VIEW: track->db_depth_view = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case DB_DEPTH_SIZE: track->db_depth_size = radeon_get_ib_value(p, idx); - track->db_depth_size_idx = idx; + track->db_dirty = true; break; case R_02805C_DB_DEPTH_SLICE: track->db_depth_slice = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case DB_Z_READ_BASE: r = evergreen_cs_packet_next_reloc(p, &reloc); @@ -1249,6 +1337,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->db_z_read_offset = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->db_z_read_bo = reloc->robj; + track->db_dirty = true; break; case DB_Z_WRITE_BASE: r = evergreen_cs_packet_next_reloc(p, &reloc); @@ -1260,6 +1349,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->db_z_write_offset = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->db_z_write_bo = reloc->robj; + track->db_dirty = true; break; case DB_STENCIL_READ_BASE: r = evergreen_cs_packet_next_reloc(p, &reloc); @@ -1271,6 +1361,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->db_s_read_offset = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->db_s_read_bo = reloc->robj; + track->db_dirty = true; break; case DB_STENCIL_WRITE_BASE: r = evergreen_cs_packet_next_reloc(p, &reloc); @@ -1282,12 +1373,15 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->db_s_write_offset = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->db_s_write_bo = reloc->robj; + track->db_dirty = true; break; case VGT_STRMOUT_CONFIG: track->vgt_strmout_config = radeon_get_ib_value(p, idx); + track->streamout_dirty = true; break; case VGT_STRMOUT_BUFFER_CONFIG: track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx); + track->streamout_dirty = true; break; case VGT_STRMOUT_BUFFER_BASE_0: case VGT_STRMOUT_BUFFER_BASE_1: @@ -1303,7 +1397,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->vgt_strmout_bo[tmp] = reloc->robj; - track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset; + track->streamout_dirty = true; break; case VGT_STRMOUT_BUFFER_SIZE_0: case VGT_STRMOUT_BUFFER_SIZE_1: @@ -1312,6 +1406,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16; /* size in register is DWs, convert to bytes */ track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4; + track->streamout_dirty = true; break; case CP_COHER_BASE: r = evergreen_cs_packet_next_reloc(p, &reloc); @@ -1323,9 +1418,11 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); case CB_TARGET_MASK: track->cb_target_mask = radeon_get_ib_value(p, idx); + track->cb_dirty = true; break; case CB_SHADER_MASK: track->cb_shader_mask = radeon_get_ib_value(p, idx); + track->cb_dirty = true; break; case PA_SC_AA_CONFIG: if (p->rdev->family >= CHIP_CAYMAN) { @@ -1355,6 +1452,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_VIEW: tmp = (reg - CB_COLOR0_VIEW) / 0x3c; track->cb_color_view[tmp] = radeon_get_ib_value(p, idx); + track->cb_dirty = true; break; case CB_COLOR8_VIEW: case CB_COLOR9_VIEW: @@ -1362,6 +1460,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_VIEW: tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8; track->cb_color_view[tmp] = radeon_get_ib_value(p, idx); + track->cb_dirty = true; break; case CB_COLOR0_INFO: case CB_COLOR1_INFO: @@ -1383,6 +1482,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); } + track->cb_dirty = true; break; case CB_COLOR8_INFO: case CB_COLOR9_INFO: @@ -1400,6 +1500,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); } + track->cb_dirty = true; break; case CB_COLOR0_PITCH: case CB_COLOR1_PITCH: @@ -1411,7 +1512,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_PITCH: tmp = (reg - CB_COLOR0_PITCH) / 0x3c; track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_pitch_idx[tmp] = idx; + track->cb_dirty = true; break; case CB_COLOR8_PITCH: case CB_COLOR9_PITCH: @@ -1419,7 +1520,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_PITCH: tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8; track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_pitch_idx[tmp] = idx; + track->cb_dirty = true; break; case CB_COLOR0_SLICE: case CB_COLOR1_SLICE: @@ -1431,7 +1532,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_SLICE: tmp = (reg - CB_COLOR0_SLICE) / 0x3c; track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_slice_idx[tmp] = idx; + track->cb_dirty = true; break; case CB_COLOR8_SLICE: case CB_COLOR9_SLICE: @@ -1439,7 +1540,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_SLICE: tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8; track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_slice_idx[tmp] = idx; + track->cb_dirty = true; break; case CB_COLOR0_ATTRIB: case CB_COLOR1_ATTRIB: @@ -1471,6 +1572,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c); track->cb_color_attrib[tmp] = ib[idx]; + track->cb_dirty = true; break; case CB_COLOR8_ATTRIB: case CB_COLOR9_ATTRIB: @@ -1498,26 +1600,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8; track->cb_color_attrib[tmp] = ib[idx]; - break; - case CB_COLOR0_DIM: - case CB_COLOR1_DIM: - case CB_COLOR2_DIM: - case CB_COLOR3_DIM: - case CB_COLOR4_DIM: - case CB_COLOR5_DIM: - case CB_COLOR6_DIM: - case CB_COLOR7_DIM: - tmp = (reg - CB_COLOR0_DIM) / 0x3c; - track->cb_color_dim[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_dim_idx[tmp] = idx; - break; - case CB_COLOR8_DIM: - case CB_COLOR9_DIM: - case CB_COLOR10_DIM: - case CB_COLOR11_DIM: - tmp = ((reg - CB_COLOR8_DIM) / 0x1c) + 8; - track->cb_color_dim[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_dim_idx[tmp] = idx; + track->cb_dirty = true; break; case CB_COLOR0_FMASK: case CB_COLOR1_FMASK: @@ -1592,8 +1675,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) tmp = (reg - CB_COLOR0_BASE) / 0x3c; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); - track->cb_color_base_last[tmp] = ib[idx]; track->cb_color_bo[tmp] = reloc->robj; + track->cb_dirty = true; break; case CB_COLOR8_BASE: case CB_COLOR9_BASE: @@ -1608,8 +1691,25 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); - track->cb_color_base_last[tmp] = ib[idx]; track->cb_color_bo[tmp] = reloc->robj; + track->cb_dirty = true; + break; + case DB_HTILE_DATA_BASE: + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + dev_warn(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + track->htile_offset = radeon_get_ib_value(p, idx); + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + track->htile_bo = reloc->robj; + track->db_dirty = true; + break; + case DB_HTILE_SURFACE: + /* 8x8 only */ + track->htile_surface = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case CB_IMMED0_BASE: case CB_IMMED1_BASE: @@ -1623,7 +1723,6 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_IMMED9_BASE: case CB_IMMED10_BASE: case CB_IMMED11_BASE: - case DB_HTILE_DATA_BASE: case SQ_PGM_START_FS: case SQ_PGM_START_ES: case SQ_PGM_START_VS: @@ -1748,6 +1847,9 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); break; + case SX_MISC: + track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; + break; default: dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; @@ -1803,6 +1905,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, { int pred_op; int tmp; + uint64_t offset; + if (pkt->count != 1) { DRM_ERROR("bad SET PREDICATION\n"); return -EINVAL; @@ -1826,8 +1930,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - ib[idx + 0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx + 1] = tmp + (upper_32_bits(reloc->lobj.gpu_offset) & 0xff); + offset = reloc->lobj.gpu_offset + + (idx_value & 0xfffffff0) + + ((u64)(tmp & 0xff) << 32); + + ib[idx + 0] = offset; + ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff); } break; case PACKET3_CONTEXT_CONTROL: @@ -1855,6 +1963,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } break; case PACKET3_INDEX_BASE: + { + uint64_t offset; + if (pkt->count != 1) { DRM_ERROR("bad INDEX_BASE\n"); return -EINVAL; @@ -1864,15 +1975,24 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad INDEX_BASE\n"); return -EINVAL; } - ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+1] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + idx_value + + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); + + ib[idx+0] = offset; + ib[idx+1] = upper_32_bits(offset) & 0xff; + r = evergreen_cs_track_check(p); if (r) { dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; + } case PACKET3_DRAW_INDEX: + { + uint64_t offset; if (pkt->count != 3) { DRM_ERROR("bad DRAW_INDEX\n"); return -EINVAL; @@ -1882,15 +2002,25 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad DRAW_INDEX\n"); return -EINVAL; } - ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+1] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + idx_value + + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); + + ib[idx+0] = offset; + ib[idx+1] = upper_32_bits(offset) & 0xff; + r = evergreen_cs_track_check(p); if (r) { dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; + } case PACKET3_DRAW_INDEX_2: + { + uint64_t offset; + if (pkt->count != 4) { DRM_ERROR("bad DRAW_INDEX_2\n"); return -EINVAL; @@ -1900,14 +2030,21 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad DRAW_INDEX_2\n"); return -EINVAL; } - ib[idx+1] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + radeon_get_ib_value(p, idx+1) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = offset; + ib[idx+2] = upper_32_bits(offset) & 0xff; + r = evergreen_cs_track_check(p); if (r) { dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; + } case PACKET3_DRAW_INDEX_AUTO: if (pkt->count != 1) { DRM_ERROR("bad DRAW_INDEX_AUTO\n"); @@ -1998,13 +2135,20 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } /* bit 4 is reg (0) or mem (1) */ if (idx_value & 0x10) { + uint64_t offset; + r = evergreen_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("bad WAIT_REG_MEM\n"); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc); + ib[idx+2] = upper_32_bits(offset) & 0xff; } break; case PACKET3_SURFACE_SYNC: @@ -2029,16 +2173,25 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } if (pkt->count) { + uint64_t offset; + r = evergreen_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("bad EVENT_WRITE\n"); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + offset = reloc->lobj.gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = offset & 0xfffffff8; + ib[idx+2] = upper_32_bits(offset) & 0xff; } break; case PACKET3_EVENT_WRITE_EOP: + { + uint64_t offset; + if (pkt->count != 4) { DRM_ERROR("bad EVENT_WRITE_EOP\n"); return -EINVAL; @@ -2048,10 +2201,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad EVENT_WRITE_EOP\n"); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = offset & 0xfffffffc; + ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff); break; + } case PACKET3_EVENT_WRITE_EOS: + { + uint64_t offset; + if (pkt->count != 3) { DRM_ERROR("bad EVENT_WRITE_EOS\n"); return -EINVAL; @@ -2061,9 +2223,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad EVENT_WRITE_EOS\n"); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = offset & 0xfffffffc; + ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff); break; + } case PACKET3_SET_CONFIG_REG: start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START; end_reg = 4 * pkt->count + start_reg - 4; @@ -2156,6 +2324,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, ib[idx+1+(i*8)+3] += moffset; break; case SQ_TEX_VTX_VALID_BUFFER: + { + uint64_t offset64; /* vtx base */ r = evergreen_cs_packet_next_reloc(p, &reloc); if (r) { @@ -2167,11 +2337,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) { /* force size to size of the buffer */ dev_warn(p->dev, "vbo resource seems too big for the bo\n"); - ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj); + ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset; } - ib[idx+1+(i*8)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff); - ib[idx+1+(i*8)+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset64 = reloc->lobj.gpu_offset + offset; + ib[idx+1+(i*8)+0] = offset64; + ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | + (upper_32_bits(offset64) & 0xff); break; + } case SQ_TEX_VTX_INVALID_TEXTURE: case SQ_TEX_VTX_INVALID_BUFFER: default: @@ -2247,8 +2421,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + offset += reloc->lobj.gpu_offset; + ib[idx+1] = offset; + ib[idx+2] = upper_32_bits(offset) & 0xff; } /* Reading data from SRC_ADDRESS. */ if (((idx_value >> 1) & 0x3) == 2) { @@ -2265,8 +2440,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + offset += reloc->lobj.gpu_offset; + ib[idx+3] = offset; + ib[idx+4] = upper_32_bits(offset) & 0xff; } break; case PACKET3_COPY_DW: @@ -2289,8 +2465,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + offset += reloc->lobj.gpu_offset; + ib[idx+1] = offset; + ib[idx+2] = upper_32_bits(offset) & 0xff; } else { /* SRC is a reg. */ reg = radeon_get_ib_value(p, idx+1) << 2; @@ -2312,8 +2489,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + offset += reloc->lobj.gpu_offset; + ib[idx+3] = offset; + ib[idx+4] = upper_32_bits(offset) & 0xff; } else { /* DST is a reg. */ reg = radeon_get_ib_value(p, idx+3) << 2; |