diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/evergreen_cs.c')
-rw-r--r-- | drivers/gpu/drm/radeon/evergreen_cs.c | 1274 |
1 files changed, 1161 insertions, 113 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 8e8cd85e5c0..70089d32b80 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -31,6 +31,9 @@ #include "evergreen_reg_safe.h" #include "cayman_reg_safe.h" +#define MAX(a,b) (((a)>(b))?(a):(b)) +#define MIN(a,b) (((a)<(b))?(a):(b)) + static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p, struct radeon_cs_reloc **cs_reloc); @@ -40,42 +43,46 @@ struct evergreen_cs_track { u32 npipes; u32 row_size; /* value we track */ - u32 nsamples; - u32 cb_color_base_last[12]; + u32 nsamples; /* unused */ struct radeon_bo *cb_color_bo[12]; u32 cb_color_bo_offset[12]; - struct radeon_bo *cb_color_fmask_bo[8]; - struct radeon_bo *cb_color_cmask_bo[8]; + struct radeon_bo *cb_color_fmask_bo[8]; /* unused */ + struct radeon_bo *cb_color_cmask_bo[8]; /* unused */ u32 cb_color_info[12]; u32 cb_color_view[12]; - u32 cb_color_pitch_idx[12]; - u32 cb_color_slice_idx[12]; - u32 cb_color_dim_idx[12]; - u32 cb_color_dim[12]; u32 cb_color_pitch[12]; u32 cb_color_slice[12]; - u32 cb_color_cmask_slice[8]; - u32 cb_color_fmask_slice[8]; + u32 cb_color_attrib[12]; + u32 cb_color_cmask_slice[8];/* unused */ + u32 cb_color_fmask_slice[8];/* unused */ u32 cb_target_mask; - u32 cb_shader_mask; + u32 cb_shader_mask; /* unused */ u32 vgt_strmout_config; u32 vgt_strmout_buffer_config; + struct radeon_bo *vgt_strmout_bo[4]; + u32 vgt_strmout_bo_offset[4]; + u32 vgt_strmout_size[4]; u32 db_depth_control; u32 db_depth_view; + u32 db_depth_slice; u32 db_depth_size; - u32 db_depth_size_idx; u32 db_z_info; - u32 db_z_idx; u32 db_z_read_offset; u32 db_z_write_offset; struct radeon_bo *db_z_read_bo; struct radeon_bo *db_z_write_bo; u32 db_s_info; - u32 db_s_idx; u32 db_s_read_offset; u32 db_s_write_offset; struct radeon_bo *db_s_read_bo; struct radeon_bo *db_s_write_bo; + bool sx_misc_kill_all_prims; + bool cb_dirty; + bool db_dirty; + bool streamout_dirty; + u32 htile_offset; + u32 htile_surface; + struct radeon_bo *htile_bo; }; static u32 evergreen_cs_get_aray_mode(u32 tiling_flags) @@ -103,19 +110,6 @@ static u32 evergreen_cs_get_num_banks(u32 nbanks) } } -static u32 evergreen_cs_get_tile_split(u32 row_size) -{ - switch (row_size) { - case 1: - default: - return ADDR_SURF_TILE_SPLIT_1KB; - case 2: - return ADDR_SURF_TILE_SPLIT_2KB; - case 4: - return ADDR_SURF_TILE_SPLIT_4KB; - } -} - static void evergreen_cs_track_init(struct evergreen_cs_track *track) { int i; @@ -128,50 +122,820 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track) } for (i = 0; i < 12; i++) { - track->cb_color_base_last[i] = 0; track->cb_color_bo[i] = NULL; track->cb_color_bo_offset[i] = 0xFFFFFFFF; track->cb_color_info[i] = 0; - track->cb_color_view[i] = 0; - track->cb_color_pitch_idx[i] = 0; - track->cb_color_slice_idx[i] = 0; - track->cb_color_dim[i] = 0; + track->cb_color_view[i] = 0xFFFFFFFF; track->cb_color_pitch[i] = 0; track->cb_color_slice[i] = 0; - track->cb_color_dim[i] = 0; } track->cb_target_mask = 0xFFFFFFFF; track->cb_shader_mask = 0xFFFFFFFF; + track->cb_dirty = true; track->db_depth_view = 0xFFFFC000; track->db_depth_size = 0xFFFFFFFF; - track->db_depth_size_idx = 0; track->db_depth_control = 0xFFFFFFFF; track->db_z_info = 0xFFFFFFFF; - track->db_z_idx = 0xFFFFFFFF; track->db_z_read_offset = 0xFFFFFFFF; track->db_z_write_offset = 0xFFFFFFFF; track->db_z_read_bo = NULL; track->db_z_write_bo = NULL; track->db_s_info = 0xFFFFFFFF; - track->db_s_idx = 0xFFFFFFFF; track->db_s_read_offset = 0xFFFFFFFF; track->db_s_write_offset = 0xFFFFFFFF; track->db_s_read_bo = NULL; track->db_s_write_bo = NULL; + track->db_dirty = true; + track->htile_bo = NULL; + track->htile_offset = 0xFFFFFFFF; + track->htile_surface = 0; + + for (i = 0; i < 4; i++) { + track->vgt_strmout_size[i] = 0; + track->vgt_strmout_bo[i] = NULL; + track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF; + } + track->streamout_dirty = true; + track->sx_misc_kill_all_prims = false; } -static int evergreen_cs_track_check(struct radeon_cs_parser *p) +struct eg_surface { + /* value gathered from cs */ + unsigned nbx; + unsigned nby; + unsigned format; + unsigned mode; + unsigned nbanks; + unsigned bankw; + unsigned bankh; + unsigned tsplit; + unsigned mtilea; + unsigned nsamples; + /* output value */ + unsigned bpe; + unsigned layer_size; + unsigned palign; + unsigned halign; + unsigned long base_align; +}; + +static int evergreen_surface_check_linear(struct radeon_cs_parser *p, + struct eg_surface *surf, + const char *prefix) +{ + surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples; + surf->base_align = surf->bpe; + surf->palign = 1; + surf->halign = 1; + return 0; +} + +static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p, + struct eg_surface *surf, + const char *prefix) +{ + struct evergreen_cs_track *track = p->track; + unsigned palign; + + palign = MAX(64, track->group_size / surf->bpe); + surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples; + surf->base_align = track->group_size; + surf->palign = palign; + surf->halign = 1; + if (surf->nbx & (palign - 1)) { + if (prefix) { + dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n", + __func__, __LINE__, prefix, surf->nbx, palign); + } + return -EINVAL; + } + return 0; +} + +static int evergreen_surface_check_1d(struct radeon_cs_parser *p, + struct eg_surface *surf, + const char *prefix) +{ + struct evergreen_cs_track *track = p->track; + unsigned palign; + + palign = track->group_size / (8 * surf->bpe * surf->nsamples); + palign = MAX(8, palign); + surf->layer_size = surf->nbx * surf->nby * surf->bpe; + surf->base_align = track->group_size; + surf->palign = palign; + surf->halign = 8; + if ((surf->nbx & (palign - 1))) { + if (prefix) { + dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n", + __func__, __LINE__, prefix, surf->nbx, palign, + track->group_size, surf->bpe, surf->nsamples); + } + return -EINVAL; + } + if ((surf->nby & (8 - 1))) { + if (prefix) { + dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n", + __func__, __LINE__, prefix, surf->nby); + } + return -EINVAL; + } + return 0; +} + +static int evergreen_surface_check_2d(struct radeon_cs_parser *p, + struct eg_surface *surf, + const char *prefix) +{ + struct evergreen_cs_track *track = p->track; + unsigned palign, halign, tileb, slice_pt; + + tileb = 64 * surf->bpe * surf->nsamples; + palign = track->group_size / (8 * surf->bpe * surf->nsamples); + palign = MAX(8, palign); + slice_pt = 1; + if (tileb > surf->tsplit) { + slice_pt = tileb / surf->tsplit; + } + tileb = tileb / slice_pt; + /* macro tile width & height */ + palign = (8 * surf->bankw * track->npipes) * surf->mtilea; + halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea; + surf->layer_size = surf->nbx * surf->nby * surf->bpe * slice_pt; + surf->base_align = (palign / 8) * (halign / 8) * tileb; + surf->palign = palign; + surf->halign = halign; + + if ((surf->nbx & (palign - 1))) { + if (prefix) { + dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n", + __func__, __LINE__, prefix, surf->nbx, palign); + } + return -EINVAL; + } + if ((surf->nby & (halign - 1))) { + if (prefix) { + dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n", + __func__, __LINE__, prefix, surf->nby, halign); + } + return -EINVAL; + } + + return 0; +} + +static int evergreen_surface_check(struct radeon_cs_parser *p, + struct eg_surface *surf, + const char *prefix) +{ + /* some common value computed here */ + surf->bpe = r600_fmt_get_blocksize(surf->format); + + switch (surf->mode) { + case ARRAY_LINEAR_GENERAL: + return evergreen_surface_check_linear(p, surf, prefix); + case ARRAY_LINEAR_ALIGNED: + return evergreen_surface_check_linear_aligned(p, surf, prefix); + case ARRAY_1D_TILED_THIN1: + return evergreen_surface_check_1d(p, surf, prefix); + case ARRAY_2D_TILED_THIN1: + return evergreen_surface_check_2d(p, surf, prefix); + default: + dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", + __func__, __LINE__, prefix, surf->mode); + return -EINVAL; + } + return -EINVAL; +} + +static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p, + struct eg_surface *surf, + const char *prefix) +{ + switch (surf->mode) { + case ARRAY_2D_TILED_THIN1: + break; + case ARRAY_LINEAR_GENERAL: + case ARRAY_LINEAR_ALIGNED: + case ARRAY_1D_TILED_THIN1: + return 0; + default: + dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", + __func__, __LINE__, prefix, surf->mode); + return -EINVAL; + } + + switch (surf->nbanks) { + case 0: surf->nbanks = 2; break; + case 1: surf->nbanks = 4; break; + case 2: surf->nbanks = 8; break; + case 3: surf->nbanks = 16; break; + default: + dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n", + __func__, __LINE__, prefix, surf->nbanks); + return -EINVAL; + } + switch (surf->bankw) { + case 0: surf->bankw = 1; break; + case 1: surf->bankw = 2; break; + case 2: surf->bankw = 4; break; + case 3: surf->bankw = 8; break; + default: + dev_warn(p->dev, "%s:%d %s invalid bankw %d\n", + __func__, __LINE__, prefix, surf->bankw); + return -EINVAL; + } + switch (surf->bankh) { + case 0: surf->bankh = 1; break; + case 1: surf->bankh = 2; break; + case 2: surf->bankh = 4; break; + case 3: surf->bankh = 8; break; + default: + dev_warn(p->dev, "%s:%d %s invalid bankh %d\n", + __func__, __LINE__, prefix, surf->bankh); + return -EINVAL; + } + switch (surf->mtilea) { + case 0: surf->mtilea = 1; break; + case 1: surf->mtilea = 2; break; + case 2: surf->mtilea = 4; break; + case 3: surf->mtilea = 8; break; + default: + dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n", + __func__, __LINE__, prefix, surf->mtilea); + return -EINVAL; + } + switch (surf->tsplit) { + case 0: surf->tsplit = 64; break; + case 1: surf->tsplit = 128; break; + case 2: surf->tsplit = 256; break; + case 3: surf->tsplit = 512; break; + case 4: surf->tsplit = 1024; break; + case 5: surf->tsplit = 2048; break; + case 6: surf->tsplit = 4096; break; + default: + dev_warn(p->dev, "%s:%d %s invalid tile split %d\n", + __func__, __LINE__, prefix, surf->tsplit); + return -EINVAL; + } + return 0; +} + +static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id) { struct evergreen_cs_track *track = p->track; + struct eg_surface surf; + unsigned pitch, slice, mslice; + unsigned long offset; + int r; - /* we don't support stream out buffer yet */ - if (track->vgt_strmout_config || track->vgt_strmout_buffer_config) { - dev_warn(p->dev, "this kernel doesn't support SMX output buffer\n"); + mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1; + pitch = track->cb_color_pitch[id]; + slice = track->cb_color_slice[id]; + surf.nbx = (pitch + 1) * 8; + surf.nby = ((slice + 1) * 64) / surf.nbx; + surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]); + surf.format = G_028C70_FORMAT(track->cb_color_info[id]); + surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]); + surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]); + surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]); + surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]); + surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]); + surf.nsamples = 1; + + if (!r600_fmt_is_valid_color(surf.format)) { + dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n", + __func__, __LINE__, surf.format, + id, track->cb_color_info[id]); + return -EINVAL; + } + + r = evergreen_surface_value_conv_check(p, &surf, "cb"); + if (r) { + return r; + } + + r = evergreen_surface_check(p, &surf, "cb"); + if (r) { + dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", + __func__, __LINE__, id, track->cb_color_pitch[id], + track->cb_color_slice[id], track->cb_color_attrib[id], + track->cb_color_info[id]); + return r; + } + + offset = track->cb_color_bo_offset[id] << 8; + if (offset & (surf.base_align - 1)) { + dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n", + __func__, __LINE__, id, offset, surf.base_align); + return -EINVAL; + } + + offset += surf.layer_size * mslice; + if (offset > radeon_bo_size(track->cb_color_bo[id])) { + dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, " + "offset %d, max layer %d, bo size %ld, slice %d)\n", + __func__, __LINE__, id, surf.layer_size, + track->cb_color_bo_offset[id] << 8, mslice, + radeon_bo_size(track->cb_color_bo[id]), slice); + dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", + __func__, __LINE__, surf.nbx, surf.nby, + surf.mode, surf.bpe, surf.nsamples, + surf.bankw, surf.bankh, + surf.tsplit, surf.mtilea); return -EINVAL; } - /* XXX fill in */ + return 0; +} + +static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p, + unsigned nbx, unsigned nby) +{ + struct evergreen_cs_track *track = p->track; + unsigned long size; + + if (track->htile_bo == NULL) { + dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n", + __func__, __LINE__, track->db_z_info); + return -EINVAL; + } + + if (G_028ABC_LINEAR(track->htile_surface)) { + /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */ + nbx = round_up(nbx, 16 * 8); + /* height is npipes htiles aligned == npipes * 8 pixel aligned */ + nby = round_up(nby, track->npipes * 8); + } else { + switch (track->npipes) { + case 8: + nbx = round_up(nbx, 64 * 8); + nby = round_up(nby, 64 * 8); + break; + case 4: + nbx = round_up(nbx, 64 * 8); + nby = round_up(nby, 32 * 8); + break; + case 2: + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 32 * 8); + break; + case 1: + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 16 * 8); + break; + default: + dev_warn(p->dev, "%s:%d invalid num pipes %d\n", + __func__, __LINE__, track->npipes); + return -EINVAL; + } + } + /* compute number of htile */ + nbx = nbx / 8; + nby = nby / 8; + size = nbx * nby * 4; + size += track->htile_offset; + + if (size > radeon_bo_size(track->htile_bo)) { + dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n", + __func__, __LINE__, radeon_bo_size(track->htile_bo), + size, nbx, nby); + return -EINVAL; + } + return 0; +} + +static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) +{ + struct evergreen_cs_track *track = p->track; + struct eg_surface surf; + unsigned pitch, slice, mslice; + unsigned long offset; + int r; + + mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; + pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size); + slice = track->db_depth_slice; + surf.nbx = (pitch + 1) * 8; + surf.nby = ((slice + 1) * 64) / surf.nbx; + surf.mode = G_028040_ARRAY_MODE(track->db_z_info); + surf.format = G_028044_FORMAT(track->db_s_info); + surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info); + surf.nbanks = G_028040_NUM_BANKS(track->db_z_info); + surf.bankw = G_028040_BANK_WIDTH(track->db_z_info); + surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info); + surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info); + surf.nsamples = 1; + + if (surf.format != 1) { + dev_warn(p->dev, "%s:%d stencil invalid format %d\n", + __func__, __LINE__, surf.format); + return -EINVAL; + } + /* replace by color format so we can use same code */ + surf.format = V_028C70_COLOR_8; + + r = evergreen_surface_value_conv_check(p, &surf, "stencil"); + if (r) { + return r; + } + + r = evergreen_surface_check(p, &surf, NULL); + if (r) { + /* old userspace doesn't compute proper depth/stencil alignment + * check that alignment against a bigger byte per elements and + * only report if that alignment is wrong too. + */ + surf.format = V_028C70_COLOR_8_8_8_8; + r = evergreen_surface_check(p, &surf, "stencil"); + if (r) { + dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", + __func__, __LINE__, track->db_depth_size, + track->db_depth_slice, track->db_s_info, track->db_z_info); + } + return r; + } + + offset = track->db_s_read_offset << 8; + if (offset & (surf.base_align - 1)) { + dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + __func__, __LINE__, offset, surf.base_align); + return -EINVAL; + } + offset += surf.layer_size * mslice; + if (offset > radeon_bo_size(track->db_s_read_bo)) { + dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, " + "offset %ld, max layer %d, bo size %ld)\n", + __func__, __LINE__, surf.layer_size, + (unsigned long)track->db_s_read_offset << 8, mslice, + radeon_bo_size(track->db_s_read_bo)); + dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", + __func__, __LINE__, track->db_depth_size, + track->db_depth_slice, track->db_s_info, track->db_z_info); + return -EINVAL; + } + + offset = track->db_s_write_offset << 8; + if (offset & (surf.base_align - 1)) { + dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + __func__, __LINE__, offset, surf.base_align); + return -EINVAL; + } + offset += surf.layer_size * mslice; + if (offset > radeon_bo_size(track->db_s_write_bo)) { + dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, " + "offset %ld, max layer %d, bo size %ld)\n", + __func__, __LINE__, surf.layer_size, + (unsigned long)track->db_s_write_offset << 8, mslice, + radeon_bo_size(track->db_s_write_bo)); + return -EINVAL; + } + + /* hyperz */ + if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) { + r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby); + if (r) { + return r; + } + } + + return 0; +} + +static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) +{ + struct evergreen_cs_track *track = p->track; + struct eg_surface surf; + unsigned pitch, slice, mslice; + unsigned long offset; + int r; + + mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; + pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size); + slice = track->db_depth_slice; + surf.nbx = (pitch + 1) * 8; + surf.nby = ((slice + 1) * 64) / surf.nbx; + surf.mode = G_028040_ARRAY_MODE(track->db_z_info); + surf.format = G_028040_FORMAT(track->db_z_info); + surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info); + surf.nbanks = G_028040_NUM_BANKS(track->db_z_info); + surf.bankw = G_028040_BANK_WIDTH(track->db_z_info); + surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info); + surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info); + surf.nsamples = 1; + + switch (surf.format) { + case V_028040_Z_16: + surf.format = V_028C70_COLOR_16; + break; + case V_028040_Z_24: + case V_028040_Z_32_FLOAT: + surf.format = V_028C70_COLOR_8_8_8_8; + break; + default: + dev_warn(p->dev, "%s:%d depth invalid format %d\n", + __func__, __LINE__, surf.format); + return -EINVAL; + } + + r = evergreen_surface_value_conv_check(p, &surf, "depth"); + if (r) { + dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n", + __func__, __LINE__, track->db_depth_size, + track->db_depth_slice, track->db_z_info); + return r; + } + + r = evergreen_surface_check(p, &surf, "depth"); + if (r) { + dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n", + __func__, __LINE__, track->db_depth_size, + track->db_depth_slice, track->db_z_info); + return r; + } + + offset = track->db_z_read_offset << 8; + if (offset & (surf.base_align - 1)) { + dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + __func__, __LINE__, offset, surf.base_align); + return -EINVAL; + } + offset += surf.layer_size * mslice; + if (offset > radeon_bo_size(track->db_z_read_bo)) { + dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, " + "offset %ld, max layer %d, bo size %ld)\n", + __func__, __LINE__, surf.layer_size, + (unsigned long)track->db_z_read_offset << 8, mslice, + radeon_bo_size(track->db_z_read_bo)); + return -EINVAL; + } + + offset = track->db_z_write_offset << 8; + if (offset & (surf.base_align - 1)) { + dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + __func__, __LINE__, offset, surf.base_align); + return -EINVAL; + } + offset += surf.layer_size * mslice; + if (offset > radeon_bo_size(track->db_z_write_bo)) { + dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, " + "offset %ld, max layer %d, bo size %ld)\n", + __func__, __LINE__, surf.layer_size, + (unsigned long)track->db_z_write_offset << 8, mslice, + radeon_bo_size(track->db_z_write_bo)); + return -EINVAL; + } + + /* hyperz */ + if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) { + r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby); + if (r) { + return r; + } + } + + return 0; +} + +static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p, + struct radeon_bo *texture, + struct radeon_bo *mipmap, + unsigned idx) +{ + struct eg_surface surf; + unsigned long toffset, moffset; + unsigned dim, llevel, mslice, width, height, depth, i; + u32 texdw[8]; + int r; + + texdw[0] = radeon_get_ib_value(p, idx + 0); + texdw[1] = radeon_get_ib_value(p, idx + 1); + texdw[2] = radeon_get_ib_value(p, idx + 2); + texdw[3] = radeon_get_ib_value(p, idx + 3); + texdw[4] = radeon_get_ib_value(p, idx + 4); + texdw[5] = radeon_get_ib_value(p, idx + 5); + texdw[6] = radeon_get_ib_value(p, idx + 6); + texdw[7] = radeon_get_ib_value(p, idx + 7); + dim = G_030000_DIM(texdw[0]); + llevel = G_030014_LAST_LEVEL(texdw[5]); + mslice = G_030014_LAST_ARRAY(texdw[5]) + 1; + width = G_030000_TEX_WIDTH(texdw[0]) + 1; + height = G_030004_TEX_HEIGHT(texdw[1]) + 1; + depth = G_030004_TEX_DEPTH(texdw[1]) + 1; + surf.format = G_03001C_DATA_FORMAT(texdw[7]); + surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8; + surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx); + surf.nby = r600_fmt_get_nblocksy(surf.format, height); + surf.mode = G_030004_ARRAY_MODE(texdw[1]); + surf.tsplit = G_030018_TILE_SPLIT(texdw[6]); + surf.nbanks = G_03001C_NUM_BANKS(texdw[7]); + surf.bankw = G_03001C_BANK_WIDTH(texdw[7]); + surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]); + surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]); + surf.nsamples = 1; + toffset = texdw[2] << 8; + moffset = texdw[3] << 8; + + if (!r600_fmt_is_valid_texture(surf.format, p->family)) { + dev_warn(p->dev, "%s:%d texture invalid format %d\n", + __func__, __LINE__, surf.format); + return -EINVAL; + } + switch (dim) { + case V_030000_SQ_TEX_DIM_1D: + case V_030000_SQ_TEX_DIM_2D: + case V_030000_SQ_TEX_DIM_CUBEMAP: + case V_030000_SQ_TEX_DIM_1D_ARRAY: + case V_030000_SQ_TEX_DIM_2D_ARRAY: + depth = 1; + case V_030000_SQ_TEX_DIM_3D: + break; + default: + dev_warn(p->dev, "%s:%d texture invalid dimension %d\n", + __func__, __LINE__, dim); + return -EINVAL; + } + + r = evergreen_surface_value_conv_check(p, &surf, "texture"); + if (r) { + return r; + } + + /* align height */ + evergreen_surface_check(p, &surf, NULL); + surf.nby = ALIGN(surf.nby, surf.halign); + + r = evergreen_surface_check(p, &surf, "texture"); + if (r) { + dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", + __func__, __LINE__, texdw[0], texdw[1], texdw[4], + texdw[5], texdw[6], texdw[7]); + return r; + } + + /* check texture size */ + if (toffset & (surf.base_align - 1)) { + dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n", + __func__, __LINE__, toffset, surf.base_align); + return -EINVAL; + } + if (moffset & (surf.base_align - 1)) { + dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n", + __func__, __LINE__, moffset, surf.base_align); + return -EINVAL; + } + if (dim == SQ_TEX_DIM_3D) { + toffset += surf.layer_size * depth; + } else { + toffset += surf.layer_size * mslice; + } + if (toffset > radeon_bo_size(texture)) { + dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, " + "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n", + __func__, __LINE__, surf.layer_size, + (unsigned long)texdw[2] << 8, mslice, + depth, radeon_bo_size(texture), + surf.nbx, surf.nby); + return -EINVAL; + } + + /* check mipmap size */ + for (i = 1; i <= llevel; i++) { + unsigned w, h, d; + + w = r600_mip_minify(width, i); + h = r600_mip_minify(height, i); + d = r600_mip_minify(depth, i); + surf.nbx = r600_fmt_get_nblocksx(surf.format, w); + surf.nby = r600_fmt_get_nblocksy(surf.format, h); + + switch (surf.mode) { + case ARRAY_2D_TILED_THIN1: + if (surf.nbx < surf.palign || surf.nby < surf.halign) { + surf.mode = ARRAY_1D_TILED_THIN1; + } + /* recompute alignment */ + evergreen_surface_check(p, &surf, NULL); + break; + case ARRAY_LINEAR_GENERAL: + case ARRAY_LINEAR_ALIGNED: + case ARRAY_1D_TILED_THIN1: + break; + default: + dev_warn(p->dev, "%s:%d invalid array mode %d\n", + __func__, __LINE__, surf.mode); + return -EINVAL; + } + surf.nbx = ALIGN(surf.nbx, surf.palign); + surf.nby = ALIGN(surf.nby, surf.halign); + + r = evergreen_surface_check(p, &surf, "mipmap"); + if (r) { + return r; + } + + if (dim == SQ_TEX_DIM_3D) { + moffset += surf.layer_size * d; + } else { + moffset += surf.layer_size * mslice; + } + if (moffset > radeon_bo_size(mipmap)) { + dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, " + "offset %ld, coffset %ld, max layer %d, depth %d, " + "bo size %ld) level0 (%d %d %d)\n", + __func__, __LINE__, i, surf.layer_size, + (unsigned long)texdw[3] << 8, moffset, mslice, + d, radeon_bo_size(mipmap), + width, height, depth); + dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", + __func__, __LINE__, surf.nbx, surf.nby, + surf.mode, surf.bpe, surf.nsamples, + surf.bankw, surf.bankh, + surf.tsplit, surf.mtilea); + return -EINVAL; + } + } + + return 0; +} + +static int evergreen_cs_track_check(struct radeon_cs_parser *p) +{ + struct evergreen_cs_track *track = p->track; + unsigned tmp, i; + int r; + unsigned buffer_mask = 0; + + /* check streamout */ + if (track->streamout_dirty && track->vgt_strmout_config) { + for (i = 0; i < 4; i++) { + if (track->vgt_strmout_config & (1 << i)) { + buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf; + } + } + + for (i = 0; i < 4; i++) { + if (buffer_mask & (1 << i)) { + if (track->vgt_strmout_bo[i]) { + u64 offset = (u64)track->vgt_strmout_bo_offset[i] + + (u64)track->vgt_strmout_size[i]; + if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { + DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n", + i, offset, + radeon_bo_size(track->vgt_strmout_bo[i])); + return -EINVAL; + } + } else { + dev_warn(p->dev, "No buffer for streamout %d\n", i); + return -EINVAL; + } + } + } + track->streamout_dirty = false; + } + + if (track->sx_misc_kill_all_prims) + return 0; + + /* check that we have a cb for each enabled target + */ + if (track->cb_dirty) { + tmp = track->cb_target_mask; + for (i = 0; i < 8; i++) { + if ((tmp >> (i * 4)) & 0xF) { + /* at least one component is enabled */ + if (track->cb_color_bo[i] == NULL) { + dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", + __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); + return -EINVAL; + } + /* check cb */ + r = evergreen_cs_track_validate_cb(p, i); + if (r) { + return r; + } + } + } + track->cb_dirty = false; + } + + if (track->db_dirty) { + /* Check stencil buffer */ + if (G_028800_STENCIL_ENABLE(track->db_depth_control)) { + r = evergreen_cs_track_validate_stencil(p); + if (r) + return r; + } + /* Check depth buffer */ + if (G_028800_Z_ENABLE(track->db_depth_control)) { + r = evergreen_cs_track_validate_depth(p); + if (r) + return r; + } + track->db_dirty = false; + } + return 0; } @@ -503,6 +1267,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case DB_DEPTH_CONTROL: track->db_depth_control = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case CAYMAN_DB_EQAA: if (p->rdev->family < CHIP_CAYMAN) { @@ -532,20 +1297,35 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + unsigned bankw, bankh, mtaspect, tile_split; + + evergreen_tiling_fields(reloc->lobj.tiling_flags, + &bankw, &bankh, &mtaspect, + &tile_split); ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); - ib[idx] |= DB_TILE_SPLIT(evergreen_cs_get_tile_split(track->row_size)); + ib[idx] |= DB_TILE_SPLIT(tile_split) | + DB_BANK_WIDTH(bankw) | + DB_BANK_HEIGHT(bankh) | + DB_MACRO_TILE_ASPECT(mtaspect); } } + track->db_dirty = true; break; case DB_STENCIL_INFO: track->db_s_info = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case DB_DEPTH_VIEW: track->db_depth_view = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case DB_DEPTH_SIZE: track->db_depth_size = radeon_get_ib_value(p, idx); - track->db_depth_size_idx = idx; + track->db_dirty = true; + break; + case R_02805C_DB_DEPTH_SLICE: + track->db_depth_slice = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case DB_Z_READ_BASE: r = evergreen_cs_packet_next_reloc(p, &reloc); @@ -557,6 +1337,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->db_z_read_offset = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->db_z_read_bo = reloc->robj; + track->db_dirty = true; break; case DB_Z_WRITE_BASE: r = evergreen_cs_packet_next_reloc(p, &reloc); @@ -568,6 +1349,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->db_z_write_offset = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->db_z_write_bo = reloc->robj; + track->db_dirty = true; break; case DB_STENCIL_READ_BASE: r = evergreen_cs_packet_next_reloc(p, &reloc); @@ -579,6 +1361,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->db_s_read_offset = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->db_s_read_bo = reloc->robj; + track->db_dirty = true; break; case DB_STENCIL_WRITE_BASE: r = evergreen_cs_packet_next_reloc(p, &reloc); @@ -590,18 +1373,56 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->db_s_write_offset = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->db_s_write_bo = reloc->robj; + track->db_dirty = true; break; case VGT_STRMOUT_CONFIG: track->vgt_strmout_config = radeon_get_ib_value(p, idx); + track->streamout_dirty = true; break; case VGT_STRMOUT_BUFFER_CONFIG: track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx); + track->streamout_dirty = true; break; + case VGT_STRMOUT_BUFFER_BASE_0: + case VGT_STRMOUT_BUFFER_BASE_1: + case VGT_STRMOUT_BUFFER_BASE_2: + case VGT_STRMOUT_BUFFER_BASE_3: + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + dev_warn(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; + track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + track->vgt_strmout_bo[tmp] = reloc->robj; + track->streamout_dirty = true; + break; + case VGT_STRMOUT_BUFFER_SIZE_0: + case VGT_STRMOUT_BUFFER_SIZE_1: + case VGT_STRMOUT_BUFFER_SIZE_2: + case VGT_STRMOUT_BUFFER_SIZE_3: + tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16; + /* size in register is DWs, convert to bytes */ + track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4; + track->streamout_dirty = true; + break; + case CP_COHER_BASE: + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + dev_warn(p->dev, "missing reloc for CP_COHER_BASE " + "0x%04X\n", reg); + return -EINVAL; + } + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); case CB_TARGET_MASK: track->cb_target_mask = radeon_get_ib_value(p, idx); + track->cb_dirty = true; break; case CB_SHADER_MASK: track->cb_shader_mask = radeon_get_ib_value(p, idx); + track->cb_dirty = true; break; case PA_SC_AA_CONFIG: if (p->rdev->family >= CHIP_CAYMAN) { @@ -631,6 +1452,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_VIEW: tmp = (reg - CB_COLOR0_VIEW) / 0x3c; track->cb_color_view[tmp] = radeon_get_ib_value(p, idx); + track->cb_dirty = true; break; case CB_COLOR8_VIEW: case CB_COLOR9_VIEW: @@ -638,6 +1460,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_VIEW: tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8; track->cb_color_view[tmp] = radeon_get_ib_value(p, idx); + track->cb_dirty = true; break; case CB_COLOR0_INFO: case CB_COLOR1_INFO: @@ -659,6 +1482,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); } + track->cb_dirty = true; break; case CB_COLOR8_INFO: case CB_COLOR9_INFO: @@ -676,6 +1500,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); } + track->cb_dirty = true; break; case CB_COLOR0_PITCH: case CB_COLOR1_PITCH: @@ -687,7 +1512,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_PITCH: tmp = (reg - CB_COLOR0_PITCH) / 0x3c; track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_pitch_idx[tmp] = idx; + track->cb_dirty = true; break; case CB_COLOR8_PITCH: case CB_COLOR9_PITCH: @@ -695,7 +1520,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_PITCH: tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8; track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_pitch_idx[tmp] = idx; + track->cb_dirty = true; break; case CB_COLOR0_SLICE: case CB_COLOR1_SLICE: @@ -707,7 +1532,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_SLICE: tmp = (reg - CB_COLOR0_SLICE) / 0x3c; track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_slice_idx[tmp] = idx; + track->cb_dirty = true; break; case CB_COLOR8_SLICE: case CB_COLOR9_SLICE: @@ -715,7 +1540,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_SLICE: tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8; track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_slice_idx[tmp] = idx; + track->cb_dirty = true; break; case CB_COLOR0_ATTRIB: case CB_COLOR1_ATTRIB: @@ -725,6 +1550,30 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR5_ATTRIB: case CB_COLOR6_ATTRIB: case CB_COLOR7_ATTRIB: + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + dev_warn(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + unsigned bankw, bankh, mtaspect, tile_split; + + evergreen_tiling_fields(reloc->lobj.tiling_flags, + &bankw, &bankh, &mtaspect, + &tile_split); + ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); + ib[idx] |= CB_TILE_SPLIT(tile_split) | + CB_BANK_WIDTH(bankw) | + CB_BANK_HEIGHT(bankh) | + CB_MACRO_TILE_ASPECT(mtaspect); + } + } + tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c); + track->cb_color_attrib[tmp] = ib[idx]; + track->cb_dirty = true; + break; case CB_COLOR8_ATTRIB: case CB_COLOR9_ATTRIB: case CB_COLOR10_ATTRIB: @@ -735,30 +1584,23 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { - ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); - ib[idx] |= CB_TILE_SPLIT(evergreen_cs_get_tile_split(track->row_size)); + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + unsigned bankw, bankh, mtaspect, tile_split; + + evergreen_tiling_fields(reloc->lobj.tiling_flags, + &bankw, &bankh, &mtaspect, + &tile_split); + ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); + ib[idx] |= CB_TILE_SPLIT(tile_split) | + CB_BANK_WIDTH(bankw) | + CB_BANK_HEIGHT(bankh) | + CB_MACRO_TILE_ASPECT(mtaspect); + } } - break; - case CB_COLOR0_DIM: - case CB_COLOR1_DIM: - case CB_COLOR2_DIM: - case CB_COLOR3_DIM: - case CB_COLOR4_DIM: - case CB_COLOR5_DIM: - case CB_COLOR6_DIM: - case CB_COLOR7_DIM: - tmp = (reg - CB_COLOR0_DIM) / 0x3c; - track->cb_color_dim[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_dim_idx[tmp] = idx; - break; - case CB_COLOR8_DIM: - case CB_COLOR9_DIM: - case CB_COLOR10_DIM: - case CB_COLOR11_DIM: - tmp = ((reg - CB_COLOR8_DIM) / 0x1c) + 8; - track->cb_color_dim[tmp] = radeon_get_ib_value(p, idx); - track->cb_color_dim_idx[tmp] = idx; + tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8; + track->cb_color_attrib[tmp] = ib[idx]; + track->cb_dirty = true; break; case CB_COLOR0_FMASK: case CB_COLOR1_FMASK: @@ -833,8 +1675,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) tmp = (reg - CB_COLOR0_BASE) / 0x3c; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); - track->cb_color_base_last[tmp] = ib[idx]; track->cb_color_bo[tmp] = reloc->robj; + track->cb_dirty = true; break; case CB_COLOR8_BASE: case CB_COLOR9_BASE: @@ -849,8 +1691,25 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); - track->cb_color_base_last[tmp] = ib[idx]; track->cb_color_bo[tmp] = reloc->robj; + track->cb_dirty = true; + break; + case DB_HTILE_DATA_BASE: + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + dev_warn(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + track->htile_offset = radeon_get_ib_value(p, idx); + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + track->htile_bo = reloc->robj; + track->db_dirty = true; + break; + case DB_HTILE_SURFACE: + /* 8x8 only */ + track->htile_surface = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case CB_IMMED0_BASE: case CB_IMMED1_BASE: @@ -864,7 +1723,6 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_IMMED9_BASE: case CB_IMMED10_BASE: case CB_IMMED11_BASE: - case DB_HTILE_DATA_BASE: case SQ_PGM_START_FS: case SQ_PGM_START_ES: case SQ_PGM_START_VS: @@ -989,6 +1847,9 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); break; + case SX_MISC: + track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; + break; default: dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; @@ -996,22 +1857,30 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return 0; } -/** - * evergreen_check_texture_resource() - check if register is authorized or not - * @p: parser structure holding parsing context - * @idx: index into the cs buffer - * @texture: texture's bo structure - * @mipmap: mipmap's bo structure - * - * This function will check that the resource has valid field and that - * the texture and mipmap bo object are big enough to cover this resource. - */ -static int evergreen_check_texture_resource(struct radeon_cs_parser *p, u32 idx, - struct radeon_bo *texture, - struct radeon_bo *mipmap) +static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) { - /* XXX fill in */ - return 0; + u32 last_reg, m, i; + + if (p->rdev->family >= CHIP_CAYMAN) + last_reg = ARRAY_SIZE(cayman_reg_safe_bm); + else + last_reg = ARRAY_SIZE(evergreen_reg_safe_bm); + + i = (reg >> 7); + if (i >= last_reg) { + dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + return false; + } + m = 1 << ((reg >> 2) & 31); + if (p->rdev->family >= CHIP_CAYMAN) { + if (!(cayman_reg_safe_bm[i] & m)) + return true; + } else { + if (!(evergreen_reg_safe_bm[i] & m)) + return true; + } + dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + return false; } static int evergreen_packet3_check(struct radeon_cs_parser *p, @@ -1036,6 +1905,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, { int pred_op; int tmp; + uint64_t offset; + if (pkt->count != 1) { DRM_ERROR("bad SET PREDICATION\n"); return -EINVAL; @@ -1059,8 +1930,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - ib[idx + 0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx + 1] = tmp + (upper_32_bits(reloc->lobj.gpu_offset) & 0xff); + offset = reloc->lobj.gpu_offset + + (idx_value & 0xfffffff0) + + ((u64)(tmp & 0xff) << 32); + + ib[idx + 0] = offset; + ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff); } break; case PACKET3_CONTEXT_CONTROL: @@ -1088,6 +1963,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } break; case PACKET3_INDEX_BASE: + { + uint64_t offset; + if (pkt->count != 1) { DRM_ERROR("bad INDEX_BASE\n"); return -EINVAL; @@ -1097,15 +1975,24 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad INDEX_BASE\n"); return -EINVAL; } - ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+1] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + idx_value + + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); + + ib[idx+0] = offset; + ib[idx+1] = upper_32_bits(offset) & 0xff; + r = evergreen_cs_track_check(p); if (r) { dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; + } case PACKET3_DRAW_INDEX: + { + uint64_t offset; if (pkt->count != 3) { DRM_ERROR("bad DRAW_INDEX\n"); return -EINVAL; @@ -1115,15 +2002,25 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad DRAW_INDEX\n"); return -EINVAL; } - ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+1] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + idx_value + + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); + + ib[idx+0] = offset; + ib[idx+1] = upper_32_bits(offset) & 0xff; + r = evergreen_cs_track_check(p); if (r) { dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; + } case PACKET3_DRAW_INDEX_2: + { + uint64_t offset; + if (pkt->count != 4) { DRM_ERROR("bad DRAW_INDEX_2\n"); return -EINVAL; @@ -1133,14 +2030,21 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad DRAW_INDEX_2\n"); return -EINVAL; } - ib[idx+1] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + radeon_get_ib_value(p, idx+1) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = offset; + ib[idx+2] = upper_32_bits(offset) & 0xff; + r = evergreen_cs_track_check(p); if (r) { dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; + } case PACKET3_DRAW_INDEX_AUTO: if (pkt->count != 1) { DRM_ERROR("bad DRAW_INDEX_AUTO\n"); @@ -1231,13 +2135,20 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } /* bit 4 is reg (0) or mem (1) */ if (idx_value & 0x10) { + uint64_t offset; + r = evergreen_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("bad WAIT_REG_MEM\n"); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc); + ib[idx+2] = upper_32_bits(offset) & 0xff; } break; case PACKET3_SURFACE_SYNC: @@ -1262,16 +2173,25 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } if (pkt->count) { + uint64_t offset; + r = evergreen_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("bad EVENT_WRITE\n"); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + offset = reloc->lobj.gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = offset & 0xfffffff8; + ib[idx+2] = upper_32_bits(offset) & 0xff; } break; case PACKET3_EVENT_WRITE_EOP: + { + uint64_t offset; + if (pkt->count != 4) { DRM_ERROR("bad EVENT_WRITE_EOP\n"); return -EINVAL; @@ -1281,10 +2201,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad EVENT_WRITE_EOP\n"); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = offset & 0xfffffffc; + ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff); break; + } case PACKET3_EVENT_WRITE_EOS: + { + uint64_t offset; + if (pkt->count != 3) { DRM_ERROR("bad EVENT_WRITE_EOS\n"); return -EINVAL; @@ -1294,9 +2223,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad EVENT_WRITE_EOS\n"); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = offset & 0xfffffffc; + ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff); break; + } case PACKET3_SET_CONFIG_REG: start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START; end_reg = 4 * pkt->count + start_reg - 4; @@ -1344,6 +2279,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } for (i = 0; i < (pkt->count / 8); i++) { struct radeon_bo *texture, *mipmap; + u32 toffset, moffset; u32 size, offset; switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) { @@ -1354,32 +2290,42 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad SET_RESOURCE (tex)\n"); return -EINVAL; } - ib[idx+1+(i*8)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { ib[idx+1+(i*8)+1] |= TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { - ib[idx+1+(i*8)+6] |= - TEX_TILE_SPLIT(evergreen_cs_get_tile_split(track->row_size)); + unsigned bankw, bankh, mtaspect, tile_split; + + evergreen_tiling_fields(reloc->lobj.tiling_flags, + &bankw, &bankh, &mtaspect, + &tile_split); + ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split); ib[idx+1+(i*8)+7] |= + TEX_BANK_WIDTH(bankw) | + TEX_BANK_HEIGHT(bankh) | + MACRO_TILE_ASPECT(mtaspect) | TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); } } texture = reloc->robj; + toffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); /* tex mip base */ r = evergreen_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("bad SET_RESOURCE (tex)\n"); return -EINVAL; } - ib[idx+1+(i*8)+3] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + moffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); mipmap = reloc->robj; - r = evergreen_check_texture_resource(p, idx+1+(i*8), - texture, mipmap); + r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8)); if (r) return r; + ib[idx+1+(i*8)+2] += toffset; + ib[idx+1+(i*8)+3] += moffset; break; case SQ_TEX_VTX_VALID_BUFFER: + { + uint64_t offset64; /* vtx base */ r = evergreen_cs_packet_next_reloc(p, &reloc); if (r) { @@ -1391,11 +2337,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) { /* force size to size of the buffer */ dev_warn(p->dev, "vbo resource seems too big for the bo\n"); - ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj); + ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset; } - ib[idx+1+(i*8)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff); - ib[idx+1+(i*8)+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset64 = reloc->lobj.gpu_offset + offset; + ib[idx+1+(i*8)+0] = offset64; + ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | + (upper_32_bits(offset64) & 0xff); break; + } case SQ_TEX_VTX_INVALID_TEXTURE: case SQ_TEX_VTX_INVALID_BUFFER: default: @@ -1451,6 +2401,104 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } break; + case PACKET3_STRMOUT_BUFFER_UPDATE: + if (pkt->count != 4) { + DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n"); + return -EINVAL; + } + /* Updating memory at DST_ADDRESS. */ + if (idx_value & 0x1) { + u64 offset; + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx+1); + offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; + if ((offset + 4) > radeon_bo_size(reloc->robj)) { + DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->lobj.gpu_offset; + ib[idx+1] = offset; + ib[idx+2] = upper_32_bits(offset) & 0xff; + } + /* Reading data from SRC_ADDRESS. */ + if (((idx_value >> 1) & 0x3) == 2) { + u64 offset; + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx+3); + offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; + if ((offset + 4) > radeon_bo_size(reloc->robj)) { + DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->lobj.gpu_offset; + ib[idx+3] = offset; + ib[idx+4] = upper_32_bits(offset) & 0xff; + } + break; + case PACKET3_COPY_DW: + if (pkt->count != 4) { + DRM_ERROR("bad COPY_DW (invalid count)\n"); + return -EINVAL; + } + if (idx_value & 0x1) { + u64 offset; + /* SRC is memory. */ + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad COPY_DW (missing src reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx+1); + offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; + if ((offset + 4) > radeon_bo_size(reloc->robj)) { + DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->lobj.gpu_offset; + ib[idx+1] = offset; + ib[idx+2] = upper_32_bits(offset) & 0xff; + } else { + /* SRC is a reg. */ + reg = radeon_get_ib_value(p, idx+1) << 2; + if (!evergreen_is_safe_reg(p, reg, idx+1)) + return -EINVAL; + } + if (idx_value & 0x2) { + u64 offset; + /* DST is memory. */ + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad COPY_DW (missing dst reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx+3); + offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; + if ((offset + 4) > radeon_bo_size(reloc->robj)) { + DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->lobj.gpu_offset; + ib[idx+3] = offset; + ib[idx+4] = upper_32_bits(offset) & 0xff; + } else { + /* DST is a reg. */ + reg = radeon_get_ib_value(p, idx+3) << 2; + if (!evergreen_is_safe_reg(p, reg, idx+3)) + return -EINVAL; + } + break; case PACKET3_NOP: break; default: |