diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/r600_cs.c')
-rw-r--r-- | drivers/gpu/drm/radeon/r600_cs.c | 549 |
1 files changed, 395 insertions, 154 deletions
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 5cbe948ef16..b8e12af304a 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -52,18 +52,18 @@ struct r600_cs_track { struct radeon_bo *cb_color_bo[8]; u64 cb_color_bo_mc[8]; u32 cb_color_bo_offset[8]; - struct radeon_bo *cb_color_frag_bo[8]; - struct radeon_bo *cb_color_tile_bo[8]; + struct radeon_bo *cb_color_frag_bo[8]; /* unused */ + struct radeon_bo *cb_color_tile_bo[8]; /* unused */ u32 cb_color_info[8]; u32 cb_color_view[8]; - u32 cb_color_size_idx[8]; + u32 cb_color_size_idx[8]; /* unused */ u32 cb_target_mask; - u32 cb_shader_mask; + u32 cb_shader_mask; /* unused */ u32 cb_color_size[8]; u32 vgt_strmout_en; u32 vgt_strmout_buffer_en; struct radeon_bo *vgt_strmout_bo[4]; - u64 vgt_strmout_bo_mc[4]; + u64 vgt_strmout_bo_mc[4]; /* unused */ u32 vgt_strmout_bo_offset[4]; u32 vgt_strmout_size[4]; u32 db_depth_control; @@ -74,6 +74,13 @@ struct r600_cs_track { u32 db_offset; struct radeon_bo *db_bo; u64 db_bo_mc; + bool sx_misc_kill_all_prims; + bool cb_dirty; + bool db_dirty; + bool streamout_dirty; + struct radeon_bo *htile_bo; + u64 htile_offset; + u32 htile_surface; }; #define FMT_8_BIT(fmt, vc) [fmt] = { 1, 1, 1, vc, CHIP_R600 } @@ -307,6 +314,7 @@ static void r600_cs_track_init(struct r600_cs_track *track) } track->cb_target_mask = 0xFFFFFFFF; track->cb_shader_mask = 0xFFFFFFFF; + track->cb_dirty = true; track->db_bo = NULL; track->db_bo_mc = 0xFFFFFFFF; /* assume the biggest format and that htile is enabled */ @@ -315,6 +323,10 @@ static void r600_cs_track_init(struct r600_cs_track *track) track->db_depth_size = 0xFFFFFFFF; track->db_depth_size_idx = 0; track->db_depth_control = 0xFFFFFFFF; + track->db_dirty = true; + track->htile_bo = NULL; + track->htile_offset = 0xFFFFFFFF; + track->htile_surface = 0; for (i = 0; i < 4; i++) { track->vgt_strmout_size[i] = 0; @@ -322,6 +334,8 @@ static void r600_cs_track_init(struct r600_cs_track *track) track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF; track->vgt_strmout_bo_mc[i] = 0xFFFFFFFF; } + track->streamout_dirty = true; + track->sx_misc_kill_all_prims = false; } static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) @@ -447,19 +461,263 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) return 0; } +static int r600_cs_track_validate_db(struct radeon_cs_parser *p) +{ + struct r600_cs_track *track = p->track; + u32 nviews, bpe, ntiles, size, slice_tile_max, tmp; + u32 height_align, pitch_align, depth_align; + u32 pitch = 8192; + u32 height = 8192; + u64 base_offset, base_align; + struct array_mode_checker array_check; + int array_mode; + volatile u32 *ib = p->ib->ptr; + + + if (track->db_bo == NULL) { + dev_warn(p->dev, "z/stencil with no depth buffer\n"); + return -EINVAL; + } + switch (G_028010_FORMAT(track->db_depth_info)) { + case V_028010_DEPTH_16: + bpe = 2; + break; + case V_028010_DEPTH_X8_24: + case V_028010_DEPTH_8_24: + case V_028010_DEPTH_X8_24_FLOAT: + case V_028010_DEPTH_8_24_FLOAT: + case V_028010_DEPTH_32_FLOAT: + bpe = 4; + break; + case V_028010_DEPTH_X24_8_32_FLOAT: + bpe = 8; + break; + default: + dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info)); + return -EINVAL; + } + if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) { + if (!track->db_depth_size_idx) { + dev_warn(p->dev, "z/stencil buffer size not set\n"); + return -EINVAL; + } + tmp = radeon_bo_size(track->db_bo) - track->db_offset; + tmp = (tmp / bpe) >> 6; + if (!tmp) { + dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n", + track->db_depth_size, bpe, track->db_offset, + radeon_bo_size(track->db_bo)); + return -EINVAL; + } + ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF); + } else { + size = radeon_bo_size(track->db_bo); + /* pitch in pixels */ + pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8; + slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; + slice_tile_max *= 64; + height = slice_tile_max / pitch; + if (height > 8192) + height = 8192; + base_offset = track->db_bo_mc + track->db_offset; + array_mode = G_028010_ARRAY_MODE(track->db_depth_info); + array_check.array_mode = array_mode; + array_check.group_size = track->group_size; + array_check.nbanks = track->nbanks; + array_check.npipes = track->npipes; + array_check.nsamples = track->nsamples; + array_check.blocksize = bpe; + if (r600_get_array_mode_alignment(&array_check, + &pitch_align, &height_align, &depth_align, &base_align)) { + dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, + G_028010_ARRAY_MODE(track->db_depth_info), + track->db_depth_info); + return -EINVAL; + } + switch (array_mode) { + case V_028010_ARRAY_1D_TILED_THIN1: + /* don't break userspace */ + height &= ~0x7; + break; + case V_028010_ARRAY_2D_TILED_THIN1: + break; + default: + dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, + G_028010_ARRAY_MODE(track->db_depth_info), + track->db_depth_info); + return -EINVAL; + } + + if (!IS_ALIGNED(pitch, pitch_align)) { + dev_warn(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n", + __func__, __LINE__, pitch, pitch_align, array_mode); + return -EINVAL; + } + if (!IS_ALIGNED(height, height_align)) { + dev_warn(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n", + __func__, __LINE__, height, height_align, array_mode); + return -EINVAL; + } + if (!IS_ALIGNED(base_offset, base_align)) { + dev_warn(p->dev, "%s offset 0x%llx, 0x%llx, %d not aligned\n", __func__, + base_offset, base_align, array_mode); + return -EINVAL; + } + + ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; + nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1; + tmp = ntiles * bpe * 64 * nviews; + if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) { + dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n", + array_mode, + track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset, + radeon_bo_size(track->db_bo)); + return -EINVAL; + } + } + + /* hyperz */ + if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) { + unsigned long size; + unsigned nbx, nby; + + if (track->htile_bo == NULL) { + dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n", + __func__, __LINE__, track->db_depth_info); + return -EINVAL; + } + if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) { + dev_warn(p->dev, "%s:%d htile can't be enabled with bogus db_depth_size 0x%08x\n", + __func__, __LINE__, track->db_depth_size); + return -EINVAL; + } + + nbx = pitch; + nby = height; + if (G_028D24_LINEAR(track->htile_surface)) { + /* nbx must be 16 htiles aligned == 16 * 8 pixel aligned */ + nbx = round_up(nbx, 16 * 8); + /* nby is npipes htiles aligned == npipes * 8 pixel aligned */ + nby = round_up(nby, track->npipes * 8); + } else { + /* htile widht & nby (8 or 4) make 2 bits number */ + tmp = track->htile_surface & 3; + /* align is htile align * 8, htile align vary according to + * number of pipe and tile width and nby + */ + switch (track->npipes) { + case 8: + switch (tmp) { + case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + nbx = round_up(nbx, 64 * 8); + nby = round_up(nby, 64 * 8); + break; + case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ + case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ + nbx = round_up(nbx, 64 * 8); + nby = round_up(nby, 32 * 8); + break; + case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 32 * 8); + break; + default: + return -EINVAL; + } + break; + case 4: + switch (tmp) { + case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + nbx = round_up(nbx, 64 * 8); + nby = round_up(nby, 32 * 8); + break; + case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ + case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 32 * 8); + break; + case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 16 * 8); + break; + default: + return -EINVAL; + } + break; + case 2: + switch (tmp) { + case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 32 * 8); + break; + case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ + case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 16 * 8); + break; + case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ + nbx = round_up(nbx, 16 * 8); + nby = round_up(nby, 16 * 8); + break; + default: + return -EINVAL; + } + break; + case 1: + switch (tmp) { + case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 16 * 8); + break; + case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ + case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ + nbx = round_up(nbx, 16 * 8); + nby = round_up(nby, 16 * 8); + break; + case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ + nbx = round_up(nbx, 16 * 8); + nby = round_up(nby, 8 * 8); + break; + default: + return -EINVAL; + } + break; + default: + dev_warn(p->dev, "%s:%d invalid num pipes %d\n", + __func__, __LINE__, track->npipes); + return -EINVAL; + } + } + /* compute number of htile */ + nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4; + nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4; + size = nbx * nby * 4; + size += track->htile_offset; + + if (size > radeon_bo_size(track->htile_bo)) { + dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n", + __func__, __LINE__, radeon_bo_size(track->htile_bo), + size, nbx, nby); + return -EINVAL; + } + } + + track->db_dirty = false; + return 0; +} + static int r600_cs_track_check(struct radeon_cs_parser *p) { struct r600_cs_track *track = p->track; u32 tmp; int r, i; - volatile u32 *ib = p->ib->ptr; /* on legacy kernel we don't perform advanced check */ if (p->rdev == NULL) return 0; /* check streamout */ - if (track->vgt_strmout_en) { + if (track->streamout_dirty && track->vgt_strmout_en) { for (i = 0; i < 4; i++) { if (track->vgt_strmout_buffer_en & (1 << i)) { if (track->vgt_strmout_bo[i]) { @@ -477,141 +735,42 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) } } } + track->streamout_dirty = false; } + if (track->sx_misc_kill_all_prims) + return 0; + /* check that we have a cb for each enabled target, we don't check * shader_mask because it seems mesa isn't always setting it :( */ - tmp = track->cb_target_mask; - for (i = 0; i < 8; i++) { - if ((tmp >> (i * 4)) & 0xF) { - /* at least one component is enabled */ - if (track->cb_color_bo[i] == NULL) { - dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", - __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); - return -EINVAL; + if (track->cb_dirty) { + tmp = track->cb_target_mask; + for (i = 0; i < 8; i++) { + if ((tmp >> (i * 4)) & 0xF) { + /* at least one component is enabled */ + if (track->cb_color_bo[i] == NULL) { + dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", + __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); + return -EINVAL; + } + /* perform rewrite of CB_COLOR[0-7]_SIZE */ + r = r600_cs_track_validate_cb(p, i); + if (r) + return r; } - /* perform rewrite of CB_COLOR[0-7]_SIZE */ - r = r600_cs_track_validate_cb(p, i); - if (r) - return r; } + track->cb_dirty = false; } - /* Check depth buffer */ - if (G_028800_STENCIL_ENABLE(track->db_depth_control) || - G_028800_Z_ENABLE(track->db_depth_control)) { - u32 nviews, bpe, ntiles, size, slice_tile_max; - u32 height, height_align, pitch, pitch_align, depth_align; - u64 base_offset, base_align; - struct array_mode_checker array_check; - int array_mode; - - if (track->db_bo == NULL) { - dev_warn(p->dev, "z/stencil with no depth buffer\n"); - return -EINVAL; - } - if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) { - dev_warn(p->dev, "this kernel doesn't support z/stencil htile\n"); - return -EINVAL; - } - switch (G_028010_FORMAT(track->db_depth_info)) { - case V_028010_DEPTH_16: - bpe = 2; - break; - case V_028010_DEPTH_X8_24: - case V_028010_DEPTH_8_24: - case V_028010_DEPTH_X8_24_FLOAT: - case V_028010_DEPTH_8_24_FLOAT: - case V_028010_DEPTH_32_FLOAT: - bpe = 4; - break; - case V_028010_DEPTH_X24_8_32_FLOAT: - bpe = 8; - break; - default: - dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info)); - return -EINVAL; - } - if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) { - if (!track->db_depth_size_idx) { - dev_warn(p->dev, "z/stencil buffer size not set\n"); - return -EINVAL; - } - tmp = radeon_bo_size(track->db_bo) - track->db_offset; - tmp = (tmp / bpe) >> 6; - if (!tmp) { - dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n", - track->db_depth_size, bpe, track->db_offset, - radeon_bo_size(track->db_bo)); - return -EINVAL; - } - ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF); - } else { - size = radeon_bo_size(track->db_bo); - /* pitch in pixels */ - pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8; - slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; - slice_tile_max *= 64; - height = slice_tile_max / pitch; - if (height > 8192) - height = 8192; - base_offset = track->db_bo_mc + track->db_offset; - array_mode = G_028010_ARRAY_MODE(track->db_depth_info); - array_check.array_mode = array_mode; - array_check.group_size = track->group_size; - array_check.nbanks = track->nbanks; - array_check.npipes = track->npipes; - array_check.nsamples = track->nsamples; - array_check.blocksize = bpe; - if (r600_get_array_mode_alignment(&array_check, - &pitch_align, &height_align, &depth_align, &base_align)) { - dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, - G_028010_ARRAY_MODE(track->db_depth_info), - track->db_depth_info); - return -EINVAL; - } - switch (array_mode) { - case V_028010_ARRAY_1D_TILED_THIN1: - /* don't break userspace */ - height &= ~0x7; - break; - case V_028010_ARRAY_2D_TILED_THIN1: - break; - default: - dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, - G_028010_ARRAY_MODE(track->db_depth_info), - track->db_depth_info); - return -EINVAL; - } - - if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n", - __func__, __LINE__, pitch, pitch_align, array_mode); - return -EINVAL; - } - if (!IS_ALIGNED(height, height_align)) { - dev_warn(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n", - __func__, __LINE__, height, height_align, array_mode); - return -EINVAL; - } - if (!IS_ALIGNED(base_offset, base_align)) { - dev_warn(p->dev, "%s offset[%d] 0x%llx, 0x%llx, %d not aligned\n", __func__, i, - base_offset, base_align, array_mode); - return -EINVAL; - } - ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; - nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1; - tmp = ntiles * bpe * 64 * nviews; - if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) { - dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n", - array_mode, - track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset, - radeon_bo_size(track->db_bo)); - return -EINVAL; - } - } + /* Check depth buffer */ + if (track->db_dirty && (G_028800_STENCIL_ENABLE(track->db_depth_control) || + G_028800_Z_ENABLE(track->db_depth_control))) { + r = r600_cs_track_validate_db(p); + if (r) + return r; } + return 0; } @@ -983,6 +1142,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case R_028800_DB_DEPTH_CONTROL: track->db_depth_control = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case R_028010_DB_DEPTH_INFO: if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) && @@ -1003,21 +1163,27 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1); track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1); } - } else + } else { track->db_depth_info = radeon_get_ib_value(p, idx); + } + track->db_dirty = true; break; case R_028004_DB_DEPTH_VIEW: track->db_depth_view = radeon_get_ib_value(p, idx); + track->db_dirty = true; break; case R_028000_DB_DEPTH_SIZE: track->db_depth_size = radeon_get_ib_value(p, idx); track->db_depth_size_idx = idx; + track->db_dirty = true; break; case R_028AB0_VGT_STRMOUT_EN: track->vgt_strmout_en = radeon_get_ib_value(p, idx); + track->streamout_dirty = true; break; case R_028B20_VGT_STRMOUT_BUFFER_EN: track->vgt_strmout_buffer_en = radeon_get_ib_value(p, idx); + track->streamout_dirty = true; break; case VGT_STRMOUT_BUFFER_BASE_0: case VGT_STRMOUT_BUFFER_BASE_1: @@ -1034,6 +1200,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->vgt_strmout_bo[tmp] = reloc->robj; track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset; + track->streamout_dirty = true; break; case VGT_STRMOUT_BUFFER_SIZE_0: case VGT_STRMOUT_BUFFER_SIZE_1: @@ -1042,6 +1209,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16; /* size in register is DWs, convert to bytes */ track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4; + track->streamout_dirty = true; break; case CP_COHER_BASE: r = r600_cs_packet_next_reloc(p, &reloc); @@ -1054,6 +1222,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case R_028238_CB_TARGET_MASK: track->cb_target_mask = radeon_get_ib_value(p, idx); + track->cb_dirty = true; break; case R_02823C_CB_SHADER_MASK: track->cb_shader_mask = radeon_get_ib_value(p, idx); @@ -1061,6 +1230,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case R_028C04_PA_SC_AA_CONFIG: tmp = G_028C04_MSAA_NUM_SAMPLES(radeon_get_ib_value(p, idx)); track->nsamples = 1 << tmp; + track->cb_dirty = true; break; case R_0280A0_CB_COLOR0_INFO: case R_0280A4_CB_COLOR1_INFO: @@ -1090,6 +1260,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4; track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); } + track->cb_dirty = true; break; case R_028080_CB_COLOR0_VIEW: case R_028084_CB_COLOR1_VIEW: @@ -1101,6 +1272,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case R_02809C_CB_COLOR7_VIEW: tmp = (reg - R_028080_CB_COLOR0_VIEW) / 4; track->cb_color_view[tmp] = radeon_get_ib_value(p, idx); + track->cb_dirty = true; break; case R_028060_CB_COLOR0_SIZE: case R_028064_CB_COLOR1_SIZE: @@ -1113,6 +1285,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) tmp = (reg - R_028060_CB_COLOR0_SIZE) / 4; track->cb_color_size[tmp] = radeon_get_ib_value(p, idx); track->cb_color_size_idx[tmp] = idx; + track->cb_dirty = true; break; /* This register were added late, there is userspace * which does provide relocation for those but set @@ -1195,6 +1368,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->cb_color_base_last[tmp] = ib[idx]; track->cb_color_bo[tmp] = reloc->robj; track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset; + track->cb_dirty = true; break; case DB_DEPTH_BASE: r = r600_cs_packet_next_reloc(p, &reloc); @@ -1207,8 +1381,24 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->db_bo = reloc->robj; track->db_bo_mc = reloc->lobj.gpu_offset; + track->db_dirty = true; break; case DB_HTILE_DATA_BASE: + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + dev_warn(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + track->htile_offset = radeon_get_ib_value(p, idx) << 8; + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + track->htile_bo = reloc->robj; + track->db_dirty = true; + break; + case DB_HTILE_SURFACE: + track->htile_surface = radeon_get_ib_value(p, idx); + track->db_dirty = true; + break; case SQ_PGM_START_FS: case SQ_PGM_START_ES: case SQ_PGM_START_VS: @@ -1279,6 +1469,9 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); break; + case SX_MISC: + track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; + break; default: dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; @@ -1392,6 +1585,7 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, h0 = G_038004_TEX_HEIGHT(word1) + 1; d0 = G_038004_TEX_DEPTH(word1); nfaces = 1; + array = 0; switch (G_038000_DIM(word0)) { case V_038000_SQ_TEX_DIM_1D: case V_038000_SQ_TEX_DIM_2D: @@ -1529,6 +1723,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p, { int pred_op; int tmp; + uint64_t offset; + if (pkt->count != 1) { DRM_ERROR("bad SET PREDICATION\n"); return -EINVAL; @@ -1552,8 +1748,12 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - ib[idx + 0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx + 1] = tmp + (upper_32_bits(reloc->lobj.gpu_offset) & 0xff); + offset = reloc->lobj.gpu_offset + + (idx_value & 0xfffffff0) + + ((u64)(tmp & 0xff) << 32); + + ib[idx + 0] = offset; + ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff); } break; @@ -1577,6 +1777,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p, } break; case PACKET3_DRAW_INDEX: + { + uint64_t offset; if (pkt->count != 3) { DRM_ERROR("bad DRAW_INDEX\n"); return -EINVAL; @@ -1586,14 +1788,21 @@ static int r600_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad DRAW_INDEX\n"); return -EINVAL; } - ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+1] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + idx_value + + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); + + ib[idx+0] = offset; + ib[idx+1] = upper_32_bits(offset) & 0xff; + r = r600_cs_track_check(p); if (r) { dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; + } case PACKET3_DRAW_INDEX_AUTO: if (pkt->count != 1) { DRM_ERROR("bad DRAW_INDEX_AUTO\n"); @@ -1624,13 +1833,20 @@ static int r600_packet3_check(struct radeon_cs_parser *p, } /* bit 4 is reg (0) or mem (1) */ if (idx_value & 0x10) { + uint64_t offset; + r = r600_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("bad WAIT_REG_MEM\n"); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffff0) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffff0); + ib[idx+2] = upper_32_bits(offset) & 0xff; } break; case PACKET3_SURFACE_SYNC: @@ -1655,16 +1871,25 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } if (pkt->count) { + uint64_t offset; + r = r600_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("bad EVENT_WRITE\n"); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + offset = reloc->lobj.gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = offset & 0xfffffff8; + ib[idx+2] = upper_32_bits(offset) & 0xff; } break; case PACKET3_EVENT_WRITE_EOP: + { + uint64_t offset; + if (pkt->count != 4) { DRM_ERROR("bad EVENT_WRITE_EOP\n"); return -EINVAL; @@ -1674,9 +1899,15 @@ static int r600_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad EVENT_WRITE\n"); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset = reloc->lobj.gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + + ib[idx+1] = offset & 0xfffffffc; + ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff); break; + } case PACKET3_SET_CONFIG_REG: start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_OFFSET; end_reg = 4 * pkt->count + start_reg - 4; @@ -1761,6 +1992,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ib[idx+1+(i*7)+3] += mip_offset; break; case SQ_TEX_VTX_VALID_BUFFER: + { + uint64_t offset64; /* vtx base */ r = r600_cs_packet_next_reloc(p, &reloc); if (r) { @@ -1773,11 +2006,15 @@ static int r600_packet3_check(struct radeon_cs_parser *p, /* force size to size of the buffer */ dev_warn(p->dev, "vbo resource seems too big (%d) for the bo (%ld)\n", size + offset, radeon_bo_size(reloc->robj)); - ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj); + ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj) - offset; } - ib[idx+1+(i*7)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff); - ib[idx+1+(i*7)+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + + offset64 = reloc->lobj.gpu_offset + offset; + ib[idx+1+(i*8)+0] = offset64; + ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | + (upper_32_bits(offset64) & 0xff); break; + } case SQ_TEX_VTX_INVALID_TEXTURE: case SQ_TEX_VTX_INVALID_BUFFER: default: @@ -1872,8 +2109,9 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + offset += reloc->lobj.gpu_offset; + ib[idx+1] = offset; + ib[idx+2] = upper_32_bits(offset) & 0xff; } /* Reading data from SRC_ADDRESS. */ if (((idx_value >> 1) & 0x3) == 2) { @@ -1890,8 +2128,9 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + offset += reloc->lobj.gpu_offset; + ib[idx+3] = offset; + ib[idx+4] = upper_32_bits(offset) & 0xff; } break; case PACKET3_COPY_DW: @@ -1914,8 +2153,9 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + offset += reloc->lobj.gpu_offset; + ib[idx+1] = offset; + ib[idx+2] = upper_32_bits(offset) & 0xff; } else { /* SRC is a reg. */ reg = radeon_get_ib_value(p, idx+1) << 2; @@ -1937,8 +2177,9 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + offset += reloc->lobj.gpu_offset; + ib[idx+3] = offset; + ib[idx+4] = upper_32_bits(offset) & 0xff; } else { /* DST is a reg. */ reg = radeon_get_ib_value(p, idx+3) << 2; |