summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/r600_blit.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2012-10-03 10:32:58 +1000
committerDave Airlie <airlied@redhat.com>2012-10-03 10:32:58 +1000
commit8ff1f792dd68ad46f3cfe01e01a375b402cf08da (patch)
treeb73b4fa71c5a9357931360d7894beee247cd464e /drivers/gpu/drm/radeon/r600_blit.c
parent2216c9e74fb3baac3cb73952158dbe38b703997e (diff)
parent82ffd92b162ece87c863c075d993c65333e8e78b (diff)
Merge branch 'drm-next-3.7' of git://people.freedesktop.org/~agd5f/linux into drm-next
Alex writes: "The big changes for 3.7 include: - Asynchronous VM page table updates for Cayman/SI - 2 level VM page table support. Saves memory compared to 1 level page tables. - Reworked PLL handing in the display code allows lots more combinations of monitors to work, including more than two DP displays assuming compatible clocks across shared PLLs. This also allows us to power down extra PLLs when we can share a single one across multiple displays which saves power. - Native backlight control on ATOMBIOS systems. - Improved ACPI support for interacting with the GPU. Fixes backlight control on some laptops. - Document AMD ACPI interfaces - Lots of code cleanup - Bug fixes" * 'drm-next-3.7' of git://people.freedesktop.org/~agd5f/linux: (79 commits) drm/radeon: add vm set_page() callback for SI drm/radeon: rework the vm_flush interface drm/radeon: use WRITE_DATA packets for vm flush on SI drm/radeon/pm: fix multi-head profile handling on BTC+ (v2) drm/radeon: fix radeon power state debug output drm/radeon: force MSIs on RS690 asics drm/radeon: Add MSI quirk for gateway RS690 drm/radeon: allow MIP_ADDRESS=0 for MSAA textures on Evergreen drm/radeon/kms: allow STRMOUT_BASE_UPDATE on RS780 and RS880 drm/radeon: add 2-level VM pagetables support v9 drm/radeon: refactor set_page chipset interface v5 drm/radeon: Fix scratch register leak in IB test. drm/radeon: restore backlight level on resume drm/radeon: add get_backlight_level callback drm/radeon: only adjust default clocks on NI GPUs drm/radeon: validate PPLL in crtc fixup drm/radeon: work around KMS modeset limitations in PLL allocation (v2) drm/radeon: make non-DP PPLL sharing more robust drm/radeon: store the encoder in the radeon_crtc drm/radeon: rework crtc pll setup to better support PPLL sharing ...
Diffstat (limited to 'drivers/gpu/drm/radeon/r600_blit.c')
-rw-r--r--drivers/gpu/drm/radeon/r600_blit.c115
1 files changed, 60 insertions, 55 deletions
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index 3c031a48205..661fec2a2cc 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -489,31 +489,36 @@ set_default_state(drm_radeon_private_t *dev_priv)
ADVANCE_RING();
}
-static uint32_t i2f(uint32_t input)
+/* 23 bits of float fractional data */
+#define I2F_FRAC_BITS 23
+#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1)
+
+/*
+ * Converts unsigned integer into 32-bit IEEE floating point representation.
+ * Will be exact from 0 to 2^24. Above that, we round towards zero
+ * as the fractional bits will not fit in a float. (It would be better to
+ * round towards even as the fpu does, but that is slower.)
+ */
+__pure uint32_t int2float(uint32_t x)
{
- u32 result, i, exponent, fraction;
-
- if ((input & 0x3fff) == 0)
- result = 0; /* 0 is a special case */
- else {
- exponent = 140; /* exponent biased by 127; */
- fraction = (input & 0x3fff) << 10; /* cheat and only
- handle numbers below 2^^15 */
- for (i = 0; i < 14; i++) {
- if (fraction & 0x800000)
- break;
- else {
- fraction = fraction << 1; /* keep
- shifting left until top bit = 1 */
- exponent = exponent - 1;
- }
- }
- result = exponent << 23 | (fraction & 0x7fffff); /* mask
- off top bit; assumed 1 */
- }
- return result;
-}
+ uint32_t msb, exponent, fraction;
+
+ /* Zero is special */
+ if (!x) return 0;
+
+ /* Get location of the most significant bit */
+ msb = __fls(x);
+ /*
+ * Use a rotate instead of a shift because that works both leftwards
+ * and rightwards due to the mod(32) behaviour. This means we don't
+ * need to check to see if we are above 2^24 or not.
+ */
+ fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK;
+ exponent = (127 + msb) << I2F_FRAC_BITS;
+
+ return fraction + exponent;
+}
static int r600_nomm_get_vb(struct drm_device *dev)
{
@@ -632,20 +637,20 @@ r600_blit_copy(struct drm_device *dev,
vb = r600_nomm_get_vb_ptr(dev);
}
- vb[0] = i2f(dst_x);
+ vb[0] = int2float(dst_x);
vb[1] = 0;
- vb[2] = i2f(src_x);
+ vb[2] = int2float(src_x);
vb[3] = 0;
- vb[4] = i2f(dst_x);
- vb[5] = i2f(h);
- vb[6] = i2f(src_x);
- vb[7] = i2f(h);
+ vb[4] = int2float(dst_x);
+ vb[5] = int2float(h);
+ vb[6] = int2float(src_x);
+ vb[7] = int2float(h);
- vb[8] = i2f(dst_x + cur_size);
- vb[9] = i2f(h);
- vb[10] = i2f(src_x + cur_size);
- vb[11] = i2f(h);
+ vb[8] = int2float(dst_x + cur_size);
+ vb[9] = int2float(h);
+ vb[10] = int2float(src_x + cur_size);
+ vb[11] = int2float(h);
/* src */
set_tex_resource(dev_priv, FMT_8,
@@ -721,20 +726,20 @@ r600_blit_copy(struct drm_device *dev,
vb = r600_nomm_get_vb_ptr(dev);
}
- vb[0] = i2f(dst_x / 4);
+ vb[0] = int2float(dst_x / 4);
vb[1] = 0;
- vb[2] = i2f(src_x / 4);
+ vb[2] = int2float(src_x / 4);
vb[3] = 0;
- vb[4] = i2f(dst_x / 4);
- vb[5] = i2f(h);
- vb[6] = i2f(src_x / 4);
- vb[7] = i2f(h);
+ vb[4] = int2float(dst_x / 4);
+ vb[5] = int2float(h);
+ vb[6] = int2float(src_x / 4);
+ vb[7] = int2float(h);
- vb[8] = i2f((dst_x + cur_size) / 4);
- vb[9] = i2f(h);
- vb[10] = i2f((src_x + cur_size) / 4);
- vb[11] = i2f(h);
+ vb[8] = int2float((dst_x + cur_size) / 4);
+ vb[9] = int2float(h);
+ vb[10] = int2float((src_x + cur_size) / 4);
+ vb[11] = int2float(h);
/* src */
set_tex_resource(dev_priv, FMT_8_8_8_8,
@@ -804,20 +809,20 @@ r600_blit_swap(struct drm_device *dev,
dx2 = dx + w;
dy2 = dy + h;
- vb[0] = i2f(dx);
- vb[1] = i2f(dy);
- vb[2] = i2f(sx);
- vb[3] = i2f(sy);
+ vb[0] = int2float(dx);
+ vb[1] = int2float(dy);
+ vb[2] = int2float(sx);
+ vb[3] = int2float(sy);
- vb[4] = i2f(dx);
- vb[5] = i2f(dy2);
- vb[6] = i2f(sx);
- vb[7] = i2f(sy2);
+ vb[4] = int2float(dx);
+ vb[5] = int2float(dy2);
+ vb[6] = int2float(sx);
+ vb[7] = int2float(sy2);
- vb[8] = i2f(dx2);
- vb[9] = i2f(dy2);
- vb[10] = i2f(sx2);
- vb[11] = i2f(sy2);
+ vb[8] = int2float(dx2);
+ vb[9] = int2float(dy2);
+ vb[10] = int2float(sx2);
+ vb[11] = int2float(sy2);
switch(cpp) {
case 4: