From 7ff64fcaa7b7ba62d12758e49643f31dd9e90ece Mon Sep 17 00:00:00 2001
From: Steven Fuerst <svfuerst@gmail.com>
Date: Wed, 15 Aug 2012 15:07:14 -0700
Subject: Rename i2f() to int2float(), and make it global so one copy can be
 removed.

Remove the copy of i2f() in r600_blit_kms.c
We rename the function to something longer now that it is a global
symbol.  This reduces the likelihood of unintended clashes later.

This might be a candidate for inclusion inside general drm infrastructure.
However, at the moment only the radeon driver uses it.

Signed-off-by: Steven Fuerst <svfuerst@gmail.com>
---
 drivers/gpu/drm/radeon/r600_blit.c | 66 +++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

(limited to 'drivers/gpu/drm/radeon/r600_blit.c')

diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index 3c031a48205..ee1b815b568 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -489,7 +489,7 @@ set_default_state(drm_radeon_private_t *dev_priv)
 	ADVANCE_RING();
 }
 
-static uint32_t i2f(uint32_t input)
+uint32_t int2float(uint32_t input)
 {
 	u32 result, i, exponent, fraction;
 
@@ -632,20 +632,20 @@ r600_blit_copy(struct drm_device *dev,
 				vb = r600_nomm_get_vb_ptr(dev);
 			}
 
-			vb[0] = i2f(dst_x);
+			vb[0] = int2float(dst_x);
 			vb[1] = 0;
-			vb[2] = i2f(src_x);
+			vb[2] = int2float(src_x);
 			vb[3] = 0;
 
-			vb[4] = i2f(dst_x);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x);
-			vb[7] = i2f(h);
+			vb[4] = int2float(dst_x);
+			vb[5] = int2float(h);
+			vb[6] = int2float(src_x);
+			vb[7] = int2float(h);
 
-			vb[8] = i2f(dst_x + cur_size);
-			vb[9] = i2f(h);
-			vb[10] = i2f(src_x + cur_size);
-			vb[11] = i2f(h);
+			vb[8] = int2float(dst_x + cur_size);
+			vb[9] = int2float(h);
+			vb[10] = int2float(src_x + cur_size);
+			vb[11] = int2float(h);
 
 			/* src */
 			set_tex_resource(dev_priv, FMT_8,
@@ -721,20 +721,20 @@ r600_blit_copy(struct drm_device *dev,
 				vb = r600_nomm_get_vb_ptr(dev);
 			}
 
-			vb[0] = i2f(dst_x / 4);
+			vb[0] = int2float(dst_x / 4);
 			vb[1] = 0;
-			vb[2] = i2f(src_x / 4);
+			vb[2] = int2float(src_x / 4);
 			vb[3] = 0;
 
-			vb[4] = i2f(dst_x / 4);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x / 4);
-			vb[7] = i2f(h);
+			vb[4] = int2float(dst_x / 4);
+			vb[5] = int2float(h);
+			vb[6] = int2float(src_x / 4);
+			vb[7] = int2float(h);
 
-			vb[8] = i2f((dst_x + cur_size) / 4);
-			vb[9] = i2f(h);
-			vb[10] = i2f((src_x + cur_size) / 4);
-			vb[11] = i2f(h);
+			vb[8] = int2float((dst_x + cur_size) / 4);
+			vb[9] = int2float(h);
+			vb[10] = int2float((src_x + cur_size) / 4);
+			vb[11] = int2float(h);
 
 			/* src */
 			set_tex_resource(dev_priv, FMT_8_8_8_8,
@@ -804,20 +804,20 @@ r600_blit_swap(struct drm_device *dev,
 	dx2 = dx + w;
 	dy2 = dy + h;
 
-	vb[0] = i2f(dx);
-	vb[1] = i2f(dy);
-	vb[2] = i2f(sx);
-	vb[3] = i2f(sy);
+	vb[0] = int2float(dx);
+	vb[1] = int2float(dy);
+	vb[2] = int2float(sx);
+	vb[3] = int2float(sy);
 
-	vb[4] = i2f(dx);
-	vb[5] = i2f(dy2);
-	vb[6] = i2f(sx);
-	vb[7] = i2f(sy2);
+	vb[4] = int2float(dx);
+	vb[5] = int2float(dy2);
+	vb[6] = int2float(sx);
+	vb[7] = int2float(sy2);
 
-	vb[8] = i2f(dx2);
-	vb[9] = i2f(dy2);
-	vb[10] = i2f(sx2);
-	vb[11] = i2f(sy2);
+	vb[8] = int2float(dx2);
+	vb[9] = int2float(dy2);
+	vb[10] = int2float(sx2);
+	vb[11] = int2float(sy2);
 
 	switch(cpp) {
 	case 4:
-- 
cgit v1.2.3-70-g09d2


From 747f49ba67b8895a5831ab539de551b916f3738c Mon Sep 17 00:00:00 2001
From: Steven Fuerst <svfuerst@gmail.com>
Date: Wed, 15 Aug 2012 15:07:15 -0700
Subject: Replace int2float() with an optimized version.

We use __fls() to find the most significant bit.  Using that, the
loop can be avoided.  A second trick is to use the behaviour of the
rotate instructions to expand the range of the unsigned int to float
conversion to the full 32 bits in a branchless way.

The routine is now exact up to 2^24.  Above that, we truncate, which
is equivalent to rounding towards zero.

Signed-off-by: Steven Fuerst <svfuerst@gmail.com>
---
 drivers/gpu/drm/radeon/r600_blit.c | 51 +++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 23 deletions(-)

(limited to 'drivers/gpu/drm/radeon/r600_blit.c')

diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index ee1b815b568..7d8ac42e384 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -489,31 +489,36 @@ set_default_state(drm_radeon_private_t *dev_priv)
 	ADVANCE_RING();
 }
 
-uint32_t int2float(uint32_t input)
+/* 23 bits of float fractional data */
+#define I2F_FRAC_BITS  23
+#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1)
+
+/*
+ * Converts unsigned integer into 32-bit IEEE floating point representation.
+ * Will be exact from 0 to 2^24.  Above that, we round towards zero
+ * as the fractional bits will not fit in a float.  (It would be better to
+ * round towards even as the fpu does, but that is slower.)
+ */
+uint32_t int2float(uint32_t x)
 {
-	u32 result, i, exponent, fraction;
-
-	if ((input & 0x3fff) == 0)
-		result = 0; /* 0 is a special case */
-	else {
-		exponent = 140; /* exponent biased by 127; */
-		fraction = (input & 0x3fff) << 10; /* cheat and only
-						      handle numbers below 2^^15 */
-		for (i = 0; i < 14; i++) {
-			if (fraction & 0x800000)
-				break;
-			else {
-				fraction = fraction << 1; /* keep
-							     shifting left until top bit = 1 */
-				exponent = exponent - 1;
-			}
-		}
-		result = exponent << 23 | (fraction & 0x7fffff); /* mask
-								    off top bit; assumed 1 */
-	}
-	return result;
-}
+	uint32_t msb, exponent, fraction;
+
+	/* Zero is special */
+	if (!x) return 0;
+
+	/* Get location of the most significant bit */
+	msb = __fls(x);
 
+	/*
+	 * Use a rotate instead of a shift because that works both leftwards
+	 * and rightwards due to the mod(32) behaviour.  This means we don't
+	 * need to check to see if we are above 2^24 or not.
+	 */
+	fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK;
+	exponent = (127 + msb) << I2F_FRAC_BITS;
+
+	return fraction + exponent;
+}
 
 static int r600_nomm_get_vb(struct drm_device *dev)
 {
-- 
cgit v1.2.3-70-g09d2


From 9e9eb7c60d57620bfe46b2a489e7f56a5925115a Mon Sep 17 00:00:00 2001
From: Steven Fuerst <svfuerst@gmail.com>
Date: Wed, 15 Aug 2012 15:07:16 -0700
Subject: Annotate int2float() as being a pure function.

This allows gcc to fold duplicate calls into a single call.  Since
the current users do actually call it multiple times with the
same arguments, this is an obvious win.

Signed-off-by: Steven Fuerst <svfuerst@gmail.com>
---
 drivers/gpu/drm/radeon/r600_blit.c         | 2 +-
 drivers/gpu/drm/radeon/r600_blit_shaders.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/radeon/r600_blit.c')

diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index 7d8ac42e384..661fec2a2cc 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -499,7 +499,7 @@ set_default_state(drm_radeon_private_t *dev_priv)
  * as the fractional bits will not fit in a float.  (It would be better to
  * round towards even as the fpu does, but that is slower.)
  */
-uint32_t int2float(uint32_t x)
+__pure uint32_t int2float(uint32_t x)
 {
 	uint32_t msb, exponent, fraction;
 
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/radeon/r600_blit_shaders.h
index e17c2cbc662..2f3ce7a7597 100644
--- a/drivers/gpu/drm/radeon/r600_blit_shaders.h
+++ b/drivers/gpu/drm/radeon/r600_blit_shaders.h
@@ -35,5 +35,5 @@ extern const u32 r6xx_default_state[];
 extern const u32 r6xx_ps_size, r6xx_vs_size;
 extern const u32 r6xx_default_size, r7xx_default_size;
 
-uint32_t int2float(uint32_t x);
+__pure uint32_t int2float(uint32_t x);
 #endif
-- 
cgit v1.2.3-70-g09d2