25 files changed, 424 insertions, 301 deletions
diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index 52930a20d8f..998e8b4444f 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -160,6 +160,7 @@ nouveau-y += core/engine/disp/nva0.o
 nouveau-y += core/engine/disp/nva3.o
 nouveau-y += core/engine/disp/nvd0.o
 nouveau-y += core/engine/disp/nve0.o
+nouveau-y += core/engine/disp/nvf0.o
 nouveau-y += core/engine/disp/dacnv50.o
 nouveau-y += core/engine/disp/dport.o
 nouveau-y += core/engine/disp/hdanva3.o
diff --git a/drivers/gpu/drm/nouveau/core/engine/device/base.c b/drivers/gpu/drm/nouveau/core/engine/device/base.c
index 86d24904e9d..4c72571655a 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/base.c
@@ -172,7 +172,8 @@ nouveau_devobj_ctor(struct nouveau_object *parent,
 			case 0xa0: device->card_type = NV_50; break;
 			case 0xc0: device->card_type = NV_C0; break;
 			case 0xd0: device->card_type = NV_D0; break;
-			case 0xe0: device->card_type = NV_E0; break;
+			case 0xe0:
+			case 0xf0: device->card_type = NV_E0; break;
 			default:
 				break;
 			}
diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c
index e6a77944f43..a354e409cdf 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c
@@ -141,6 +141,40 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		break;
+	case 0xf0:
+		device->cname = "GK110";
+		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nve0_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] = &nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
+		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
+		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
+		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
+		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nvc0_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_LTCG   ] = &nvc0_ltcg_oclass;
+		device->oclass[NVDEV_SUBDEV_IBUS   ] = &nve0_ibus_oclass;
+		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv50_instmem_oclass;
+		device->oclass[NVDEV_SUBDEV_VM     ] = &nvc0_vmmgr_oclass;
+		device->oclass[NVDEV_SUBDEV_BAR    ] = &nvc0_bar_oclass;
+		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvd0_dmaeng_oclass;
+#if 0
+		device->oclass[NVDEV_ENGINE_FIFO   ] = &nve0_fifo_oclass;
+		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
+		device->oclass[NVDEV_ENGINE_GR     ] = &nve0_graph_oclass;
+#endif
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nvf0_disp_oclass;
+#if 0
+		device->oclass[NVDEV_ENGINE_COPY0  ] = &nve0_copy0_oclass;
+		device->oclass[NVDEV_ENGINE_COPY1  ] = &nve0_copy1_oclass;
+		device->oclass[NVDEV_ENGINE_COPY2  ] = &nve0_copy2_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nve0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
+#endif
+		break;
 	default:
 		nv_fatal(device, "unknown Kepler chipset\n");
 		return -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c
new file mode 100644
index 00000000000..a488c36e40f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <engine/software.h>
+#include <engine/disp.h>
+
+#include <core/class.h>
+
+#include "nv50.h"
+
+static struct nouveau_oclass
+nvf0_disp_sclass[] = {
+	{ NVF0_DISP_MAST_CLASS, &nvd0_disp_mast_ofuncs },
+	{ NVF0_DISP_SYNC_CLASS, &nvd0_disp_sync_ofuncs },
+	{ NVF0_DISP_OVLY_CLASS, &nvd0_disp_ovly_ofuncs },
+	{ NVF0_DISP_OIMM_CLASS, &nvd0_disp_oimm_ofuncs },
+	{ NVF0_DISP_CURS_CLASS, &nvd0_disp_curs_ofuncs },
+	{}
+};
+
+static struct nouveau_oclass
+nvf0_disp_base_oclass[] = {
+	{ NVF0_DISP_CLASS, &nvd0_disp_base_ofuncs, nva3_disp_base_omthds },
+	{}
+};
+
+static int
+nvf0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
+{
+	struct nv50_disp_priv *priv;
+	int heads = nv_rd32(parent, 0x022448);
+	int ret;
+
+	ret = nouveau_disp_create(parent, engine, oclass, heads,
+				  "PDISP", "display", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_engine(priv)->sclass = nvf0_disp_base_oclass;
+	nv_engine(priv)->cclass = &nv50_disp_cclass;
+	nv_subdev(priv)->intr = nvd0_disp_intr;
+	INIT_WORK(&priv->supervisor, nvd0_disp_intr_supervisor);
+	priv->sclass = nvf0_disp_sclass;
+	priv->head.nr = heads;
+	priv->dac.nr = 3;
+	priv->sor.nr = 4;
+	priv->dac.power = nv50_dac_power;
+	priv->dac.sense = nv50_dac_sense;
+	priv->sor.power = nv50_sor_power;
+	priv->sor.hda_eld = nvd0_hda_eld;
+	priv->sor.hdmi = nvd0_hdmi_ctrl;
+	priv->sor.dp = &nvd0_sor_dp_func;
+	return 0;
+}
+
+struct nouveau_oclass
+nvf0_disp_oclass = {
+	.handle = NV_ENGINE(DISP, 0x92),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nvf0_disp_ctor,
+		.dtor = _nouveau_disp_dtor,
+		.init = _nouveau_disp_init,
+		.fini = _nouveau_disp_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/dmaobj/nvd0.c b/drivers/gpu/drm/nouveau/core/engine/dmaobj/nvd0.c
index d1528752980..944e73ac485 100644
--- a/drivers/gpu/drm/nouveau/core/engine/dmaobj/nvd0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/dmaobj/nvd0.c
@@ -50,6 +50,9 @@ nvd0_dmaobj_bind(struct nouveau_dmaeng *dmaeng,
 		case NVE0_DISP_MAST_CLASS:
 		case NVE0_DISP_SYNC_CLASS:
 		case NVE0_DISP_OVLY_CLASS:
+		case NVF0_DISP_MAST_CLASS:
+		case NVF0_DISP_SYNC_CLASS:
+		case NVF0_DISP_OVLY_CLASS:
 			break;
 		default:
 			return -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/core/include/core/class.h b/drivers/gpu/drm/nouveau/core/include/core/class.h
index 92d3ab11d96..0a393f7f055 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/class.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/class.h
@@ -169,6 +169,7 @@ struct nv04_display_class {
  * 8570: NVA3_DISP
  * 9070: NVD0_DISP
  * 9170: NVE0_DISP
+ * 9270: NVF0_DISP
  */
 
 #define NV50_DISP_CLASS                                              0x00005070
@@ -178,6 +179,7 @@ struct nv04_display_class {
 #define NVA3_DISP_CLASS                                              0x00008570
 #define NVD0_DISP_CLASS                                              0x00009070
 #define NVE0_DISP_CLASS                                              0x00009170
+#define NVF0_DISP_CLASS                                              0x00009270
 
 #define NV50_DISP_SOR_MTHD                                           0x00010000
 #define NV50_DISP_SOR_MTHD_TYPE                                      0x0000f000
@@ -246,6 +248,7 @@ struct nv50_display_class {
  * 857a: NVA3_DISP_CURS
  * 907a: NVD0_DISP_CURS
  * 917a: NVE0_DISP_CURS
+ * 927a: NVF0_DISP_CURS
  */
 
 #define NV50_DISP_CURS_CLASS                                         0x0000507a
@@ -255,6 +258,7 @@ struct nv50_display_class {
 #define NVA3_DISP_CURS_CLASS                                         0x0000857a
 #define NVD0_DISP_CURS_CLASS                                         0x0000907a
 #define NVE0_DISP_CURS_CLASS                                         0x0000917a
+#define NVF0_DISP_CURS_CLASS                                         0x0000927a
 
 struct nv50_display_curs_class {
 	u32 head;
@@ -267,6 +271,7 @@ struct nv50_display_curs_class {
  * 857b: NVA3_DISP_OIMM
  * 907b: NVD0_DISP_OIMM
  * 917b: NVE0_DISP_OIMM
+ * 927b: NVE0_DISP_OIMM
  */
 
 #define NV50_DISP_OIMM_CLASS                                         0x0000507b
@@ -276,6 +281,7 @@ struct nv50_display_curs_class {
 #define NVA3_DISP_OIMM_CLASS                                         0x0000857b
 #define NVD0_DISP_OIMM_CLASS                                         0x0000907b
 #define NVE0_DISP_OIMM_CLASS                                         0x0000917b
+#define NVF0_DISP_OIMM_CLASS                                         0x0000927b
 
 struct nv50_display_oimm_class {
 	u32 head;
@@ -288,6 +294,7 @@ struct nv50_display_oimm_class {
  * 857c: NVA3_DISP_SYNC
  * 907c: NVD0_DISP_SYNC
  * 917c: NVE0_DISP_SYNC
+ * 927c: NVF0_DISP_SYNC
  */
 
 #define NV50_DISP_SYNC_CLASS                                         0x0000507c
@@ -297,6 +304,7 @@ struct nv50_display_oimm_class {
 #define NVA3_DISP_SYNC_CLASS                                         0x0000857c
 #define NVD0_DISP_SYNC_CLASS                                         0x0000907c
 #define NVE0_DISP_SYNC_CLASS                                         0x0000917c
+#define NVF0_DISP_SYNC_CLASS                                         0x0000927c
 
 struct nv50_display_sync_class {
 	u32 pushbuf;
@@ -310,6 +318,7 @@ struct nv50_display_sync_class {
  * 857d: NVA3_DISP_MAST
  * 907d: NVD0_DISP_MAST
  * 917d: NVE0_DISP_MAST
+ * 927d: NVF0_DISP_MAST
  */
 
 #define NV50_DISP_MAST_CLASS                                         0x0000507d
@@ -319,6 +328,7 @@ struct nv50_display_sync_class {
 #define NVA3_DISP_MAST_CLASS                                         0x0000857d
 #define NVD0_DISP_MAST_CLASS                                         0x0000907d
 #define NVE0_DISP_MAST_CLASS                                         0x0000917d
+#define NVF0_DISP_MAST_CLASS                                         0x0000927d
 
 struct nv50_display_mast_class {
 	u32 pushbuf;
@@ -331,6 +341,7 @@ struct nv50_display_mast_class {
  * 857e: NVA3_DISP_OVLY
  * 907e: NVD0_DISP_OVLY
  * 917e: NVE0_DISP_OVLY
+ * 927e: NVF0_DISP_OVLY
  */
 
 #define NV50_DISP_OVLY_CLASS                                         0x0000507e
@@ -340,6 +351,7 @@ struct nv50_display_mast_class {
 #define NVA3_DISP_OVLY_CLASS                                         0x0000857e
 #define NVD0_DISP_OVLY_CLASS                                         0x0000907e
 #define NVE0_DISP_OVLY_CLASS                                         0x0000917e
+#define NVF0_DISP_OVLY_CLASS                                         0x0000927e
 
 struct nv50_display_ovly_class {
 	u32 pushbuf;
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/disp.h b/drivers/gpu/drm/nouveau/core/include/engine/disp.h
index 28da6772c09..4b21fabfbdd 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/disp.h
@@ -44,5 +44,6 @@ extern struct nouveau_oclass nv94_disp_oclass;
 extern struct nouveau_oclass nva3_disp_oclass;
 extern struct nouveau_oclass nvd0_disp_oclass;
 extern struct nouveau_oclass nve0_disp_oclass;
+extern struct nouveau_oclass nvf0_disp_oclass;
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index c33b13fb18d..46c152ff0a8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -33,6 +33,7 @@
 
 #include <engine/device.h>
 #include <engine/disp.h>
+#include <engine/fifo.h>
 
 #include <subdev/vm.h>
 
@@ -164,7 +165,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	u32 arg0, arg1;
 	int ret;
 
-	if (nouveau_noaccel)
+	if (nouveau_noaccel || !nouveau_fifo(device) /*XXX*/)
 		return;
 
 	/* initialise synchronisation routines */
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 7f0e6c3f37d..32588739067 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -2174,6 +2174,7 @@ int
 nv50_display_create(struct drm_device *dev)
 {
 	static const u16 oclass[] = {
+		NVF0_DISP_CLASS,
 		NVE0_DISP_CLASS,
 		NVD0_DISP_CLASS,
 		NVA3_DISP_CLASS,
diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index 46a9c377285..fb441a790f3 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -1394,10 +1394,10 @@ int atom_allocate_fb_scratch(struct atom_context *ctx)
 		firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset);
 
 		DRM_DEBUG("atom firmware requested %08x %dkb\n",
-			  firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware,
-			  firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb);
+			  le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware),
+			  le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb));
 
-		usage_bytes = firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb * 1024;
+		usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
 	}
 	ctx->scratch_size_bytes = 0;
 	if (usage_bytes == 0)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 1531f167d15..105bafb6c29 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -989,62 +989,10 @@ done:
 	return r;
 }
 
-static int evergreen_uvd_calc_post_div(unsigned target_freq,
-				       unsigned vco_freq,
-				       unsigned *div)
-{
-	/* target larger than vco frequency ? */
-	if (vco_freq < target_freq)
-		return -1; /* forget it */
-
-	/* Fclk = Fvco / PDIV */
-	*div = vco_freq / target_freq;
-
-	/* we alway need a frequency less than or equal the target */
-	if ((vco_freq / *div) > target_freq)
-		*div += 1;
-
-	/* dividers above 5 must be even */
-	if (*div > 5 && *div % 2)
-		*div += 1;
-
-	/* out of range ? */
-	if (*div >= 128)
-		return -1; /* forget it */
-
-	return vco_freq / *div;
-}
-
-static int evergreen_uvd_send_upll_ctlreq(struct radeon_device *rdev)
-{
-	unsigned i;
-
-	/* assert UPLL_CTLREQ */
-	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
-
-	/* wait for CTLACK and CTLACK2 to get asserted */
-	for (i = 0; i < 100; ++i) {
-		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
-		if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
-			break;
-		mdelay(10);
-	}
-	if (i == 100)
-		return -ETIMEDOUT;
-
-	/* deassert UPLL_CTLREQ */
-	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
-
-	return 0;
-}
-
 int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 {
 	/* start off with something large */
-	int optimal_diff_score = 0x7FFFFFF;
-	unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
-	unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
-	unsigned vco_freq;
+	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
 	int r;
 
 	/* bypass vclk and dclk with bclk */
@@ -1061,40 +1009,11 @@ int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 		return 0;
 	}
 
-	/* loop through vco from low to high */
-	for (vco_freq = 125000; vco_freq <= 250000; vco_freq += 100) {
-		unsigned fb_div = vco_freq / rdev->clock.spll.reference_freq * 16384;
-		int calc_clk, diff_score, diff_vclk, diff_dclk;
-		unsigned vclk_div, dclk_div;
-
-		/* fb div out of range ? */
-		if (fb_div > 0x03FFFFFF)
-			break; /* it can oly get worse */
-
-		/* calc vclk with current vco freq. */
-		calc_clk = evergreen_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
-		if (calc_clk == -1)
-			break; /* vco is too big, it has to stop. */
-		diff_vclk = vclk - calc_clk;
-
-		/* calc dclk with current vco freq. */
-		calc_clk = evergreen_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
-		if (calc_clk == -1)
-			break; /* vco is too big, it has to stop. */
-		diff_dclk = dclk - calc_clk;
-
-		/* determine if this vco setting is better than current optimal settings */
-		diff_score = abs(diff_vclk) + abs(diff_dclk);
-		if (diff_score < optimal_diff_score) {
-			optimal_fb_div = fb_div;
-			optimal_vclk_div = vclk_div;
-			optimal_dclk_div = dclk_div;
-			optimal_vco_freq = vco_freq;
-			optimal_diff_score = diff_score;
-			if (optimal_diff_score == 0)
-				break; /* it can't get better than this */
-		}
-	}
+	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
+					  16384, 0x03FFFFFF, 0, 128, 5,
+					  &fb_div, &vclk_div, &dclk_div);
+	if (r)
+		return r;
 
 	/* set VCO_MODE to 1 */
 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
@@ -1108,7 +1027,7 @@ int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 
 	mdelay(1);
 
-	r = evergreen_uvd_send_upll_ctlreq(rdev);
+	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
 	if (r)
 		return r;
 
@@ -1119,19 +1038,19 @@ int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
 
 	/* set feedback divider */
-	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div), ~UPLL_FB_DIV_MASK);
+	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
 
 	/* set ref divider to 0 */
 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
 
-	if (optimal_vco_freq < 187500)
+	if (fb_div < 307200)
 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
 	else
 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
 
 	/* set PDIV_A and PDIV_B */
 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
-		UPLL_PDIV_A(optimal_vclk_div) | UPLL_PDIV_B(optimal_dclk_div),
+		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
 
 	/* give the PLL some time to settle */
@@ -1145,7 +1064,7 @@ int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 	/* switch from bypass mode to normal mode */
 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
 
-	r = evergreen_uvd_send_upll_ctlreq(rdev);
+	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index d9a00543108..75c05631146 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -59,7 +59,7 @@
 #	define UPLL_SLEEP_MASK				0x00000002
 #	define UPLL_BYPASS_EN_MASK			0x00000004
 #	define UPLL_CTLREQ_MASK				0x00000008
-#	define UPLL_REF_DIV_MASK			0x001F0000
+#	define UPLL_REF_DIV_MASK			0x003F0000
 #	define UPLL_VCO_MODE_MASK			0x00000200
 #	define UPLL_CTLACK_MASK				0x40000000
 #	define UPLL_CTLACK2_MASK			0x80000000
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 7436b91699d..7969c0c8ec2 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -749,7 +749,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 		    (rdev->pdev->device == 0x990F) ||
 		    (rdev->pdev->device == 0x9910) ||
 		    (rdev->pdev->device == 0x9917) ||
-		    (rdev->pdev->device == 0x9999)) {
+		    (rdev->pdev->device == 0x9999) ||
+		    (rdev->pdev->device == 0x999C)) {
 			rdev->config.cayman.max_simds_per_se = 6;
 			rdev->config.cayman.max_backends_per_se = 2;
 		} else if ((rdev->pdev->device == 0x9903) ||
@@ -758,7 +759,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 			   (rdev->pdev->device == 0x990D) ||
 			   (rdev->pdev->device == 0x990E) ||
 			   (rdev->pdev->device == 0x9913) ||
-			   (rdev->pdev->device == 0x9918)) {
+			   (rdev->pdev->device == 0x9918) ||
+			   (rdev->pdev->device == 0x999D)) {
 			rdev->config.cayman.max_simds_per_se = 4;
 			rdev->config.cayman.max_backends_per_se = 2;
 		} else if ((rdev->pdev->device == 0x9919) ||
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 6105b25b18c..acb146c0697 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -1208,6 +1208,10 @@
 
 #define UVD_CONTEXT_ID					0xf6f4
 
+#	define UPLL_CTLREQ_MASK				0x00000008
+#	define UPLL_CTLACK_MASK				0x40000000
+#	define UPLL_CTLACK2_MASK			0x80000000
+
 /*
  * PM4
  */
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d6c8cbaa869..1442ce765d4 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -358,7 +358,8 @@ struct radeon_bo {
 	struct radeon_device		*rdev;
 	struct drm_gem_object		gem_base;
 
-	struct ttm_bo_kmap_obj dma_buf_vmap;
+	struct ttm_bo_kmap_obj		dma_buf_vmap;
+	pid_t				pid;
 };
 #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base)
 
@@ -372,6 +373,8 @@ struct radeon_bo_list {
 	u32			tiling_flags;
 };
 
+int radeon_gem_debugfs_init(struct radeon_device *rdev);
+
 /* sub-allocation manager, it has to be protected by another lock.
  * By conception this is an helper for other part of the driver
  * like the indirect buffer or semaphore, which both have their
@@ -1159,6 +1162,17 @@ void radeon_uvd_free_handles(struct radeon_device *rdev,
 			     struct drm_file *filp);
 int radeon_uvd_cs_parse(struct radeon_cs_parser *parser);
 void radeon_uvd_note_usage(struct radeon_device *rdev);
+int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
+				  unsigned vclk, unsigned dclk,
+				  unsigned vco_min, unsigned vco_max,
+				  unsigned fb_factor, unsigned fb_mask,
+				  unsigned pd_min, unsigned pd_max,
+				  unsigned pd_even,
+				  unsigned *optimal_fb_div,
+				  unsigned *optimal_vclk_div,
+				  unsigned *optimal_dclk_div);
+int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
+                                unsigned cg_upll_func_cntl);
 
 struct r600_audio {
 	int			channels;
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 0dd87c0e0fa..dea6f63c972 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -2028,6 +2028,8 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
 	num_modes = power_info->info.ucNumOfPowerModeEntries;
 	if (num_modes > ATOM_MAX_NUMBEROF_POWER_BLOCK)
 		num_modes = ATOM_MAX_NUMBEROF_POWER_BLOCK;
+	if (num_modes == 0)
+		return state_index;
 	rdev->pm.power_state = kzalloc(sizeof(struct radeon_power_state) * num_modes, GFP_KERNEL);
 	if (!rdev->pm.power_state)
 		return state_index;
@@ -2307,7 +2309,7 @@ static void radeon_atombios_parse_pplib_non_clock_info(struct radeon_device *rde
 		rdev->pm.default_power_state_index = state_index;
 		rdev->pm.power_state[state_index].default_clock_mode =
 			&rdev->pm.power_state[state_index].clock_info[mode_index - 1];
-		if (ASIC_IS_DCE5(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
+		if ((rdev->family >= CHIP_BARTS) && !(rdev->flags & RADEON_IS_IGP)) {
 			/* NI chips post without MC ucode, so default clocks are strobe mode only */
 			rdev->pm.default_sclk = rdev->pm.power_state[state_index].clock_info[0].sclk;
 			rdev->pm.default_mclk = rdev->pm.power_state[state_index].clock_info[0].mclk;
@@ -2345,7 +2347,7 @@ static bool radeon_atombios_parse_pplib_clock_info(struct radeon_device *rdev,
 			sclk |= clock_info->rs780.ucLowEngineClockHigh << 16;
 			rdev->pm.power_state[state_index].clock_info[mode_index].sclk = sclk;
 		}
-	} else if (ASIC_IS_DCE6(rdev)) {
+	} else if (rdev->family >= CHIP_TAHITI) {
 		sclk = le16_to_cpu(clock_info->si.usEngineClockLow);
 		sclk |= clock_info->si.ucEngineClockHigh << 16;
 		mclk = le16_to_cpu(clock_info->si.usMemoryClockLow);
@@ -2358,7 +2360,7 @@ static bool radeon_atombios_parse_pplib_clock_info(struct radeon_device *rdev,
 			le16_to_cpu(clock_info->si.usVDDC);
 		rdev->pm.power_state[state_index].clock_info[mode_index].voltage.vddci =
 			le16_to_cpu(clock_info->si.usVDDCI);
-	} else if (ASIC_IS_DCE4(rdev)) {
+	} else if (rdev->family >= CHIP_CEDAR) {
 		sclk = le16_to_cpu(clock_info->evergreen.usEngineClockLow);
 		sclk |= clock_info->evergreen.ucEngineClockHigh << 16;
 		mclk = le16_to_cpu(clock_info->evergreen.usMemoryClockLow);
@@ -2432,6 +2434,8 @@ static int radeon_atombios_parse_power_table_4_5(struct radeon_device *rdev)
 	power_info = (union power_info *)(mode_info->atom_context->bios + data_offset);
 
 	radeon_atombios_add_pplib_thermal_controller(rdev, &power_info->pplib.sThermalController);
+	if (power_info->pplib.ucNumStates == 0)
+		return state_index;
 	rdev->pm.power_state = kzalloc(sizeof(struct radeon_power_state) *
 				       power_info->pplib.ucNumStates, GFP_KERNEL);
 	if (!rdev->pm.power_state)
@@ -2514,6 +2518,7 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev)
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
         u16 data_offset;
 	u8 frev, crev;
+	u8 *power_state_offset;
 
 	if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
 				   &frev, &crev, &data_offset))
@@ -2530,15 +2535,17 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev)
 	non_clock_info_array = (struct _NonClockInfoArray *)
 		(mode_info->atom_context->bios + data_offset +
 		 le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset));
+	if (state_array->ucNumEntries == 0)
+		return state_index;
 	rdev->pm.power_state = kzalloc(sizeof(struct radeon_power_state) *
 				       state_array->ucNumEntries, GFP_KERNEL);
 	if (!rdev->pm.power_state)
 		return state_index;
+	power_state_offset = (u8 *)state_array->states;
 	for (i = 0; i < state_array->ucNumEntries; i++) {
 		mode_index = 0;
-		power_state = (union pplib_power_state *)&state_array->states[i];
-		/* XXX this might be an inagua bug... */
-		non_clock_array_index = i; /* power_state->v2.nonClockInfoIndex */
+		power_state = (union pplib_power_state *)power_state_offset;
+		non_clock_array_index = power_state->v2.nonClockInfoIndex;
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
 			&non_clock_info_array->nonClockInfo[non_clock_array_index];
 		rdev->pm.power_state[i].clock_info = kzalloc(sizeof(struct radeon_pm_clock_info) *
@@ -2550,9 +2557,6 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev)
 		if (power_state->v2.ucNumDPMLevels) {
 			for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) {
 				clock_array_index = power_state->v2.clockInfoIndex[j];
-				/* XXX this might be an inagua bug... */
-				if (clock_array_index >= clock_info_array->ucNumEntries)
-					continue;
 				clock_info = (union pplib_clock_info *)
 					&clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
 				valid = radeon_atombios_parse_pplib_clock_info(rdev,
@@ -2574,6 +2578,7 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev)
 								   non_clock_info);
 			state_index++;
 		}
+		power_state_offset += 2 + power_state->v2.ucNumDPMLevels;
 	}
 	/* if multiple clock modes, mark the lowest as no display */
 	for (i = 0; i < state_index; i++) {
@@ -2620,7 +2625,9 @@ void radeon_atombios_get_power_modes(struct radeon_device *rdev)
 		default:
 			break;
 		}
-	} else {
+	}
+
+	if (state_index == 0) {
 		rdev->pm.power_state = kzalloc(sizeof(struct radeon_power_state), GFP_KERNEL);
 		if (rdev->pm.power_state) {
 			rdev->pm.power_state[0].clock_info =
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 237b7a7549e..a8f60890398 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1178,6 +1178,11 @@ int radeon_device_init(struct radeon_device *rdev,
 	if (r)
 		DRM_ERROR("ib ring test failed (%d).\n", r);
 
+	r = radeon_gem_debugfs_init(rdev);
+	if (r) {
+		DRM_ERROR("registering gem debugfs failed (%d).\n", r);
+	}
+
 	if (rdev->flags & RADEON_IS_AGP && !rdev->accel_working) {
 		/* Acceleration not working on AGP card try again
 		 * with fallback to PCI or PCIE GART
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 1a699cefaac..5b937dfe6f6 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -767,8 +767,8 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 
 	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
+		rdev->fence_drv[ring].scratch_reg = 0;
 		if (ring != R600_RING_TYPE_UVD_INDEX) {
-			rdev->fence_drv[ring].scratch_reg = 0;
 			index = R600_WB_EVENT_OFFSET + ring * 4;
 			rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
 			rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index fe5c1f6b795..aa796031ab6 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -84,6 +84,7 @@ retry:
 		return r;
 	}
 	*obj = &robj->gem_base;
+	robj->pid = task_pid_nr(current);
 
 	mutex_lock(&rdev->gem.mutex);
 	list_add_tail(&robj->list, &rdev->gem.objects);
@@ -575,3 +576,52 @@ int radeon_mode_dumb_destroy(struct drm_file *file_priv,
 {
 	return drm_gem_handle_delete(file_priv, handle);
 }
+
+#if defined(CONFIG_DEBUG_FS)
+static int radeon_debugfs_gem_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_bo *rbo;
+	unsigned i = 0;
+
+	mutex_lock(&rdev->gem.mutex);
+	list_for_each_entry(rbo, &rdev->gem.objects, list) {
+		unsigned domain;
+		const char *placement;
+
+		domain = radeon_mem_type_to_domain(rbo->tbo.mem.mem_type);
+		switch (domain) {
+		case RADEON_GEM_DOMAIN_VRAM:
+			placement = "VRAM";
+			break;
+		case RADEON_GEM_DOMAIN_GTT:
+			placement = " GTT";
+			break;
+		case RADEON_GEM_DOMAIN_CPU:
+		default:
+			placement = " CPU";
+			break;
+		}
+		seq_printf(m, "bo[0x%08x] %8ldkB %8ldMB %s pid %8ld\n",
+			   i, radeon_bo_size(rbo) >> 10, radeon_bo_size(rbo) >> 20,
+			   placement, (unsigned long)rbo->pid);
+		i++;
+	}
+	mutex_unlock(&rdev->gem.mutex);
+	return 0;
+}
+
+static struct drm_info_list radeon_debugfs_gem_list[] = {
+	{"radeon_gem_info", &radeon_debugfs_gem_info, 0, NULL},
+};
+#endif
+
+int radeon_gem_debugfs_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, radeon_debugfs_gem_list, 1);
+#endif
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index cb800995d4f..0abe5a9431b 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -64,7 +64,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 	}
 
 	r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_CPU, NULL, &sa_manager->bo);
+			     domain, NULL, &sa_manager->bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r);
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 0312a7f4d76..906e5c0ca3b 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -692,3 +692,140 @@ void radeon_uvd_note_usage(struct radeon_device *rdev)
 	if (set_clocks)
 		radeon_set_uvd_clocks(rdev, 53300, 40000);
 }
+
+static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
+					      unsigned target_freq,
+					      unsigned pd_min,
+					      unsigned pd_even)
+{
+	unsigned post_div = vco_freq / target_freq;
+
+	/* adjust to post divider minimum value */
+	if (post_div < pd_min)
+		post_div = pd_min;
+
+	/* we alway need a frequency less than or equal the target */
+	if ((vco_freq / post_div) > target_freq)
+		post_div += 1;
+
+	/* post dividers above a certain value must be even */
+	if (post_div > pd_even && post_div % 2)
+		post_div += 1;
+
+	return post_div;
+}
+
+/**
+ * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
+ *
+ * @rdev: radeon_device pointer
+ * @vclk: wanted VCLK
+ * @dclk: wanted DCLK
+ * @vco_min: minimum VCO frequency
+ * @vco_max: maximum VCO frequency
+ * @fb_factor: factor to multiply vco freq with
+ * @fb_mask: limit and bitmask for feedback divider
+ * @pd_min: post divider minimum
+ * @pd_max: post divider maximum
+ * @pd_even: post divider must be even above this value
+ * @optimal_fb_div: resulting feedback divider
+ * @optimal_vclk_div: resulting vclk post divider
+ * @optimal_dclk_div: resulting dclk post divider
+ *
+ * Calculate dividers for UVDs UPLL (R6xx-SI, except APUs).
+ * Returns zero on success -EINVAL on error.
+ */
+int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
+				  unsigned vclk, unsigned dclk,
+				  unsigned vco_min, unsigned vco_max,
+				  unsigned fb_factor, unsigned fb_mask,
+				  unsigned pd_min, unsigned pd_max,
+				  unsigned pd_even,
+				  unsigned *optimal_fb_div,
+				  unsigned *optimal_vclk_div,
+				  unsigned *optimal_dclk_div)
+{
+	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;
+
+	/* start off with something large */
+	unsigned optimal_score = ~0;
+
+	/* loop through vco from low to high */
+	vco_min = max(max(vco_min, vclk), dclk);
+	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {
+
+		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
+		unsigned vclk_div, dclk_div, score;
+
+		do_div(fb_div, ref_freq);
+
+		/* fb div out of range ? */
+		if (fb_div > fb_mask)
+			break; /* it can oly get worse */
+
+		fb_div &= fb_mask;
+
+		/* calc vclk divider with current vco freq */
+		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
+							 pd_min, pd_even);
+		if (vclk_div > pd_max)
+			break; /* vco is too big, it has to stop */
+
+		/* calc dclk divider with current vco freq */
+		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
+							 pd_min, pd_even);
+		if (vclk_div > pd_max)
+			break; /* vco is too big, it has to stop */
+
+		/* calc score with current vco freq */
+		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);
+
+		/* determine if this vco setting is better than current optimal settings */
+		if (score < optimal_score) {
+			*optimal_fb_div = fb_div;
+			*optimal_vclk_div = vclk_div;
+			*optimal_dclk_div = dclk_div;
+			optimal_score = score;
+			if (optimal_score == 0)
+				break; /* it can't get better than this */
+		}
+	}
+
+	/* did we found a valid setup ? */
+	if (optimal_score == ~0)
+		return -EINVAL;
+
+	return 0;
+}
+
+int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
+				unsigned cg_upll_func_cntl)
+{
+	unsigned i;
+
+	/* make sure UPLL_CTLREQ is deasserted */
+	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
+
+	mdelay(10);
+
+	/* assert UPLL_CTLREQ */
+	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+
+	/* wait for CTLACK and CTLACK2 to get asserted */
+	for (i = 0; i < 100; ++i) {
+		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
+			break;
+		mdelay(10);
+	}
+
+	/* deassert UPLL_CTLREQ */
+	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
+
+	if (i == 100) {
+		DRM_ERROR("Timeout setting UVD clocks!\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 91530d4c11c..83f612a9500 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -44,56 +44,9 @@ void rv770_fini(struct radeon_device *rdev);
 static void rv770_pcie_gen2_enable(struct radeon_device *rdev);
 int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
 
-static int rv770_uvd_calc_post_div(unsigned target_freq,
-				   unsigned vco_freq,
-				   unsigned *div)
-{
-	/* Fclk = Fvco / PDIV */
-	*div = vco_freq / target_freq;
-
-	/* we alway need a frequency less than or equal the target */
-	if ((vco_freq / *div) > target_freq)
-		*div += 1;
-
-	/* out of range ? */
-	if (*div > 30)
-		return -1; /* forget it */
-
-	*div -= 1;
-	return vco_freq / (*div + 1);
-}
-
-static int rv770_uvd_send_upll_ctlreq(struct radeon_device *rdev)
-{
-	unsigned i;
-
-	/* assert UPLL_CTLREQ */
-	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
-
-	/* wait for CTLACK and CTLACK2 to get asserted */
-	for (i = 0; i < 100; ++i) {
-		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
-		if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
-			break;
-		mdelay(10);
-	}
-	if (i == 100)
-		return -ETIMEDOUT;
-
-	/* deassert UPLL_CTLREQ */
-	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
-
-	return 0;
-}
-
 int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 {
-	/* start off with something large */
-	int optimal_diff_score = 0x7FFFFFF;
-	unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
-	unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
-	unsigned vco_freq, vco_min = 50000, vco_max = 160000;
-	unsigned ref_freq = rdev->clock.spll.reference_freq;
+	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
 	int r;
 
 	/* RV740 uses evergreen uvd clk programming */
@@ -111,44 +64,15 @@ int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 		return 0;
 	}
 
-	/* loop through vco from low to high */
-	vco_min = max(max(vco_min, vclk), dclk);
-	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 500) {
-		uint64_t fb_div = (uint64_t)vco_freq * 43663;
-		int calc_clk, diff_score, diff_vclk, diff_dclk;
-		unsigned vclk_div, dclk_div;
-
-		do_div(fb_div, ref_freq);
-		fb_div |= 1;
-
-		/* fb div out of range ? */
-		if (fb_div > 0x03FFFFFF)
-			break; /* it can oly get worse */
-
-		/* calc vclk with current vco freq. */
-		calc_clk = rv770_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
-		if (calc_clk == -1)
-			break; /* vco is too big, it has to stop. */
-		diff_vclk = vclk - calc_clk;
-
-		/* calc dclk with current vco freq. */
-		calc_clk = rv770_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
-		if (calc_clk == -1)
-			break; /* vco is too big, it has to stop. */
-		diff_dclk = dclk - calc_clk;
-
-		/* determine if this vco setting is better than current optimal settings */
-		diff_score = abs(diff_vclk) + abs(diff_dclk);
-		if (diff_score < optimal_diff_score) {
-			optimal_fb_div = fb_div;
-			optimal_vclk_div = vclk_div;
-			optimal_dclk_div = dclk_div;
-			optimal_vco_freq = vco_freq;
-			optimal_diff_score = diff_score;
-			if (optimal_diff_score == 0)
-				break; /* it can't get better than this */
-		}
-	}
+	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 50000, 160000,
+					  43663, 0x03FFFFFE, 1, 30, ~0,
+					  &fb_div, &vclk_div, &dclk_div);
+	if (r)
+		return r;
+
+	fb_div |= 1;
+	vclk_div -= 1;
+	dclk_div -= 1;
 
 	/* set UPLL_FB_DIV to 0x50000 */
 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(0x50000), ~UPLL_FB_DIV_MASK);
@@ -160,7 +84,7 @@ int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(1), ~UPLL_FB_DIV(1));
 
-	r = rv770_uvd_send_upll_ctlreq(rdev);
+	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
 	if (r)
 		return r;
 
@@ -170,13 +94,13 @@ int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 	/* set the required FB_DIV, REF_DIV, Post divder values */
 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REF_DIV(1), ~UPLL_REF_DIV_MASK);
 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
-		 UPLL_SW_HILEN(optimal_vclk_div >> 1) |
-		 UPLL_SW_LOLEN((optimal_vclk_div >> 1) + (optimal_vclk_div & 1)) |
-		 UPLL_SW_HILEN2(optimal_dclk_div >> 1) |
-		 UPLL_SW_LOLEN2((optimal_dclk_div >> 1) + (optimal_dclk_div & 1)),
+		 UPLL_SW_HILEN(vclk_div >> 1) |
+		 UPLL_SW_LOLEN((vclk_div >> 1) + (vclk_div & 1)) |
+		 UPLL_SW_HILEN2(dclk_div >> 1) |
+		 UPLL_SW_LOLEN2((dclk_div >> 1) + (dclk_div & 1)),
 		 ~UPLL_SW_MASK);
 
-	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div),
+	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div),
 		 ~UPLL_FB_DIV_MASK);
 
 	/* give the PLL some time to settle */
@@ -191,7 +115,7 @@ int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
 	WREG32_P(CG_UPLL_FUNC_CNTL_3, 0, ~UPLL_FB_DIV(1));
 
-	r = rv770_uvd_send_upll_ctlreq(rdev);
+	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index 6a52b2054f3..85b16266f74 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -45,7 +45,7 @@
 #	define UPLL_BYPASS_EN_MASK			0x00000004
 #	define UPLL_CTLREQ_MASK				0x00000008
 #	define UPLL_REF_DIV(x)				((x) << 16)
-#	define UPLL_REF_DIV_MASK			0x001F0000
+#	define UPLL_REF_DIV_MASK			0x003F0000
 #	define UPLL_CTLACK_MASK				0x40000000
 #	define UPLL_CTLACK2_MASK			0x80000000
 #define CG_UPLL_FUNC_CNTL_2				0x71c
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index fe6b14e0021..f0b6c2f87c4 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -5415,62 +5415,9 @@ uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
 	return clock;
 }
 
-static int si_uvd_calc_post_div(unsigned target_freq,
-				unsigned vco_freq,
-				unsigned *div)
-{
-	/* target larger than vco frequency ? */
-	if (vco_freq < target_freq)
-		return -1; /* forget it */
-
-	/* Fclk = Fvco / PDIV */
-	*div = vco_freq / target_freq;
-
-	/* we alway need a frequency less than or equal the target */
-	if ((vco_freq / *div) > target_freq)
-		*div += 1;
-
-	/* dividers above 5 must be even */
-	if (*div > 5 && *div % 2)
-		*div += 1;
-
-	/* out of range ? */
-	if (*div >= 128)
-		return -1; /* forget it */
-
-	return vco_freq / *div;
-}
-
-static int si_uvd_send_upll_ctlreq(struct radeon_device *rdev)
-{
-	unsigned i;
-
-	/* assert UPLL_CTLREQ */
-	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
-
-	/* wait for CTLACK and CTLACK2 to get asserted */
-	for (i = 0; i < 100; ++i) {
-		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
-		if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
-			break;
-		mdelay(10);
-	}
-	if (i == 100)
-		return -ETIMEDOUT;
-
-	/* deassert UPLL_CTLREQ */
-	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
-
-	return 0;
-}
-
 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 {
-	/* start off with something large */
-	int optimal_diff_score = 0x7FFFFFF;
-	unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
-	unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
-	unsigned vco_freq;
+	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
 	int r;
 
 	/* bypass vclk and dclk with bclk */
@@ -5487,40 +5434,11 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 		return 0;
 	}
 
-	/* loop through vco from low to high */
-	for (vco_freq = 125000; vco_freq <= 250000; vco_freq += 100) {
-		unsigned fb_div = vco_freq / rdev->clock.spll.reference_freq * 16384;
-		int calc_clk, diff_score, diff_vclk, diff_dclk;
-		unsigned vclk_div, dclk_div;
-
-		/* fb div out of range ? */
-		if (fb_div > 0x03FFFFFF)
-			break; /* it can oly get worse */
-
-		/* calc vclk with current vco freq. */
-		calc_clk = si_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
-		if (calc_clk == -1)
-			break; /* vco is too big, it has to stop. */
-		diff_vclk = vclk - calc_clk;
-
-		/* calc dclk with current vco freq. */
-		calc_clk = si_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
-		if (calc_clk == -1)
-			break; /* vco is too big, it has to stop. */
-		diff_dclk = dclk - calc_clk;
-
-		/* determine if this vco setting is better than current optimal settings */
-		diff_score = abs(diff_vclk) + abs(diff_dclk);
-		if (diff_score < optimal_diff_score) {
-			optimal_fb_div = fb_div;
-			optimal_vclk_div = vclk_div;
-			optimal_dclk_div = dclk_div;
-			optimal_vco_freq = vco_freq;
-			optimal_diff_score = diff_score;
-			if (optimal_diff_score == 0)
-				break; /* it can't get better than this */
-		}
-	}
+	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
+					  16384, 0x03FFFFFF, 0, 128, 5,
+					  &fb_div, &vclk_div, &dclk_div);
+	if (r)
+		return r;
 
 	/* set RESET_ANTI_MUX to 0 */
 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
@@ -5537,7 +5455,7 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 
 	mdelay(1);
 
-	r = si_uvd_send_upll_ctlreq(rdev);
+	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
 	if (r)
 		return r;
 
@@ -5548,19 +5466,19 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
 
 	/* set feedback divider */
-	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div), ~UPLL_FB_DIV_MASK);
+	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
 
 	/* set ref divider to 0 */
 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
 
-	if (optimal_vco_freq < 187500)
+	if (fb_div < 307200)
 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
 	else
 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
 
 	/* set PDIV_A and PDIV_B */
 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
-		UPLL_PDIV_A(optimal_vclk_div) | UPLL_PDIV_B(optimal_dclk_div),
+		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
 
 	/* give the PLL some time to settle */
@@ -5574,7 +5492,7 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 	/* switch from bypass mode to normal mode */
 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
 
-	r = si_uvd_send_upll_ctlreq(rdev);
+	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 042b91d6c94..222877ba6cf 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -36,7 +36,7 @@
 #	define UPLL_BYPASS_EN_MASK			0x00000004
 #	define UPLL_CTLREQ_MASK				0x00000008
 #	define UPLL_VCO_MODE_MASK			0x00000600
-#	define UPLL_REF_DIV_MASK			0x001F0000
+#	define UPLL_REF_DIV_MASK			0x003F0000
 #	define UPLL_CTLACK_MASK				0x40000000
 #	define UPLL_CTLACK2_MASK			0x80000000
 #define	CG_UPLL_FUNC_CNTL_2				0x638