51 files changed, 1210 insertions, 516 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 91dbb2757af..2ee0c9bfd03 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -35,6 +35,12 @@ config ARC
 	select PERF_USE_VMALLOC
 	select HAVE_DEBUG_STACKOVERFLOW
 
+config TRACE_IRQFLAGS_SUPPORT
+	def_bool y
+
+config LOCKDEP_SUPPORT
+	def_bool y
+
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 
@@ -119,7 +125,6 @@ config ARC_PLAT_NEEDS_CPU_TO_DMA
 config SMP
 	bool "Symmetric Multi-Processing (Incomplete)"
 	default n
-	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
@@ -130,17 +135,14 @@ if SMP
 config ARC_HAS_COH_CACHES
 	def_bool n
 
-config ARC_HAS_COH_RTSC
-	def_bool n
-
 config ARC_HAS_REENTRANT_IRQ_LV2
 	def_bool n
 
 endif
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-32)"
-	range 2 32
+	int "Maximum number of CPUs (2-4096)"
+	range 2 4096
 	depends on SMP
 	default "2"
 
@@ -326,8 +328,7 @@ config ARC_HAS_RTSC
 	bool "Insn: RTSC (64-bit r/o cycle counter)"
 	default y
 	depends on ARC_CPU_REL_4_10
-	# if SMP, enable RTSC only if counter is coherent across cores
-	depends on !SMP || ARC_HAS_COH_RTSC
+	depends on !SMP
 
 endmenu   # "ARC CPU Configuration"
 
diff --git a/arch/arc/boot/dts/abilis_tb100.dtsi b/arch/arc/boot/dts/abilis_tb100.dtsi
index d9f8249aa66..3942634f805 100644
--- a/arch/arc/boot/dts/abilis_tb100.dtsi
+++ b/arch/arc/boot/dts/abilis_tb100.dtsi
@@ -43,124 +43,124 @@
 		iomux: iomux@FF10601c {
 			/* Port 1 */
 			pctl_tsin_s0: pctl-tsin-s0 {   /* Serial TS-in 0 */
-				pingrp = "mis0_pins";
+				abilis,function = "mis0";
 			};
 			pctl_tsin_s1: pctl-tsin-s1 {   /* Serial TS-in 1 */
-				pingrp = "mis1_pins";
+				abilis,function = "mis1";
 			};
 			pctl_gpio_a: pctl-gpio-a {     /* GPIO bank A */
-				pingrp = "gpioa_pins";
+				abilis,function = "gpioa";
 			};
 			pctl_tsin_p1: pctl-tsin-p1 {   /* Parallel TS-in 1 */
-				pingrp = "mip1_pins";
+				abilis,function = "mip1";
 			};
 			/* Port 2 */
 			pctl_tsin_s2: pctl-tsin-s2 {   /* Serial TS-in 2 */
-				pingrp = "mis2_pins";
+				abilis,function = "mis2";
 			};
 			pctl_tsin_s3: pctl-tsin-s3 {   /* Serial TS-in 3 */
-				pingrp = "mis3_pins";
+				abilis,function = "mis3";
 			};
 			pctl_gpio_c: pctl-gpio-c {     /* GPIO bank C */
-				pingrp = "gpioc_pins";
+				abilis,function = "gpioc";
 			};
 			pctl_tsin_p3: pctl-tsin-p3 {   /* Parallel TS-in 3 */
-				pingrp = "mip3_pins";
+				abilis,function = "mip3";
 			};
 			/* Port 3 */
 			pctl_tsin_s4: pctl-tsin-s4 {   /* Serial TS-in 4 */
-				pingrp = "mis4_pins";
+				abilis,function = "mis4";
 			};
 			pctl_tsin_s5: pctl-tsin-s5 {   /* Serial TS-in 5 */
-				pingrp = "mis5_pins";
+				abilis,function = "mis5";
 			};
 			pctl_gpio_e: pctl-gpio-e {     /* GPIO bank E */
-				pingrp = "gpioe_pins";
+				abilis,function = "gpioe";
 			};
 			pctl_tsin_p5: pctl-tsin-p5 {   /* Parallel TS-in 5 */
-				pingrp = "mip5_pins";
+				abilis,function = "mip5";
 			};
 			/* Port 4 */
 			pctl_tsin_s6: pctl-tsin-s6 {   /* Serial TS-in 6 */
-				pingrp = "mis6_pins";
+				abilis,function = "mis6";
 			};
 			pctl_tsin_s7: pctl-tsin-s7 {   /* Serial TS-in 7 */
-				pingrp = "mis7_pins";
+				abilis,function = "mis7";
 			};
 			pctl_gpio_g: pctl-gpio-g {     /* GPIO bank G */
-				pingrp = "gpiog_pins";
+				abilis,function = "gpiog";
 			};
 			pctl_tsin_p7: pctl-tsin-p7 {   /* Parallel TS-in 7 */
-				pingrp = "mip7_pins";
+				abilis,function = "mip7";
 			};
 			/* Port 5 */
 			pctl_gpio_j: pctl-gpio-j {     /* GPIO bank J */
-				pingrp = "gpioj_pins";
+				abilis,function = "gpioj";
 			};
 			pctl_gpio_k: pctl-gpio-k {     /* GPIO bank K */
-				pingrp = "gpiok_pins";
+				abilis,function = "gpiok";
 			};
 			pctl_ciplus: pctl-ciplus {     /* CI+ interface */
-				pingrp = "ciplus_pins";
+				abilis,function = "ciplus";
 			};
 			pctl_mcard: pctl-mcard {       /* M-Card interface */
-				pingrp = "mcard_pins";
+				abilis,function = "mcard";
 			};
 			/* Port 6 */
 			pctl_tsout_p: pctl-tsout-p {   /* Parallel TS-out */
-				pingrp = "mop_pins";
+				abilis,function = "mop";
 			};
 			pctl_tsout_s0: pctl-tsout-s0 { /* Serial TS-out 0 */
-				pingrp = "mos0_pins";
+				abilis,function = "mos0";
 			};
 			pctl_tsout_s1: pctl-tsout-s1 { /* Serial TS-out 1 */
-				pingrp = "mos1_pins";
+				abilis,function = "mos1";
 			};
 			pctl_tsout_s2: pctl-tsout-s2 { /* Serial TS-out 2 */
-				pingrp = "mos2_pins";
+				abilis,function = "mos2";
 			};
 			pctl_tsout_s3: pctl-tsout-s3 { /* Serial TS-out 3 */
-				pingrp = "mos3_pins";
+				abilis,function = "mos3";
 			};
 			/* Port 7 */
 			pctl_uart0: pctl-uart0 {       /* UART 0 */
-				pingrp = "uart0_pins";
+				abilis,function = "uart0";
 			};
 			pctl_uart1: pctl-uart1 {       /* UART 1 */
-				pingrp = "uart1_pins";
+				abilis,function = "uart1";
 			};
 			pctl_gpio_l: pctl-gpio-l {     /* GPIO bank L */
-				pingrp = "gpiol_pins";
+				abilis,function = "gpiol";
 			};
 			pctl_gpio_m: pctl-gpio-m {     /* GPIO bank M */
-				pingrp = "gpiom_pins";
+				abilis,function = "gpiom";
 			};
 			/* Port 8 */
 			pctl_spi3: pctl-spi3 {
-				pingrp = "spi3_pins";
+				abilis,function = "spi3";
 			};
 			/* Port 9 */
 			pctl_spi1: pctl-spi1 {
-				pingrp = "spi1_pins";
+				abilis,function = "spi1";
 			};
 			pctl_gpio_n: pctl-gpio-n {
-				pingrp = "gpion_pins";
+				abilis,function = "gpion";
 			};
 			/* Unmuxed GPIOs */
 			pctl_gpio_b: pctl-gpio-b {
-				pingrp = "gpiob_pins";
+				abilis,function = "gpiob";
 			};
 			pctl_gpio_d: pctl-gpio-d {
-				pingrp = "gpiod_pins";
+				abilis,function = "gpiod";
 			};
 			pctl_gpio_f: pctl-gpio-f {
-				pingrp = "gpiof_pins";
+				abilis,function = "gpiof";
 			};
 			pctl_gpio_h: pctl-gpio-h {
-				pingrp = "gpioh_pins";
+				abilis,function = "gpioh";
 			};
 			pctl_gpio_i: pctl-gpio-i {
-				pingrp = "gpioi_pins";
+				abilis,function = "gpioi";
 			};
 		};
 
@@ -172,9 +172,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF140000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <0>;
-			gpio-pins = <&pctl_gpio_a>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <3>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioa";
 		};
 		gpiob: gpio@FF141000 {
 			compatible = "abilis,tb10x-gpio";
@@ -184,9 +185,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF141000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <3>;
-			gpio-pins = <&pctl_gpio_b>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <2>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiob";
 		};
 		gpioc: gpio@FF142000 {
 			compatible = "abilis,tb10x-gpio";
@@ -196,9 +198,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF142000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <5>;
-			gpio-pins = <&pctl_gpio_c>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <3>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioc";
 		};
 		gpiod: gpio@FF143000 {
 			compatible = "abilis,tb10x-gpio";
@@ -208,9 +211,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF143000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <8>;
-			gpio-pins = <&pctl_gpio_d>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <2>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiod";
 		};
 		gpioe: gpio@FF144000 {
 			compatible = "abilis,tb10x-gpio";
@@ -220,9 +224,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF144000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <10>;
-			gpio-pins = <&pctl_gpio_e>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <3>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioe";
 		};
 		gpiof: gpio@FF145000 {
 			compatible = "abilis,tb10x-gpio";
@@ -232,9 +237,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF145000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <13>;
-			gpio-pins = <&pctl_gpio_f>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <2>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiof";
 		};
 		gpiog: gpio@FF146000 {
 			compatible = "abilis,tb10x-gpio";
@@ -244,9 +250,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF146000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <15>;
-			gpio-pins = <&pctl_gpio_g>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <3>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiog";
 		};
 		gpioh: gpio@FF147000 {
 			compatible = "abilis,tb10x-gpio";
@@ -256,9 +263,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF147000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <18>;
-			gpio-pins = <&pctl_gpio_h>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <2>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioh";
 		};
 		gpioi: gpio@FF148000 {
 			compatible = "abilis,tb10x-gpio";
@@ -268,9 +276,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF148000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <20>;
-			gpio-pins = <&pctl_gpio_i>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <12>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioi";
 		};
 		gpioj: gpio@FF149000 {
 			compatible = "abilis,tb10x-gpio";
@@ -280,9 +289,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF149000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <32>;
-			gpio-pins = <&pctl_gpio_j>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <32>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioj";
 		};
 		gpiok: gpio@FF14a000 {
 			compatible = "abilis,tb10x-gpio";
@@ -292,9 +302,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF14A000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <64>;
-			gpio-pins = <&pctl_gpio_k>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <22>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiok";
 		};
 		gpiol: gpio@FF14b000 {
 			compatible = "abilis,tb10x-gpio";
@@ -304,9 +315,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF14B000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <86>;
-			gpio-pins = <&pctl_gpio_l>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <4>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiol";
 		};
 		gpiom: gpio@FF14c000 {
 			compatible = "abilis,tb10x-gpio";
@@ -316,9 +328,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF14C000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <90>;
-			gpio-pins = <&pctl_gpio_m>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <4>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiom";
 		};
 		gpion: gpio@FF14d000 {
 			compatible = "abilis,tb10x-gpio";
@@ -328,9 +341,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF14D000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <94>;
-			gpio-pins = <&pctl_gpio_n>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <5>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpion";
 		};
 	};
 };
diff --git a/arch/arc/boot/dts/abilis_tb100_dvk.dts b/arch/arc/boot/dts/abilis_tb100_dvk.dts
index ebc313a9f5b..3dd6ed94146 100644
--- a/arch/arc/boot/dts/abilis_tb100_dvk.dts
+++ b/arch/arc/boot/dts/abilis_tb100_dvk.dts
@@ -64,62 +64,62 @@
 			compatible = "gpio-leds";
 			power {
 				label = "Power";
-				gpios = <&gpioi 0>;
+				gpios = <&gpioi 0 0>;
 				linux,default-trigger = "default-on";
 			};
 			heartbeat {
 				label = "Heartbeat";
-				gpios = <&gpioi 1>;
+				gpios = <&gpioi 1 0>;
 				linux,default-trigger = "heartbeat";
 			};
 			led2 {
 				label = "LED2";
-				gpios = <&gpioi 2>;
+				gpios = <&gpioi 2 0>;
 				default-state = "off";
 			};
 			led3 {
 				label = "LED3";
-				gpios = <&gpioi 3>;
+				gpios = <&gpioi 3 0>;
 				default-state = "off";
 			};
 			led4 {
 				label = "LED4";
-				gpios = <&gpioi 4>;
+				gpios = <&gpioi 4 0>;
 				default-state = "off";
 			};
 			led5 {
 				label = "LED5";
-				gpios = <&gpioi 5>;
+				gpios = <&gpioi 5 0>;
 				default-state = "off";
 			};
 			led6 {
 				label = "LED6";
-				gpios = <&gpioi 6>;
+				gpios = <&gpioi 6 0>;
 				default-state = "off";
 			};
 			led7 {
 				label = "LED7";
-				gpios = <&gpioi 7>;
+				gpios = <&gpioi 7 0>;
 				default-state = "off";
 			};
 			led8 {
 				label = "LED8";
-				gpios = <&gpioi 8>;
+				gpios = <&gpioi 8 0>;
 				default-state = "off";
 			};
 			led9 {
 				label = "LED9";
-				gpios = <&gpioi 9>;
+				gpios = <&gpioi 9 0>;
 				default-state = "off";
 			};
 			led10 {
 				label = "LED10";
-				gpios = <&gpioi 10>;
+				gpios = <&gpioi 10 0>;
 				default-state = "off";
 			};
 			led11 {
 				label = "LED11";
-				gpios = <&gpioi 11>;
+				gpios = <&gpioi 11 0>;
 				default-state = "off";
 			};
 		};
diff --git a/arch/arc/boot/dts/abilis_tb101.dtsi b/arch/arc/boot/dts/abilis_tb101.dtsi
index da8ca7941e6..b0467229a5c 100644
--- a/arch/arc/boot/dts/abilis_tb101.dtsi
+++ b/arch/arc/boot/dts/abilis_tb101.dtsi
@@ -43,133 +43,133 @@
 		iomux: iomux@FF10601c {
 			/* Port 1 */
 			pctl_tsin_s0: pctl-tsin-s0 {   /* Serial TS-in 0 */
-				pingrp = "mis0_pins";
+				abilis,function = "mis0";
 			};
 			pctl_tsin_s1: pctl-tsin-s1 {   /* Serial TS-in 1 */
-				pingrp = "mis1_pins";
+				abilis,function = "mis1";
 			};
 			pctl_gpio_a: pctl-gpio-a {     /* GPIO bank A */
-				pingrp = "gpioa_pins";
+				abilis,function = "gpioa";
 			};
 			pctl_tsin_p1: pctl-tsin-p1 {   /* Parallel TS-in 1 */
-				pingrp = "mip1_pins";
+				abilis,function = "mip1";
 			};
 			/* Port 2 */
 			pctl_tsin_s2: pctl-tsin-s2 {   /* Serial TS-in 2 */
-				pingrp = "mis2_pins";
+				abilis,function = "mis2";
 			};
 			pctl_tsin_s3: pctl-tsin-s3 {   /* Serial TS-in 3 */
-				pingrp = "mis3_pins";
+				abilis,function = "mis3";
 			};
 			pctl_gpio_c: pctl-gpio-c {     /* GPIO bank C */
-				pingrp = "gpioc_pins";
+				abilis,function = "gpioc";
 			};
 			pctl_tsin_p3: pctl-tsin-p3 {   /* Parallel TS-in 3 */
-				pingrp = "mip3_pins";
+				abilis,function = "mip3";
 			};
 			/* Port 3 */
 			pctl_tsin_s4: pctl-tsin-s4 {   /* Serial TS-in 4 */
-				pingrp = "mis4_pins";
+				abilis,function = "mis4";
 			};
 			pctl_tsin_s5: pctl-tsin-s5 {   /* Serial TS-in 5 */
-				pingrp = "mis5_pins";
+				abilis,function = "mis5";
 			};
 			pctl_gpio_e: pctl-gpio-e {     /* GPIO bank E */
-				pingrp = "gpioe_pins";
+				abilis,function = "gpioe";
 			};
 			pctl_tsin_p5: pctl-tsin-p5 {   /* Parallel TS-in 5 */
-				pingrp = "mip5_pins";
+				abilis,function = "mip5";
 			};
 			/* Port 4 */
 			pctl_tsin_s6: pctl-tsin-s6 {   /* Serial TS-in 6 */
-				pingrp = "mis6_pins";
+				abilis,function = "mis6";
 			};
 			pctl_tsin_s7: pctl-tsin-s7 {   /* Serial TS-in 7 */
-				pingrp = "mis7_pins";
+				abilis,function = "mis7";
 			};
 			pctl_gpio_g: pctl-gpio-g {     /* GPIO bank G */
-				pingrp = "gpiog_pins";
+				abilis,function = "gpiog";
 			};
 			pctl_tsin_p7: pctl-tsin-p7 {   /* Parallel TS-in 7 */
-				pingrp = "mip7_pins";
+				abilis,function = "mip7";
 			};
 			/* Port 5 */
 			pctl_gpio_j: pctl-gpio-j {     /* GPIO bank J */
-				pingrp = "gpioj_pins";
+				abilis,function = "gpioj";
 			};
 			pctl_gpio_k: pctl-gpio-k {     /* GPIO bank K */
-				pingrp = "gpiok_pins";
+				abilis,function = "gpiok";
 			};
 			pctl_ciplus: pctl-ciplus {     /* CI+ interface */
-				pingrp = "ciplus_pins";
+				abilis,function = "ciplus";
 			};
 			pctl_mcard: pctl-mcard {       /* M-Card interface */
-				pingrp = "mcard_pins";
+				abilis,function = "mcard";
 			};
 			pctl_stc0: pctl-stc0 {         /* Smart card I/F 0 */
-				pingrp = "stc0_pins";
+				abilis,function = "stc0";
 			};
 			pctl_stc1: pctl-stc1 {         /* Smart card I/F 1 */
-				pingrp = "stc1_pins";
+				abilis,function = "stc1";
 			};
 			/* Port 6 */
 			pctl_tsout_p: pctl-tsout-p {   /* Parallel TS-out */
-				pingrp = "mop_pins";
+				abilis,function = "mop";
 			};
 			pctl_tsout_s0: pctl-tsout-s0 { /* Serial TS-out 0 */
-				pingrp = "mos0_pins";
+				abilis,function = "mos0";
 			};
 			pctl_tsout_s1: pctl-tsout-s1 { /* Serial TS-out 1 */
-				pingrp = "mos1_pins";
+				abilis,function = "mos1";
 			};
 			pctl_tsout_s2: pctl-tsout-s2 { /* Serial TS-out 2 */
-				pingrp = "mos2_pins";
+				abilis,function = "mos2";
 			};
 			pctl_tsout_s3: pctl-tsout-s3 { /* Serial TS-out 3 */
-				pingrp = "mos3_pins";
+				abilis,function = "mos3";
 			};
 			/* Port 7 */
 			pctl_uart0: pctl-uart0 {       /* UART 0 */
-				pingrp = "uart0_pins";
+				abilis,function = "uart0";
 			};
 			pctl_uart1: pctl-uart1 {       /* UART 1 */
-				pingrp = "uart1_pins";
+				abilis,function = "uart1";
 			};
 			pctl_gpio_l: pctl-gpio-l {     /* GPIO bank L */
-				pingrp = "gpiol_pins";
+				abilis,function = "gpiol";
 			};
 			pctl_gpio_m: pctl-gpio-m {     /* GPIO bank M */
-				pingrp = "gpiom_pins";
+				abilis,function = "gpiom";
 			};
 			/* Port 8 */
 			pctl_spi3: pctl-spi3 {
-				pingrp = "spi3_pins";
+				abilis,function = "spi3";
 			};
 			pctl_jtag: pctl-jtag {
-				pingrp = "jtag_pins";
+				abilis,function = "jtag";
 			};
 			/* Port 9 */
 			pctl_spi1: pctl-spi1 {
-				pingrp = "spi1_pins";
+				abilis,function = "spi1";
 			};
 			pctl_gpio_n: pctl-gpio-n {
-				pingrp = "gpion_pins";
+				abilis,function = "gpion";
 			};
 			/* Unmuxed GPIOs */
 			pctl_gpio_b: pctl-gpio-b {
-				pingrp = "gpiob_pins";
+				abilis,function = "gpiob";
 			};
 			pctl_gpio_d: pctl-gpio-d {
-				pingrp = "gpiod_pins";
+				abilis,function = "gpiod";
 			};
 			pctl_gpio_f: pctl-gpio-f {
-				pingrp = "gpiof_pins";
+				abilis,function = "gpiof";
 			};
 			pctl_gpio_h: pctl-gpio-h {
-				pingrp = "gpioh_pins";
+				abilis,function = "gpioh";
 			};
 			pctl_gpio_i: pctl-gpio-i {
-				pingrp = "gpioi_pins";
+				abilis,function = "gpioi";
 			};
 		};
 
@@ -181,9 +181,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF140000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <0>;
-			gpio-pins = <&pctl_gpio_a>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <3>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioa";
 		};
 		gpiob: gpio@FF141000 {
 			compatible = "abilis,tb10x-gpio";
@@ -193,9 +194,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF141000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <3>;
-			gpio-pins = <&pctl_gpio_b>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <2>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiob";
 		};
 		gpioc: gpio@FF142000 {
 			compatible = "abilis,tb10x-gpio";
@@ -205,9 +207,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF142000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <5>;
-			gpio-pins = <&pctl_gpio_c>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <3>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioc";
 		};
 		gpiod: gpio@FF143000 {
 			compatible = "abilis,tb10x-gpio";
@@ -217,9 +220,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF143000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <8>;
-			gpio-pins = <&pctl_gpio_d>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <2>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiod";
 		};
 		gpioe: gpio@FF144000 {
 			compatible = "abilis,tb10x-gpio";
@@ -229,9 +233,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF144000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <10>;
-			gpio-pins = <&pctl_gpio_e>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <3>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioe";
 		};
 		gpiof: gpio@FF145000 {
 			compatible = "abilis,tb10x-gpio";
@@ -241,9 +246,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF145000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <13>;
-			gpio-pins = <&pctl_gpio_f>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <2>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiof";
 		};
 		gpiog: gpio@FF146000 {
 			compatible = "abilis,tb10x-gpio";
@@ -253,9 +259,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF146000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <15>;
-			gpio-pins = <&pctl_gpio_g>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <3>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiog";
 		};
 		gpioh: gpio@FF147000 {
 			compatible = "abilis,tb10x-gpio";
@@ -265,9 +272,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF147000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <18>;
-			gpio-pins = <&pctl_gpio_h>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <2>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioh";
 		};
 		gpioi: gpio@FF148000 {
 			compatible = "abilis,tb10x-gpio";
@@ -277,9 +285,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF148000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <20>;
-			gpio-pins = <&pctl_gpio_i>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <12>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioi";
 		};
 		gpioj: gpio@FF149000 {
 			compatible = "abilis,tb10x-gpio";
@@ -289,9 +298,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF149000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <32>;
-			gpio-pins = <&pctl_gpio_j>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <32>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpioj";
 		};
 		gpiok: gpio@FF14a000 {
 			compatible = "abilis,tb10x-gpio";
@@ -301,9 +311,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF14A000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <64>;
-			gpio-pins = <&pctl_gpio_k>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <22>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiok";
 		};
 		gpiol: gpio@FF14b000 {
 			compatible = "abilis,tb10x-gpio";
@@ -313,9 +324,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF14B000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <86>;
-			gpio-pins = <&pctl_gpio_l>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <4>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiol";
 		};
 		gpiom: gpio@FF14c000 {
 			compatible = "abilis,tb10x-gpio";
@@ -325,9 +337,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF14C000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <90>;
-			gpio-pins = <&pctl_gpio_m>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <4>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpiom";
 		};
 		gpion: gpio@FF14d000 {
 			compatible = "abilis,tb10x-gpio";
@@ -337,9 +350,10 @@
 			interrupts = <27 2>;
 			reg = <0xFF14D000 0x1000>;
 			gpio-controller;
-			#gpio-cells = <1>;
-			gpio-base  = <94>;
-			gpio-pins = <&pctl_gpio_n>;
+			#gpio-cells = <2>;
+			abilis,ngpio = <5>;
+			gpio-ranges = <&iomux 0 0 0>;
+			gpio-ranges-group-names = "gpion";
 		};
 	};
 };
diff --git a/arch/arc/boot/dts/abilis_tb101_dvk.dts b/arch/arc/boot/dts/abilis_tb101_dvk.dts
index b204657993a..1cf51c280f2 100644
--- a/arch/arc/boot/dts/abilis_tb101_dvk.dts
+++ b/arch/arc/boot/dts/abilis_tb101_dvk.dts
@@ -64,62 +64,62 @@
 			compatible = "gpio-leds";
 			power {
 				label = "Power";
-				gpios = <&gpioi 0>;
+				gpios = <&gpioi 0 0>;
 				linux,default-trigger = "default-on";
 			};
 			heartbeat {
 				label = "Heartbeat";
-				gpios = <&gpioi 1>;
+				gpios = <&gpioi 1 0>;
 				linux,default-trigger = "heartbeat";
 			};
 			led2 {
 				label = "LED2";
-				gpios = <&gpioi 2>;
+				gpios = <&gpioi 2 0>;
 				default-state = "off";
 			};
 			led3 {
 				label = "LED3";
-				gpios = <&gpioi 3>;
+				gpios = <&gpioi 3 0>;
 				default-state = "off";
 			};
 			led4 {
 				label = "LED4";
-				gpios = <&gpioi 4>;
+				gpios = <&gpioi 4 0>;
 				default-state = "off";
 			};
 			led5 {
 				label = "LED5";
-				gpios = <&gpioi 5>;
+				gpios = <&gpioi 5 0>;
 				default-state = "off";
 			};
 			led6 {
 				label = "LED6";
-				gpios = <&gpioi 6>;
+				gpios = <&gpioi 6 0>;
 				default-state = "off";
 			};
 			led7 {
 				label = "LED7";
-				gpios = <&gpioi 7>;
+				gpios = <&gpioi 7 0>;
 				default-state = "off";
 			};
 			led8 {
 				label = "LED8";
-				gpios = <&gpioi 8>;
+				gpios = <&gpioi 8 0>;
 				default-state = "off";
 			};
 			led9 {
 				label = "LED9";
-				gpios = <&gpioi 9>;
+				gpios = <&gpioi 9 0>;
 				default-state = "off";
 			};
 			led10 {
 				label = "LED10";
-				gpios = <&gpioi 10>;
+				gpios = <&gpioi 10 0>;
 				default-state = "off";
 			};
 			led11 {
 				label = "LED11";
-				gpios = <&gpioi 11>;
+				gpios = <&gpioi 11 0>;
 				default-state = "off";
 			};
 		};
diff --git a/arch/arc/boot/dts/abilis_tb10x.dtsi b/arch/arc/boot/dts/abilis_tb10x.dtsi
index edf56f4749e..a098d7c05e9 100644
--- a/arch/arc/boot/dts/abilis_tb10x.dtsi
+++ b/arch/arc/boot/dts/abilis_tb10x.dtsi
@@ -62,9 +62,8 @@
 		};
 
 		iomux: iomux@FF10601c {
-			#address-cells = <1>;
-			#size-cells = <1>;
 			compatible = "abilis,tb10x-iomux";
+			#gpio-range-cells = <3>;
 			reg = <0xFF10601c 0x4>;
 		};
 
diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/angel4.dts
index 4fb2d6f655b..bcf662d21a5 100644
--- a/arch/arc/boot/dts/angel4.dts
+++ b/arch/arc/boot/dts/angel4.dts
@@ -67,5 +67,9 @@
 				reg = <1>;
 			};
 		};
+
+		arcpmu0: pmu {
+			compatible = "snps,arc700-pmu";
+		};
 	};
 };
diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/fpga_defconfig
index 4ca50f1f8d0..e283aa58693 100644
--- a/arch/arc/configs/fpga_defconfig
+++ b/arch/arc/configs/fpga_defconfig
@@ -2,6 +2,8 @@ CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -62,4 +64,5 @@ CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_DEBUG_PREEMPT is not set
 CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/fpga_noramfs_defconfig b/arch/arc/configs/fpga_noramfs_defconfig
new file mode 100644
index 00000000000..5276a52f6a2
--- /dev/null
+++ b/arch/arc/configs/fpga_noramfs_defconfig
@@ -0,0 +1,64 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_FPGA_LEGACY=y
+CONFIG_ARC_BOARD_ML509=y
+# CONFIG_ARC_HAS_RTSC is not set
+CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+CONFIG_ARC_EMAC=y
+CONFIG_LXT_PHY=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_ARC=y
+CONFIG_SERIAL_ARC_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_XZ_DEC=y
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index d8dd660898b..5943f7f9d32 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -46,3 +46,4 @@ generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index e4abdaac6f9..2fd3162ec4d 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -17,13 +17,7 @@
 #endif
 
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
-
-/* For a rare case where customers have differently config I/D */
-#define ARC_ICACHE_LINE_LEN	L1_CACHE_BYTES
-#define ARC_DCACHE_LINE_LEN	L1_CACHE_BYTES
-
-#define ICACHE_LINE_MASK	(~(ARC_ICACHE_LINE_LEN - 1))
-#define DCACHE_LINE_MASK	(~(ARC_DCACHE_LINE_LEN - 1))
+#define CACHE_LINE_MASK		(~(L1_CACHE_BYTES - 1))
 
 /*
  * ARC700 doesn't cache any access in top 256M.
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index c0a72105ee0..291a70db68b 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -18,8 +18,8 @@
 
 #include <asm-generic/irq.h>
 
-extern void __init arc_init_IRQ(void);
-extern int __init get_hw_config_num_irq(void);
+extern void arc_init_IRQ(void);
+extern int get_hw_config_num_irq(void);
 
 void arc_local_timer_setup(unsigned int cpu);
 
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index b68b53f458d..cb7efc29f16 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -151,16 +151,38 @@ static inline void arch_unmask_irq(unsigned int irq)
 
 #else
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+.macro TRACE_ASM_IRQ_DISABLE
+	bl	trace_hardirqs_off
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+	bl	trace_hardirqs_on
+.endm
+
+#else
+
+.macro TRACE_ASM_IRQ_DISABLE
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+.endm
+
+#endif
+
 .macro IRQ_DISABLE  scratch
 	lr	\scratch, [status32]
 	bic	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
 	flag	\scratch
+	TRACE_ASM_IRQ_DISABLE
 .endm
 
 .macro IRQ_ENABLE  scratch
 	lr	\scratch, [status32]
 	or	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
 	flag	\scratch
+	TRACE_ASM_IRQ_ENABLE
 .endm
 
 #endif	/* __ASSEMBLY__ */
diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h
index 9998dc846eb..e8993a2be6c 100644
--- a/arch/arc/include/asm/mach_desc.h
+++ b/arch/arc/include/asm/mach_desc.h
@@ -51,22 +51,12 @@ struct machine_desc {
 /*
  * Current machine - only accessible during boot.
  */
-extern struct machine_desc *machine_desc;
+extern const struct machine_desc *machine_desc;
 
 /*
  * Machine type table - also only accessible during boot
  */
-extern struct machine_desc __arch_info_begin[], __arch_info_end[];
-#define for_each_machine_desc(p)			\
-	for (p = __arch_info_begin; p < __arch_info_end; p++)
-
-static inline struct machine_desc *default_machine_desc(void)
-{
-	/* the default machine is the last one linked in */
-	if (__arch_info_end - 1 < __arch_info_begin)
-		return NULL;
-	return __arch_info_end - 1;
-}
+extern const struct machine_desc __arch_info_begin[], __arch_info_end[];
 
 /*
  * Set of macros to define architecture features.
@@ -81,7 +71,6 @@ __attribute__((__section__(".arch.info.init"))) = {	\
 #define MACHINE_END				\
 };
 
-extern struct machine_desc *setup_machine_fdt(void *dt);
-extern void __init copy_devtree(void);
+extern const struct machine_desc *setup_machine_fdt(void *dt);
 
 #endif
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index c2663b32866..8c84ae98c33 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -48,7 +48,7 @@
 #ifndef __ASSEMBLY__
 
 typedef struct {
-	unsigned long asid;	/* 8 bit MMU PID + Generation cycle */
+	unsigned long asid[NR_CPUS];	/* 8 bit MMU PID + Generation cycle */
 } mm_context_t;
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 43a1b51bb8c..1fd467ef658 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -30,13 +30,13 @@
  * "Fast Context Switch" i.e. no TLB flush on ctxt-switch
  *
  * Linux assigns each task a unique ASID. A simple round-robin allocation
- * of H/w ASID is done using software tracker @asid_cache.
+ * of H/w ASID is done using software tracker @asid_cpu.
  * When it reaches max 255, the allocation cycle starts afresh by flushing
  * the entire TLB and wrapping ASID back to zero.
  *
  * A new allocation cycle, post rollover, could potentially reassign an ASID
  * to a different task. Thus the rule is to refresh the ASID in a new cycle.
- * The 32 bit @asid_cache (and mm->asid) have 8 bits MMU PID and rest 24 bits
+ * The 32 bit @asid_cpu (and mm->asid) have 8 bits MMU PID and rest 24 bits
  * serve as cycle/generation indicator and natural 32 bit unsigned math
  * automagically increments the generation when lower 8 bits rollover.
  */
@@ -47,9 +47,11 @@
 #define MM_CTXT_FIRST_CYCLE	(MM_CTXT_ASID_MASK + 1)
 #define MM_CTXT_NO_ASID		0UL
 
-#define hw_pid(mm)		(mm->context.asid & MM_CTXT_ASID_MASK)
+#define asid_mm(mm, cpu)	mm->context.asid[cpu]
+#define hw_pid(mm, cpu)		(asid_mm(mm, cpu) & MM_CTXT_ASID_MASK)
 
-extern unsigned int asid_cache;
+DECLARE_PER_CPU(unsigned int, asid_cache);
+#define asid_cpu(cpu)		per_cpu(asid_cache, cpu)
 
 /*
  * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle)
@@ -57,6 +59,7 @@ extern unsigned int asid_cache;
  */
 static inline void get_new_mmu_context(struct mm_struct *mm)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -71,28 +74,28 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	 * 	 first need to destroy the context, setting it to invalid
 	 * 	 value.
 	 */
-	if (!((mm->context.asid ^ asid_cache) & MM_CTXT_CYCLE_MASK))
+	if (!((asid_mm(mm, cpu) ^ asid_cpu(cpu)) & MM_CTXT_CYCLE_MASK))
 		goto set_hw;
 
 	/* move to new ASID and handle rollover */
-	if (unlikely(!(++asid_cache & MM_CTXT_ASID_MASK))) {
+	if (unlikely(!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK))) {
 
-		flush_tlb_all();
+		local_flush_tlb_all();
 
 		/*
 		 * Above checke for rollover of 8 bit ASID in 32 bit container.
 		 * If the container itself wrapped around, set it to a non zero
 		 * "generation" to distinguish from no context
 		 */
-		if (!asid_cache)
-			asid_cache = MM_CTXT_FIRST_CYCLE;
+		if (!asid_cpu(cpu))
+			asid_cpu(cpu) = MM_CTXT_FIRST_CYCLE;
 	}
 
 	/* Assign new ASID to tsk */
-	mm->context.asid = asid_cache;
+	asid_mm(mm, cpu) = asid_cpu(cpu);
 
 set_hw:
-	write_aux_reg(ARC_REG_PID, hw_pid(mm) | MMU_ENABLE);
+	write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
 
 	local_irq_restore(flags);
 }
@@ -104,16 +107,45 @@ set_hw:
 static inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-	mm->context.asid = MM_CTXT_NO_ASID;
+	int i;
+
+	for_each_possible_cpu(i)
+		asid_mm(mm, i) = MM_CTXT_NO_ASID;
+
 	return 0;
 }
 
+static inline void destroy_context(struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	/* Needed to elide CONFIG_DEBUG_PREEMPT warning */
+	local_irq_save(flags);
+	asid_mm(mm, smp_processor_id()) = MM_CTXT_NO_ASID;
+	local_irq_restore(flags);
+}
+
 /* Prepare the MMU for task: setup PID reg with allocated ASID
     If task doesn't have an ASID (never alloc or stolen, get a new ASID)
 */
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
 {
+	const int cpu = smp_processor_id();
+
+	/*
+	 * Note that the mm_cpumask is "aggregating" only, we don't clear it
+	 * for the switched-out task, unlike some other arches.
+	 * It is used to enlist cpus for sending TLB flush IPIs and not sending
+	 * it to CPUs where a task once ran-on, could cause stale TLB entry
+	 * re-use, specially for a multi-threaded task.
+	 * e.g. T1 runs on C1, migrates to C3. T2 running on C2 munmaps.
+	 *      For a non-aggregating mm_cpumask, IPI not sent C1, and if T1
+	 *      were to re-migrate to C1, it could access the unmapped region
+	 *      via any existing stale TLB entries.
+	 */
+	cpumask_set_cpu(cpu, mm_cpumask(next));
+
 #ifndef CONFIG_SMP
 	/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
 	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
@@ -131,11 +163,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
  */
 #define activate_mm(prev, next)		switch_mm(prev, next, NULL)
 
-static inline void destroy_context(struct mm_struct *mm)
-{
-	mm->context.asid = MM_CTXT_NO_ASID;
-}
-
 /* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping
  * for retiring-mm. However destroy_context( ) still needs to do that because
  * between mm_release( ) = >deactive_mm( ) and
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h
index 115ad96480e..cbf755e32a0 100644
--- a/arch/arc/include/asm/perf_event.h
+++ b/arch/arc/include/asm/perf_event.h
@@ -1,5 +1,7 @@
 /*
- * Copyright (C) 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ * Linux performance counter support for ARC
+ *
+ * Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -10,4 +12,204 @@
 #ifndef __ASM_PERF_EVENT_H
 #define __ASM_PERF_EVENT_H
 
+/* real maximum varies per CPU, this is the maximum supported by the driver */
+#define ARC_PMU_MAX_HWEVENTS	64
+
+#define ARC_REG_CC_BUILD	0xF6
+#define ARC_REG_CC_INDEX	0x240
+#define ARC_REG_CC_NAME0	0x241
+#define ARC_REG_CC_NAME1	0x242
+
+#define ARC_REG_PCT_BUILD	0xF5
+#define ARC_REG_PCT_COUNTL	0x250
+#define ARC_REG_PCT_COUNTH	0x251
+#define ARC_REG_PCT_SNAPL	0x252
+#define ARC_REG_PCT_SNAPH	0x253
+#define ARC_REG_PCT_CONFIG	0x254
+#define ARC_REG_PCT_CONTROL	0x255
+#define ARC_REG_PCT_INDEX	0x256
+
+#define ARC_REG_PCT_CONTROL_CC	(1 << 16)	/* clear counts */
+#define ARC_REG_PCT_CONTROL_SN	(1 << 17)	/* snapshot */
+
+struct arc_reg_pct_build {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int m:8, c:8, r:6, s:2, v:8;
+#else
+	unsigned int v:8, s:2, r:6, c:8, m:8;
+#endif
+};
+
+struct arc_reg_cc_build {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int c:16, r:8, v:8;
+#else
+	unsigned int v:8, r:8, c:16;
+#endif
+};
+
+#define PERF_COUNT_ARC_DCLM	(PERF_COUNT_HW_MAX + 0)
+#define PERF_COUNT_ARC_DCSM	(PERF_COUNT_HW_MAX + 1)
+#define PERF_COUNT_ARC_ICM	(PERF_COUNT_HW_MAX + 2)
+#define PERF_COUNT_ARC_BPOK	(PERF_COUNT_HW_MAX + 3)
+#define PERF_COUNT_ARC_EDTLB	(PERF_COUNT_HW_MAX + 4)
+#define PERF_COUNT_ARC_EITLB	(PERF_COUNT_HW_MAX + 5)
+#define PERF_COUNT_ARC_HW_MAX	(PERF_COUNT_HW_MAX + 6)
+
+/*
+ * The "generalized" performance events seem to really be a copy
+ * of the available events on x86 processors; the mapping to ARC
+ * events is not always possible 1-to-1. Fortunately, there doesn't
+ * seem to be an exact definition for these events, so we can cheat
+ * a bit where necessary.
+ *
+ * In particular, the following PERF events may behave a bit differently
+ * compared to other architectures:
+ *
+ * PERF_COUNT_HW_CPU_CYCLES
+ *	Cycles not in halted state
+ *
+ * PERF_COUNT_HW_REF_CPU_CYCLES
+ *	Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES
+ *	for now as we don't do Dynamic Voltage/Frequency Scaling (yet)
+ *
+ * PERF_COUNT_HW_BUS_CYCLES
+ *	Unclear what this means, Intel uses 0x013c, which according to
+ *	their datasheet means "unhalted reference cycles". It sounds similar
+ *	to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it.
+ *
+ * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+ * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+ *	The ARC 700 can either measure stalls per pipeline stage, or all stalls
+ *	combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
+ *	and all pipeline flushes (e.g. caused by mispredicts, etc.) to
+ *	STALLED_CYCLES_FRONTEND.
+ *
+ *	We could start multiple performance counters and combine everything
+ *	afterwards, but that makes it complicated.
+ *
+ *	Note that I$ cache misses aren't counted by either of the two!
+ */
+
+static const char * const arc_pmu_ev_hw_map[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
+	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
+	[PERF_COUNT_HW_BUS_CYCLES] = "crun",
+	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
+	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail",
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
+	[PERF_COUNT_ARC_DCLM] = "dclm",
+	[PERF_COUNT_ARC_DCSM] = "dcsm",
+	[PERF_COUNT_ARC_ICM] = "icm",
+	[PERF_COUNT_ARC_BPOK] = "bpok",
+	[PERF_COUNT_ARC_EDTLB] = "edtlb",
+	[PERF_COUNT_ARC_EITLB] = "eitlb",
+};
+
+#define C(_x)			PERF_COUNT_HW_CACHE_##_x
+#define CACHE_OP_UNSUPPORTED	0xffff
+
+static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCLM,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCSM,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= PERF_COUNT_ARC_ICM,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EDTLB,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EITLB,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+			[C(RESULT_MISS)]	= PERF_COUNT_HW_BRANCH_MISSES,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
 #endif /* __ASM_PERF_EVENT_H */
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 36a9f20c21a..81208bfd9dc 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -105,11 +105,16 @@ static inline pgtable_t
 pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	pgtable_t pte_pg;
+	struct page *page;
 
 	pte_pg = __get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte());
-	if (pte_pg) {
-		memzero((void *)pte_pg, PTRS_PER_PTE * 4);
-		pgtable_page_ctor(virt_to_page(pte_pg));
+	if (!pte_pg)
+		return 0;
+	memzero((void *)pte_pg, PTRS_PER_PTE * 4);
+	page = virt_to_page(pte_pg);
+	if (!pgtable_page_ctor(page)) {
+		__free_page(page);
+		return 0;
 	}
 
 	return pte_pg;
diff --git a/arch/arc/include/asm/prom.h b/arch/arc/include/asm/prom.h
deleted file mode 100644
index 692d0d0789a..00000000000
--- a/arch/arc/include/asm/prom.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_ARC_PROM_H_
-#define _ASM_ARC_PROM_H_
-
-#define HAVE_ARCH_DEVTREE_FIXUPS
-
-#endif
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 229e5068149..e10f8cef56a 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -31,7 +31,7 @@ struct cpuinfo_data {
 extern int root_mountflags, end_mem;
 extern int running_on_hw;
 
-void __init setup_processor(void);
+void setup_processor(void);
 void __init setup_arch_memory(void);
 
 #endif /* __ASMARC_SETUP_H */
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index c4fb211dcd2..eefc29f08cd 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -30,7 +30,7 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
  * APIs provided by arch SMP code to rest of arch code
  */
 extern void __init smp_init_cpus(void);
-extern void __init first_lines_of_secondary(void);
+extern void first_lines_of_secondary(void);
 extern const char *arc_platform_smp_cpuinfo(void);
 
 /*
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index f158197ac5b..b6a8c2dfbe6 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -45,7 +45,14 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
-	lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+	unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+	__asm__ __volatile__(
+	"	ex  %0, [%1]		\n"
+	: "+r" (tmp)
+	: "r"(&(lock->slock))
+	: "memory");
+
 	smp_mb();
 }
 
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 2d50a4cdd7f..45be2167201 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -80,8 +80,6 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE      0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h
index b2f9bc7f68c..71c7b2e4b87 100644
--- a/arch/arc/include/asm/tlbflush.h
+++ b/arch/arc/include/asm/tlbflush.h
@@ -18,11 +18,18 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 void local_flush_tlb_range(struct vm_area_struct *vma,
 			   unsigned long start, unsigned long end);
 
-/* XXX: Revisit for SMP */
+#ifndef CONFIG_SMP
 #define flush_tlb_range(vma, s, e)	local_flush_tlb_range(vma, s, e)
 #define flush_tlb_page(vma, page)	local_flush_tlb_page(vma, page)
 #define flush_tlb_kernel_range(s, e)	local_flush_tlb_kernel_range(s, e)
 #define flush_tlb_all()			local_flush_tlb_all()
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
-
+#else
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+							 unsigned long end);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+#endif /* CONFIG_SMP */
 #endif
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index 32420824375..30c9baffa96 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -43,7 +43,7 @@
  * Because it essentially checks if buffer end is within limit and @len is
  * non-ngeative, which implies that buffer start will be within limit too.
  *
- * The reason for rewriting being, for majorit yof cases, @len is generally
+ * The reason for rewriting being, for majority of cases, @len is generally
  * compile time constant, causing first sub-expression to be compile time
  * subsumed.
  *
@@ -53,7 +53,7 @@
  *
  */
 #define __user_ok(addr, sz)	(((sz) <= TASK_SIZE) && \
-				 (((addr)+(sz)) <= get_fs()))
+				 ((addr) <= (get_fs() - (sz))))
 #define __access_ok(addr, sz)	(unlikely(__kernel_ok) || \
 				 likely(__user_ok((addr), (sz))))
 
diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h
index 60702f3751d..3e5f071bc00 100644
--- a/arch/arc/include/asm/unaligned.h
+++ b/arch/arc/include/asm/unaligned.h
@@ -22,7 +22,8 @@ static inline int
 misaligned_fixup(unsigned long address, struct pt_regs *regs,
 		 struct callee_regs *cregs)
 {
-	return 0;
+	/* Not fixed */
+	return 1;
 }
 #endif
 
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index c242ef07ba7..8004b4fa646 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_KPROBES)      		+= kprobes.o
 obj-$(CONFIG_ARC_MISALIGN_ACCESS) 	+= unaligned.o
 obj-$(CONFIG_KGDB)			+= kgdb.o
 obj-$(CONFIG_ARC_METAWARE_HLINK)	+= arc_hostlink.o
+obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 obj-$(CONFIG_ARC_FPU_SAVE_RESTORE)	+= fpu.o
 CFLAGS_fpu.o   += -mdpfp
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
index 34410eb1a30..c14a5bea0c7 100644
--- a/arch/arc/kernel/ctx_sw.c
+++ b/arch/arc/kernel/ctx_sw.c
@@ -17,6 +17,8 @@
 #include <asm/asm-offsets.h>
 #include <linux/sched.h>
 
+#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
+
 struct task_struct *__sched
 __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 {
@@ -45,7 +47,16 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 #endif
 
 		/* set ksp of outgoing task in tsk->thread.ksp */
+#if KSP_WORD_OFF <= 255
 		"st.as   sp, [%3, %1]    \n\t"
+#else
+		/*
+		 * Workaround for NR_CPUS=4k
+		 * %1 is bigger than 255 (S9 offset for st.as)
+		 */
+		"add2    r24, %3, %1     \n\t"
+		"st      sp, [r24]       \n\t"
+#endif
 
 		"sync   \n\t"
 
@@ -97,7 +108,7 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 		/* FP/BLINK restore generated by gcc (standard func epilogue */
 
 		: "=r"(tmp)
-		: "n"((TASK_THREAD + THREAD_KSP) / 4), "r"(next), "r"(prev)
+		: "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
 		: "blink"
 	);
 
diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S
index d8972345e4c..65690e7fcc8 100644
--- a/arch/arc/kernel/ctx_sw_asm.S
+++ b/arch/arc/kernel/ctx_sw_asm.S
@@ -14,6 +14,8 @@
 #include <asm/asm-offsets.h>
 #include <asm/linkage.h>
 
+#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
+
 ;################### Low Level Context Switch ##########################
 
 	.section .sched.text,"ax",@progbits
@@ -28,8 +30,13 @@ __switch_to:
 	SAVE_CALLEE_SAVED_KERNEL
 
 	/* Save the now KSP in task->thread.ksp */
-	st.as  sp, [r0, (TASK_THREAD + THREAD_KSP)/4]
-
+#if KSP_WORD_OFF  <= 255
+	st.as  sp, [r0, KSP_WORD_OFF]
+#else
+	/* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */
+	add2	r24, r0, KSP_WORD_OFF
+	st	sp, [r24]
+#endif
 	/*
 	* Return last task in r0 (return reg)
 	* On ARC, Return reg = First Arg reg = r0.
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index 2340af0e1d6..b6dc4e21fd3 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -14,10 +14,22 @@
 #include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
-#include <asm/prom.h>
 #include <asm/clk.h>
 #include <asm/mach_desc.h>
 
+static const void * __init arch_get_next_mach(const char *const **match)
+{
+	static const struct machine_desc *mdesc = __arch_info_begin;
+	const struct machine_desc *m = mdesc;
+
+	if (m >= __arch_info_end)
+		return NULL;
+
+	mdesc++;
+	*match = m->dt_compat;
+	return m;
+}
+
 /**
  * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
  * @dt:		virtual address pointer to dt blob
@@ -25,93 +37,24 @@
  * If a dtb was passed to the kernel, then use it to choose the correct
  * machine_desc and to setup the system.
  */
-struct machine_desc * __init setup_machine_fdt(void *dt)
+const struct machine_desc * __init setup_machine_fdt(void *dt)
 {
-	struct boot_param_header *devtree = dt;
-	struct machine_desc *mdesc = NULL, *mdesc_best = NULL;
-	unsigned int score, mdesc_score = ~1;
+	const struct machine_desc *mdesc;
 	unsigned long dt_root;
-	const char *model, *compat;
 	void *clk;
-	char manufacturer[16];
 	unsigned long len;
 
-	/* check device tree validity */
-	if (be32_to_cpu(devtree->magic) != OF_DT_HEADER)
+	if (!early_init_dt_scan(dt))
 		return NULL;
 
-	initial_boot_params = devtree;
-	dt_root = of_get_flat_dt_root();
-
-	/*
-	 * The kernel could be multi-platform enabled, thus could have many
-	 * "baked-in" machine descriptors. Search thru all for the best
-	 * "compatible" string match.
-	 */
-	for_each_machine_desc(mdesc) {
-		score = of_flat_dt_match(dt_root, mdesc->dt_compat);
-		if (score > 0 && score < mdesc_score) {
-			mdesc_best = mdesc;
-			mdesc_score = score;
-		}
-	}
-	if (!mdesc_best) {
-		const char *prop;
-		long size;
-
-		pr_err("\n unrecognized device tree list:\n[ ");
-
-		prop = of_get_flat_dt_prop(dt_root, "compatible", &size);
-		if (prop) {
-			while (size > 0) {
-				printk("'%s' ", prop);
-				size -= strlen(prop) + 1;
-				prop += strlen(prop) + 1;
-			}
-		}
-		printk("]\n\n");
-
+	mdesc = of_flat_dt_match_machine(NULL, arch_get_next_mach);
+	if (!mdesc)
 		machine_halt();
-	}
-
-	/* compat = "<manufacturer>,<model>" */
-	compat =  mdesc_best->dt_compat[0];
-
-	model = strchr(compat, ',');
-	if (model)
-		model++;
-
-	strlcpy(manufacturer, compat, model ? model - compat : strlen(compat));
-
-	pr_info("Board \"%s\" from %s (Manufacturer)\n", model, manufacturer);
-
-	/* Retrieve various information from the /chosen node */
-	of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);
-
-	/* Initialize {size,address}-cells info */
-	of_scan_flat_dt(early_init_dt_scan_root, NULL);
-
-	/* Setup memory, calling early_init_dt_add_memory_arch */
-	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
 
+	dt_root = of_get_flat_dt_root();
 	clk = of_get_flat_dt_prop(dt_root, "clock-frequency", &len);
 	if (clk)
 		arc_set_core_freq(of_read_ulong(clk, len/4));
 
-	return mdesc_best;
-}
-
-/*
- * Copy the flattened DT out of .init since unflattening doesn't copy strings
- * and the normal DT APIs refs them from orig flat DT
- */
-void __init copy_devtree(void)
-{
-	void *alloc = early_init_dt_alloc_memory_arch(
-			be32_to_cpu(initial_boot_params->totalsize), 64);
-	if (alloc) {
-		memcpy(alloc, initial_boot_params,
-				be32_to_cpu(initial_boot_params->totalsize));
-		initial_boot_params = alloc;
-	}
+	return mdesc;
 }
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index b908dde8a33..47d09d07f09 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -250,6 +250,14 @@ ARC_ENTRY handle_interrupt_level1
 	lr  r0, [icause1]
 	and r0, r0, 0x1f
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	; icause1 needs to be read early, before calling tracing, which
+	; can clobber scratch regs, hence use of stack to stash it
+	push r0
+	TRACE_ASM_IRQ_DISABLE
+	pop  r0
+#endif
+
 	bl.d  @arch_do_IRQ
 	mov r1, sp
 
@@ -337,9 +345,9 @@ ARC_ENTRY EV_TLBProtV
 	;  vineetg: Mar 6th: Random Seg Fault issue #1
 	;  ecr and efa were not saved in case an Intr sneaks in
 	;  after fake rtie
-	;
+
 	lr  r2, [ecr]
-	lr  r1, [efa]	; Faulting Data address
+	lr  r0, [efa]	; Faulting Data address
 
 	; --------(4) Return from CPU Exception Mode ---------
 	;  Fake a rtie, but rtie to next label
@@ -348,6 +356,8 @@ ARC_ENTRY EV_TLBProtV
 
 	FAKE_RET_FROM_EXCPN r9
 
+	mov   r1, sp
+
 	;------ (5) Type of Protection Violation? ----------
 	;
 	; ProtV Hardware Exception is triggered for Access Faults of 2 types
@@ -358,16 +368,12 @@ ARC_ENTRY EV_TLBProtV
 	bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
 
 	;========= (6a) Access Violation Processing ========
-	mov r0, sp              ; pt_regs
 	bl  do_page_fault
 	b   ret_from_exception
 
 	;========== (6b) Non aligned access ============
 4:
-	mov r0, r1
-	mov r1, sp              ; pt_regs
 
-#ifdef  CONFIG_ARC_MISALIGN_ACCESS
 	SAVE_CALLEE_SAVED_USER
 	mov r2, sp              ; callee_regs
 
@@ -376,9 +382,6 @@ ARC_ENTRY EV_TLBProtV
 	; TBD: optimize - do this only if a callee reg was involved
 	; either a dst of emulated LD/ST or src with address-writeback
 	RESTORE_CALLEE_SAVED_USER
-#else
-	bl  do_misaligned_error
-#endif
 
 	b   ret_from_exception
 
@@ -575,6 +578,7 @@ resume_user_mode_begin:
 	; --- (Slow Path #2) pending signal  ---
 	mov r0, sp	; pt_regs for arg to do_signal()/do_notify_resume()
 
+	GET_CURR_THR_INFO_FLAGS   r9
 	bbit0  r9, TIF_SIGPENDING, .Lchk_notify_resume
 
 	; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs
@@ -640,6 +644,8 @@ resume_kernel_mode:
 
 restore_regs :
 
+	TRACE_ASM_IRQ_ENABLE
+
 	lr	r10, [status32]
 
 	; Restore REG File. In case multiple Events outstanding,
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 0f944f02451..2c878e964a6 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -95,7 +95,7 @@ stext:
 ;----------------------------------------------------------------
 ;     First lines of code run by secondary before jumping to 'C'
 ;----------------------------------------------------------------
-	.section .init.text, "ax",@progbits
+	.section .text, "ax",@progbits
 	.type first_lines_of_secondary, @function
 	.globl first_lines_of_secondary
 
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index 5fc92455da3..a4b141ee9a6 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -39,10 +39,14 @@ void arc_init_IRQ(void)
 	level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5;
 	level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6;
 
-	if (level_mask) {
+	/*
+	 * Write to register, even if no LV2 IRQs configured to reset it
+	 * in case bootloader had mucked with it
+	 */
+	write_aux_reg(AUX_IRQ_LEV, level_mask);
+
+	if (level_mask)
 		pr_info("Level-2 interrupts bitset %x\n", level_mask);
-		write_aux_reg(AUX_IRQ_LEV, level_mask);
-	}
 }
 
 /*
@@ -146,7 +150,7 @@ void arch_do_IRQ(unsigned int irq, struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
-int __init get_hw_config_num_irq(void)
+int get_hw_config_num_irq(void)
 {
 	uint32_t val = read_aux_reg(ARC_REG_VECBASE_BCR);
 
diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index a7698fb1481..a2ff5c5d145 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -196,6 +196,18 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 	instruction_pointer(regs) = ip;
 }
 
+static void kgdb_call_nmi_hook(void *ignored)
+{
+	kgdb_nmicallback(raw_smp_processor_id(), NULL);
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+	local_irq_enable();
+	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+	local_irq_disable();
+}
+
 struct kgdb_arch arch_kgdb_ops = {
 	/* breakpoint instruction: TRAP_S 0x3 */
 #ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index 72f97822784..42b05046fad 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -87,13 +87,13 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
 
 static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
 	kcb->kprobe_status = kcb->prev_kprobe.status;
 }
 
 static inline void __kprobes set_current_kprobe(struct kprobe *p)
 {
-	__get_cpu_var(current_kprobe) = p;
+	__this_cpu_write(current_kprobe, p);
 }
 
 static void __kprobes resume_execution(struct kprobe *p, unsigned long addr,
@@ -237,7 +237,7 @@ int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs)
 
 		return 1;
 	} else if (kprobe_running()) {
-		p = __get_cpu_var(current_kprobe);
+		p = __this_cpu_read(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
 			setup_singlestep(p, regs);
 			kcb->kprobe_status = KPROBE_HIT_SS;
@@ -327,7 +327,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned long trapnr)
 		 */
 
 		/* We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accouting
+		 * we can also use npre/npostfault count for accounting
 		 * these specific fault cases.
 		 */
 		kprobes_inc_nmissed_count(cur);
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
new file mode 100644
index 00000000000..e46d81f7097
--- /dev/null
+++ b/arch/arc/kernel/perf_event.c
@@ -0,0 +1,326 @@
+/*
+ * Linux performance counter support for ARC700 series
+ *
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This code is inspired by the perf support of various other architectures.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <asm/arcregs.h>
+
+struct arc_pmu {
+	struct pmu	pmu;
+	int		counter_size;	/* in bits */
+	int		n_counters;
+	unsigned long	used_mask[BITS_TO_LONGS(ARC_PMU_MAX_HWEVENTS)];
+	int		ev_hw_idx[PERF_COUNT_ARC_HW_MAX];
+};
+
+/* read counter #idx; note that counter# != event# on ARC! */
+static uint64_t arc_pmu_read_counter(int idx)
+{
+	uint32_t tmp;
+	uint64_t result;
+
+	/*
+	 * ARC supports making 'snapshots' of the counters, so we don't
+	 * need to care about counters wrapping to 0 underneath our feet
+	 */
+	write_aux_reg(ARC_REG_PCT_INDEX, idx);
+	tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
+	write_aux_reg(ARC_REG_PCT_CONTROL, tmp | ARC_REG_PCT_CONTROL_SN);
+	result = (uint64_t) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32;
+	result |= read_aux_reg(ARC_REG_PCT_SNAPL);
+
+	return result;
+}
+
+static void arc_perf_event_update(struct perf_event *event,
+				  struct hw_perf_event *hwc, int idx)
+{
+	struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
+	uint64_t prev_raw_count, new_raw_count;
+	int64_t delta;
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = arc_pmu_read_counter(idx);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+				 new_raw_count) != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) &
+		((1ULL << arc_pmu->counter_size) - 1ULL);
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+}
+
+static void arc_pmu_read(struct perf_event *event)
+{
+	arc_perf_event_update(event, &event->hw, event->hw.idx);
+}
+
+static int arc_pmu_cache_event(u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	int ret;
+
+	cache_type	= (config >>  0) & 0xff;
+	cache_op	= (config >>  8) & 0xff;
+	cache_result	= (config >> 16) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+	if (cache_type >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+	if (cache_type >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ret = arc_pmu_cache_map[cache_type][cache_op][cache_result];
+
+	if (ret == CACHE_OP_UNSUPPORTED)
+		return -ENOENT;
+
+	return ret;
+}
+
+/* initializes hw_perf_event structure if event is supported */
+static int arc_pmu_event_init(struct perf_event *event)
+{
+	struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int ret;
+
+	/* ARC 700 PMU does not support sampling events */
+	if (is_sampling_event(event))
+		return -ENOENT;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		if (event->attr.config >= PERF_COUNT_HW_MAX)
+			return -ENOENT;
+		if (arc_pmu->ev_hw_idx[event->attr.config] < 0)
+			return -ENOENT;
+		hwc->config = arc_pmu->ev_hw_idx[event->attr.config];
+		pr_debug("initializing event %d with cfg %d\n",
+			 (int) event->attr.config, (int) hwc->config);
+		return 0;
+	case PERF_TYPE_HW_CACHE:
+		ret = arc_pmu_cache_event(event->attr.config);
+		if (ret < 0)
+			return ret;
+		hwc->config = arc_pmu->ev_hw_idx[ret];
+		return 0;
+	default:
+		return -ENOENT;
+	}
+}
+
+/* starts all counters */
+static void arc_pmu_enable(struct pmu *pmu)
+{
+	uint32_t tmp;
+	tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
+	write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x1);
+}
+
+/* stops all counters */
+static void arc_pmu_disable(struct pmu *pmu)
+{
+	uint32_t tmp;
+	tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
+	write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0);
+}
+
+/*
+ * Assigns hardware counter to hardware condition.
+ * Note that there is no separate start/stop mechanism;
+ * stopping is achieved by assigning the 'never' condition
+ */
+static void arc_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	event->hw.state = 0;
+
+	/* enable ARC pmu here */
+	write_aux_reg(ARC_REG_PCT_INDEX, idx);
+	write_aux_reg(ARC_REG_PCT_CONFIG, hwc->config);
+}
+
+static void arc_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (!(event->hw.state & PERF_HES_STOPPED)) {
+		/* stop ARC pmu here */
+		write_aux_reg(ARC_REG_PCT_INDEX, idx);
+
+		/* condition code #0 is always "never" */
+		write_aux_reg(ARC_REG_PCT_CONFIG, 0);
+
+		event->hw.state |= PERF_HES_STOPPED;
+	}
+
+	if ((flags & PERF_EF_UPDATE) &&
+	    !(event->hw.state & PERF_HES_UPTODATE)) {
+		arc_perf_event_update(event, &event->hw, idx);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+}
+
+static void arc_pmu_del(struct perf_event *event, int flags)
+{
+	struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
+
+	arc_pmu_stop(event, PERF_EF_UPDATE);
+	__clear_bit(event->hw.idx, arc_pmu->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+/* allocate hardware counter and optionally start counting */
+static int arc_pmu_add(struct perf_event *event, int flags)
+{
+	struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (__test_and_set_bit(idx, arc_pmu->used_mask)) {
+		idx = find_first_zero_bit(arc_pmu->used_mask,
+					  arc_pmu->n_counters);
+		if (idx == arc_pmu->n_counters)
+			return -EAGAIN;
+
+		__set_bit(idx, arc_pmu->used_mask);
+		hwc->idx = idx;
+	}
+
+	write_aux_reg(ARC_REG_PCT_INDEX, idx);
+	write_aux_reg(ARC_REG_PCT_CONFIG, 0);
+	write_aux_reg(ARC_REG_PCT_COUNTL, 0);
+	write_aux_reg(ARC_REG_PCT_COUNTH, 0);
+	local64_set(&hwc->prev_count, 0);
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (flags & PERF_EF_START)
+		arc_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static int arc_pmu_device_probe(struct platform_device *pdev)
+{
+	struct arc_pmu *arc_pmu;
+	struct arc_reg_pct_build pct_bcr;
+	struct arc_reg_cc_build cc_bcr;
+	int i, j, ret;
+
+	union cc_name {
+		struct {
+			uint32_t word0, word1;
+			char sentinel;
+		} indiv;
+		char str[9];
+	} cc_name;
+
+
+	READ_BCR(ARC_REG_PCT_BUILD, pct_bcr);
+	if (!pct_bcr.v) {
+		pr_err("This core does not have performance counters!\n");
+		return -ENODEV;
+	}
+
+	arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu),
+			       GFP_KERNEL);
+	if (!arc_pmu)
+		return -ENOMEM;
+
+	arc_pmu->n_counters = pct_bcr.c;
+	BUG_ON(arc_pmu->n_counters > ARC_PMU_MAX_HWEVENTS);
+
+	arc_pmu->counter_size = 32 + (pct_bcr.s << 4);
+	pr_info("ARC PMU found with %d counters of size %d bits\n",
+		arc_pmu->n_counters, arc_pmu->counter_size);
+
+	READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
+
+	if (!cc_bcr.v)
+		pr_err("Strange! Performance counters exist, but no countable conditions?\n");
+
+	pr_info("ARC PMU has %d countable conditions\n", cc_bcr.c);
+
+	cc_name.str[8] = 0;
+	for (i = 0; i < PERF_COUNT_HW_MAX; i++)
+		arc_pmu->ev_hw_idx[i] = -1;
+
+	for (j = 0; j < cc_bcr.c; j++) {
+		write_aux_reg(ARC_REG_CC_INDEX, j);
+		cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
+		cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
+		for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
+			if (arc_pmu_ev_hw_map[i] &&
+			    !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) &&
+			    strlen(arc_pmu_ev_hw_map[i])) {
+				pr_debug("mapping %d to idx %d with name %s\n",
+					 i, j, cc_name.str);
+				arc_pmu->ev_hw_idx[i] = j;
+			}
+		}
+	}
+
+	arc_pmu->pmu = (struct pmu) {
+		.pmu_enable	= arc_pmu_enable,
+		.pmu_disable	= arc_pmu_disable,
+		.event_init	= arc_pmu_event_init,
+		.add		= arc_pmu_add,
+		.del		= arc_pmu_del,
+		.start		= arc_pmu_start,
+		.stop		= arc_pmu_stop,
+		.read		= arc_pmu_read,
+	};
+
+	ret = perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
+
+	return ret;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id arc_pmu_match[] = {
+	{ .compatible = "snps,arc700-pmu" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, arc_pmu_match);
+#endif
+
+static struct platform_driver arc_pmu_driver = {
+	.driver	= {
+		.name		= "arc700-pmu",
+		.of_match_table = of_match_ptr(arc_pmu_match),
+	},
+	.probe		= arc_pmu_device_probe,
+};
+
+module_platform_driver(arc_pmu_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mischa Jonker <mjonker@synopsys.com>");
+MODULE_DESCRIPTION("ARC PMU driver");
diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c
index 333238564b6..5d76706139d 100644
--- a/arch/arc/kernel/ptrace.c
+++ b/arch/arc/kernel/ptrace.c
@@ -102,7 +102,7 @@ static int genregs_set(struct task_struct *target,
 	REG_IGNORE_ONE(pad2);
 	REG_IN_CHUNK(callee, efa, cregs);	/* callee_regs[r25..r13] */
 	REG_IGNORE_ONE(efa);			/* efa update invalid */
-	REG_IN_ONE(stop_pc, &ptregs->ret);	/* stop_pc: PC update */
+	REG_IGNORE_ONE(stop_pc);			/* PC updated via @ret */
 
 	return ret;
 }
diff --git a/arch/arc/kernel/reset.c b/arch/arc/kernel/reset.c
index e227a2b1c94..2768fa1e39b 100644
--- a/arch/arc/kernel/reset.c
+++ b/arch/arc/kernel/reset.c
@@ -31,3 +31,4 @@ void machine_power_off(void)
 }
 
 void (*pm_power_off) (void) = NULL;
+EXPORT_SYMBOL(pm_power_off);
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 2c68bc7e6a7..643eae4436e 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -21,7 +21,6 @@
 #include <asm/setup.h>
 #include <asm/page.h>
 #include <asm/irq.h>
-#include <asm/prom.h>
 #include <asm/unwind.h>
 #include <asm/clk.h>
 #include <asm/mach_desc.h>
@@ -31,14 +30,13 @@
 int running_on_hw = 1;	/* vs. on ISS */
 
 char __initdata command_line[COMMAND_LINE_SIZE];
-struct machine_desc *machine_desc;
+const struct machine_desc *machine_desc;
 
 struct task_struct *_current_task[NR_CPUS];	/* For stack switching */
 
 struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
 
-
-void read_arc_build_cfg_regs(void)
+static void read_arc_build_cfg_regs(void)
 {
 	struct bcr_perip uncached_space;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -106,7 +104,7 @@ static const struct cpuinfo_data arc_cpu_tbl[] = {
 	{ {0x00, NULL		} }
 };
 
-char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
+static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	int n = 0;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
@@ -171,7 +169,7 @@ static const struct id_to_str mac_mul_nm[] = {
 	{0x6, "Dual 16x16 and 32x16"}
 };
 
-char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
+static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	int n = 0;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
@@ -234,7 +232,7 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 	return buf;
 }
 
-void arc_chk_ccms(void)
+static void arc_chk_ccms(void)
 {
 #if defined(CONFIG_ARC_HAS_DCCM) || defined(CONFIG_ARC_HAS_ICCM)
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -269,7 +267,7 @@ void arc_chk_ccms(void)
  * hardware has dedicated regs which need to be saved/restored on ctx-sw
  * (Single Precision uses core regs), thus kernel is kind of oblivious to it
  */
-void arc_chk_fpu(void)
+static void arc_chk_fpu(void)
 {
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
 
@@ -346,8 +344,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_arch_memory();
 
 	/* copy flat DT out of .init and then unflatten it */
-	copy_devtree();
-	unflatten_device_tree();
+	unflatten_and_copy_device_tree();
 
 	/* Can be issue if someone passes cmd line arg "ro"
 	 * But that is unlikely so keeping it as it is
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index ee6ef2f60a2..7e95e1a8651 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -101,7 +101,6 @@ SYSCALL_DEFINE0(rt_sigreturn)
 {
 	struct rt_sigframe __user *sf;
 	unsigned int magic;
-	int err;
 	struct pt_regs *regs = current_pt_regs();
 
 	/* Always make any pending restarted system calls return -EINTR */
@@ -119,15 +118,16 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	if (!access_ok(VERIFY_READ, sf, sizeof(*sf)))
 		goto badframe;
 
-	err = restore_usr_regs(regs, sf);
-	err |= __get_user(magic, &sf->sigret_magic);
-	if (err)
+	if (__get_user(magic, &sf->sigret_magic))
 		goto badframe;
 
 	if (unlikely(is_do_ss_needed(magic)))
 		if (restore_altstack(&sf->uc.uc_stack))
 			goto badframe;
 
+	if (restore_usr_regs(regs, sf))
+		goto badframe;
+
 	/* Don't restart from sigreturn */
 	syscall_wont_restart(regs);
 
@@ -191,6 +191,15 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info,
 		return 1;
 
 	/*
+	 * w/o SA_SIGINFO, struct ucontext is partially populated (only
+	 * uc_mcontext/uc_sigmask) for kernel's normal user state preservation
+	 * during signal handler execution. This works for SA_SIGINFO as well
+	 * although the semantics are now overloaded (the same reg state can be
+	 * inspected by userland: but are they allowed to fiddle with it ?
+	 */
+	err |= stash_usr_regs(sf, regs, set);
+
+	/*
 	 * SA_SIGINFO requires 3 args to signal handler:
 	 *  #1: sig-no (common to any handler)
 	 *  #2: struct siginfo
@@ -213,14 +222,6 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info,
 		magic = MAGIC_SIGALTSTK;
 	}
 
-	/*
-	 * w/o SA_SIGINFO, struct ucontext is partially populated (only
-	 * uc_mcontext/uc_sigmask) for kernel's normal user state preservation
-	 * during signal handler execution. This works for SA_SIGINFO as well
-	 * although the semantics are now overloaded (the same reg state can be
-	 * inspected by userland: but are they allowed to fiddle with it ?
-	 */
-	err |= stash_usr_regs(sf, regs, set);
 	err |= __put_user(magic, &sf->sigret_magic);
 	if (err)
 		return err;
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index bca3052c956..c2f9ebbc38f 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -95,7 +95,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
  *        If it turns out to be elaborate, it's better to code it in assembly
  *
  */
-void __attribute__((weak)) arc_platform_smp_wait_to_boot(int cpu)
+void __weak arc_platform_smp_wait_to_boot(int cpu)
 {
 	/*
 	 * As a hack for debugging - since debugger will single-step over the
@@ -128,6 +128,7 @@ void start_kernel_secondary(void)
 	atomic_inc(&mm->mm_users);
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
@@ -210,7 +211,6 @@ enum ipi_msg_type {
 	IPI_NOP = 0,
 	IPI_RESCHEDULE = 1,
 	IPI_CALL_FUNC,
-	IPI_CALL_FUNC_SINGLE,
 	IPI_CPU_STOP
 };
 
@@ -254,7 +254,7 @@ void smp_send_stop(void)
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+	ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -286,10 +286,6 @@ static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi, int cpu)
 			generic_smp_call_function_interrupt();
 			break;
 
-		case IPI_CALL_FUNC_SINGLE:
-			generic_smp_call_function_single_interrupt();
-			break;
-
 		case IPI_CPU_STOP:
 			ipi_cpu_stop(cpu);
 			break;
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index f8b7d880304..9ce47cfe230 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -237,11 +237,14 @@ unsigned int get_wchan(struct task_struct *tsk)
  */
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
+	/* Assumes @tsk is sleeping so unwinds from __switch_to */
 	arc_unwind_core(tsk, NULL, __collect_all_but_sched, trace);
 }
 
 void save_stack_trace(struct stack_trace *trace)
 {
-	arc_unwind_core(current, NULL, __collect_all, trace);
+	/* Pass NULL for task so it unwinds the current call frame */
+	arc_unwind_core(NULL, NULL, __collect_all, trace);
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
 #endif
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index 0e51e69cf30..e5f3a837fb3 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -63,9 +63,10 @@
 
 int arc_counter_setup(void)
 {
-	/* RTSC insn taps into cpu clk, needs no setup */
-
-	/* For SMP, only allowed if cross-core-sync, hence usable as cs */
+	/*
+	 * For SMP this needs to be 0. However Kconfig glue doesn't
+	 * enable this option for SMP configs
+	 */
 	return 1;
 }
 
@@ -206,7 +207,7 @@ static DEFINE_PER_CPU(struct clock_event_device, arc_clockevent_device) = {
 
 static irqreturn_t timer_irq_handler(int irq, void *dev_id)
 {
-	struct clock_event_device *clk = &__get_cpu_var(arc_clockevent_device);
+	struct clock_event_device *clk = this_cpu_ptr(&arc_clockevent_device);
 
 	arc_timer_event_ack(clk->mode == CLOCK_EVT_MODE_PERIODIC);
 	clk->event_handler(clk);
@@ -223,16 +224,13 @@ static struct irqaction arc_timer_irq = {
  * Setup the local event timer for @cpu
  * N.B. weak so that some exotic ARC SoCs can completely override it
  */
-void __attribute__((weak)) arc_local_timer_setup(unsigned int cpu)
+void __weak arc_local_timer_setup(unsigned int cpu)
 {
 	struct clock_event_device *clk = &per_cpu(arc_clockevent_device, cpu);
 
-	clockevents_calc_mult_shift(clk, arc_get_core_freq(), 5);
-
-	clk->max_delta_ns = clockevent_delta2ns(ARC_TIMER_MAX, clk);
 	clk->cpumask = cpumask_of(cpu);
-
-	clockevents_register_device(clk);
+	clockevents_config_and_register(clk, arc_get_core_freq(),
+					0, ARC_TIMER_MAX);
 
 	/*
 	 * setup the per-cpu timer IRQ handler - for all cpus
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index e21692d2fda..3eadfdabc32 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -84,19 +84,18 @@ DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", do_memory_error, BUS_ADRERR)
 DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
 DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
 
-#ifdef CONFIG_ARC_MISALIGN_ACCESS
 /*
  * Entry Point for Misaligned Data access Exception, for emulating in software
  */
 int do_misaligned_access(unsigned long address, struct pt_regs *regs,
 			 struct callee_regs *cregs)
 {
+	/* If emulation not enabled, or failed, kill the task */
 	if (misaligned_fixup(address, regs, cregs) != 0)
 		return do_misaligned_error(address, regs);
 
 	return 0;
 }
-#endif
 
 /*
  * Entry point for miscll errors such as Nested Exceptions
diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
index 28d17006074..7ff5b5c183b 100644
--- a/arch/arc/kernel/unaligned.c
+++ b/arch/arc/kernel/unaligned.c
@@ -245,6 +245,12 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs,
 		regs->status32 &= ~STATUS_DE_MASK;
 	} else {
 		regs->ret += state.instr_len;
+
+		/* handle zero-overhead-loop */
+		if ((regs->ret == regs->lp_end) && (regs->lp_count)) {
+			regs->ret = regs->lp_start;
+			regs->lp_count--;
+		}
 	}
 
 	return 0;
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 5a1259cd948..6b58c1de757 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -182,7 +182,7 @@ void arc_cache_init(void)
 
 #ifdef CONFIG_ARC_HAS_ICACHE
 	/* 1. Confirm some of I-cache params which Linux assumes */
-	if (ic->line_len != ARC_ICACHE_LINE_LEN)
+	if (ic->line_len != L1_CACHE_BYTES)
 		panic("Cache H/W doesn't match kernel Config");
 
 	if (ic->ver != CONFIG_ARC_MMU_VER)
@@ -205,7 +205,7 @@ chk_dc:
 		return;
 
 #ifdef CONFIG_ARC_HAS_DCACHE
-	if (dc->line_len != ARC_DCACHE_LINE_LEN)
+	if (dc->line_len != L1_CACHE_BYTES)
 		panic("Cache H/W doesn't match kernel Config");
 
 	/* check for D-Cache aliasing */
@@ -240,6 +240,67 @@ chk_dc:
 #define OP_INV		0x1
 #define OP_FLUSH	0x2
 #define OP_FLUSH_N_INV	0x3
+#define OP_INV_IC	0x4
+
+/*
+ * Common Helper for Line Operations on {I,D}-Cache
+ */
+static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
+				     unsigned long sz, const int cacheop)
+{
+	unsigned int aux_cmd, aux_tag;
+	int num_lines;
+	const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+
+	if (cacheop == OP_INV_IC) {
+		aux_cmd = ARC_REG_IC_IVIL;
+		aux_tag = ARC_REG_IC_PTAG;
+	}
+	else {
+		/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+		aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+		aux_tag = ARC_REG_DC_PTAG;
+	}
+
+	/* Ensure we properly floor/ceil the non-line aligned/sized requests
+	 * and have @paddr - aligned to cache line and integral @num_lines.
+	 * This however can be avoided for page sized since:
+	 *  -@paddr will be cache-line aligned already (being page aligned)
+	 *  -@sz will be integral multiple of line size (being page sized).
+	 */
+	if (!full_page_op) {
+		sz += paddr & ~CACHE_LINE_MASK;
+		paddr &= CACHE_LINE_MASK;
+		vaddr &= CACHE_LINE_MASK;
+	}
+
+	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+
+#if (CONFIG_ARC_MMU_VER <= 2)
+	/* MMUv2 and before: paddr contains stuffed vaddrs bits */
+	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
+#else
+	/* if V-P const for loop, PTAG can be written once outside loop */
+	if (full_page_op)
+		write_aux_reg(ARC_REG_DC_PTAG, paddr);
+#endif
+
+	while (num_lines-- > 0) {
+#if (CONFIG_ARC_MMU_VER > 2)
+		/* MMUv3, cache ops require paddr seperately */
+		if (!full_page_op) {
+			write_aux_reg(aux_tag, paddr);
+			paddr += L1_CACHE_BYTES;
+		}
+
+		write_aux_reg(aux_cmd, vaddr);
+		vaddr += L1_CACHE_BYTES;
+#else
+		write_aux_reg(aux, paddr);
+		paddr += L1_CACHE_BYTES;
+#endif
+	}
+}
 
 #ifdef CONFIG_ARC_HAS_DCACHE
 
@@ -289,53 +350,6 @@ static inline void __dc_entire_op(const int cacheop)
 		write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
 }
 
-/*
- * Per Line Operation on D-Cache
- * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete
- * It's sole purpose is to help gcc generate ZOL
- * (aliasing VIPT dcache flushing needs both vaddr and paddr)
- */
-static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr,
-				  unsigned long sz, const int aux_reg)
-{
-	int num_lines;
-
-	/* Ensure we properly floor/ceil the non-line aligned/sized requests
-	 * and have @paddr - aligned to cache line and integral @num_lines.
-	 * This however can be avoided for page sized since:
-	 *  -@paddr will be cache-line aligned already (being page aligned)
-	 *  -@sz will be integral multiple of line size (being page sized).
-	 */
-	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
-		sz += paddr & ~DCACHE_LINE_MASK;
-		paddr &= DCACHE_LINE_MASK;
-		vaddr &= DCACHE_LINE_MASK;
-	}
-
-	num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN);
-
-#if (CONFIG_ARC_MMU_VER <= 2)
-	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-#endif
-
-	while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-		/*
-		 * Just as for I$, in MMU v3, D$ ops also require
-		 * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops
-		 */
-		write_aux_reg(ARC_REG_DC_PTAG, paddr);
-
-		write_aux_reg(aux_reg, vaddr);
-		vaddr += ARC_DCACHE_LINE_LEN;
-#else
-		/* paddr contains stuffed vaddrs bits */
-		write_aux_reg(aux_reg, paddr);
-#endif
-		paddr += ARC_DCACHE_LINE_LEN;
-	}
-}
-
 /* For kernel mappings cache operation: index is same as paddr */
 #define __dc_line_op_k(p, sz, op)	__dc_line_op(p, p, sz, op)
 
@@ -346,7 +360,6 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
 				unsigned long sz, const int cacheop)
 {
 	unsigned long flags, tmp = tmp;
-	int aux;
 
 	local_irq_save(flags);
 
@@ -361,12 +374,7 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
 		write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
 	}
 
-	if (cacheop & OP_INV)	/* Inv / flush-n-inv use same cmd reg */
-		aux = ARC_REG_DC_IVDL;
-	else
-		aux = ARC_REG_DC_FLDL;
-
-	__dc_line_loop(paddr, vaddr, sz, aux);
+	__cache_line_loop(paddr, vaddr, sz, cacheop);
 
 	if (cacheop & OP_FLUSH)	/* flush / flush-n-inv both wait */
 		wait_for_flush();
@@ -438,42 +446,9 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
 				unsigned long sz)
 {
 	unsigned long flags;
-	int num_lines;
-
-	/*
-	 * Ensure we properly floor/ceil the non-line aligned/sized requests:
-	 * However page sized flushes can be compile time optimised.
-	 *  -@paddr will be cache-line aligned already (being page aligned)
-	 *  -@sz will be integral multiple of line size (being page sized).
-	 */
-	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
-		sz += paddr & ~ICACHE_LINE_MASK;
-		paddr &= ICACHE_LINE_MASK;
-		vaddr &= ICACHE_LINE_MASK;
-	}
-
-	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
-
-#if (CONFIG_ARC_MMU_VER <= 2)
-	/* bits 17:13 of vaddr go as bits 4:0 of paddr */
-	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-#endif
 
 	local_irq_save(flags);
-	while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-		/* tag comes from phy addr */
-		write_aux_reg(ARC_REG_IC_PTAG, paddr);
-
-		/* index bits come from vaddr */
-		write_aux_reg(ARC_REG_IC_IVIL, vaddr);
-		vaddr += ARC_ICACHE_LINE_LEN;
-#else
-		/* paddr contains stuffed vaddrs bits */
-		write_aux_reg(ARC_REG_IC_IVIL, paddr);
-#endif
-		paddr += ARC_ICACHE_LINE_LEN;
-	}
+	__cache_line_loop(paddr, vaddr, sz, OP_INV_IC);
 	local_irq_restore(flags);
 }
 
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index d63f3de0cd5..9c69552350c 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -17,7 +17,7 @@
 #include <asm/pgalloc.h>
 #include <asm/mmu.h>
 
-static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address)
+static int handle_vmalloc_fault(unsigned long address)
 {
 	/*
 	 * Synchronize this task's top level page-table
@@ -27,7 +27,7 @@ static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address)
 	pud_t *pud, *pud_k;
 	pmd_t *pmd, *pmd_k;
 
-	pgd = pgd_offset_fast(mm, address);
+	pgd = pgd_offset_fast(current->active_mm, address);
 	pgd_k = pgd_offset_k(address);
 
 	if (!pgd_present(*pgd_k))
@@ -52,7 +52,7 @@ bad_area:
 	return 1;
 }
 
-void do_page_fault(struct pt_regs *regs, unsigned long address)
+void do_page_fault(unsigned long address, struct pt_regs *regs)
 {
 	struct vm_area_struct *vma = NULL;
 	struct task_struct *tsk = current;
@@ -72,7 +72,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
 	 * nothing more.
 	 */
 	if (address >= VMALLOC_START && address <= VMALLOC_END) {
-		ret = handle_vmalloc_fault(mm, address);
+		ret = handle_vmalloc_fault(address);
 		if (unlikely(ret))
 			goto bad_area_nosemaphore;
 		else
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 81279ec73a6..55e0a85bea7 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -125,10 +125,3 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
-
-#ifdef CONFIG_OF_FLATTREE
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
-	pr_err("%s(%llx, %llx)\n", __func__, start, end);
-}
-#endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 71cb26df425..e1acf0ce564 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -100,7 +100,7 @@
 
 
 /* A copy of the ASID from the PID reg is kept in asid_cache */
-unsigned int asid_cache = MM_CTXT_FIRST_CYCLE;
+DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 
 /*
  * Utility Routine to erase a J-TLB entry
@@ -274,6 +274,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm)
 void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			   unsigned long end)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	/* If range @start to @end is more than 32 TLB entries deep,
@@ -297,9 +298,9 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 	local_irq_save(flags);
 
-	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
+	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
 		while (start < end) {
-			tlb_entry_erase(start | hw_pid(vma->vm_mm));
+			tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu));
 			start += PAGE_SIZE;
 		}
 	}
@@ -346,6 +347,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 
 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	/* Note that it is critical that interrupts are DISABLED between
@@ -353,14 +355,87 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 	 */
 	local_irq_save(flags);
 
-	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
-		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm));
+	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
+		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu));
 		utlb_invalidate();
 	}
 
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_SMP
+
+struct tlb_args {
+	struct vm_area_struct *ta_vma;
+	unsigned long ta_start;
+	unsigned long ta_end;
+};
+
+static inline void ipi_flush_tlb_page(void *arg)
+{
+	struct tlb_args *ta = arg;
+
+	local_flush_tlb_page(ta->ta_vma, ta->ta_start);
+}
+
+static inline void ipi_flush_tlb_range(void *arg)
+{
+	struct tlb_args *ta = arg;
+
+	local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
+}
+
+static inline void ipi_flush_tlb_kernel_range(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
+}
+
+void flush_tlb_all(void)
+{
+	on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm,
+			 mm, 1);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+{
+	struct tlb_args ta = {
+		.ta_vma = vma,
+		.ta_start = uaddr
+	};
+
+	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	struct tlb_args ta = {
+		.ta_vma = vma,
+		.ta_start = start,
+		.ta_end = end
+	};
+
+	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1);
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	struct tlb_args ta = {
+		.ta_start = start,
+		.ta_end = end
+	};
+
+	on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+}
+#endif
+
 /*
  * Routine to create a TLB entry
  */
@@ -400,7 +475,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 
 	local_irq_save(flags);
 
-	tlb_paranoid_check(vma->vm_mm->context.asid, address);
+	tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address);
 
 	address &= PAGE_MASK;
 
@@ -610,9 +685,9 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 			  struct pt_regs *regs)
 {
 	int set, way, n;
-	unsigned int pd0[4], pd1[4];	/* assume max 4 ways */
 	unsigned long flags, is_valid;
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	unsigned int pd0[mmu->ways], pd1[mmu->ways];
 
 	local_irq_save(flags);
 
@@ -637,7 +712,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 			continue;
 
 		/* Scan the set for duplicate ways: needs a nested loop */
-		for (way = 0; way < mmu->ways; way++) {
+		for (way = 0; way < mmu->ways - 1; way++) {
 			if (!pd0[way])
 				continue;
 
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index cf7d7d9ad69..3fcfdb38d24 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -369,8 +369,8 @@ do_slow_path_pf:
 	EXCEPTION_PROLOGUE
 
 	; ------- setup args for Linux Page fault Hanlder ---------
-	mov_s r0, sp
-	lr  r1, [efa]
+	mov_s r1, sp
+	lr    r0, [efa]
 
 	; We don't want exceptions to be disabled while the fault is handled.
 	; Now that we have saved the context we return from exception hence
diff --git a/arch/arc/plat-tb10x/Kconfig b/arch/arc/plat-tb10x/Kconfig
index 1ab386bb5da..6994c188dc8 100644
--- a/arch/arc/plat-tb10x/Kconfig
+++ b/arch/arc/plat-tb10x/Kconfig
@@ -20,8 +20,10 @@ menuconfig ARC_PLAT_TB10X
 	bool "Abilis TB10x"
 	select COMMON_CLK
 	select PINCTRL
+	select PINCTRL_TB10X
 	select PINMUX
 	select ARCH_REQUIRE_GPIOLIB
+	select GPIO_TB10X
 	select TB10X_IRQC
 	help
 	  Support for platforms based on the TB10x home media gateway SOC by