24 files changed, 2386 insertions, 427 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 1b73ffe746d..ed21737a00c 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -283,6 +283,17 @@ config FORCE_MAX_ZONEORDER
 	default "17" if HUGETLB_PAGE
 	default "11"
 
+config VIRT_CPU_ACCOUNTING
+	bool "Deterministic task and CPU time accounting"
+	default n
+	help
+	  Select this option to enable more accurate task and CPU time
+	  accounting.  This is done by reading a CPU counter on each
+	  kernel entry and exit and on transitions within the kernel
+	  between system, softirq and hardirq state, so there is a
+	  small performance impact.
+	  If in doubt, say N here.
+
 config SMP
 	bool "Symmetric multi-processing support"
 	help
diff --git a/arch/ia64/ia32/elfcore32.h b/arch/ia64/ia32/elfcore32.h
index 446c9aac924..9a3abf58cea 100644
--- a/arch/ia64/ia32/elfcore32.h
+++ b/arch/ia64/ia32/elfcore32.h
@@ -30,7 +30,19 @@ struct elf_siginfo
 	int	si_errno;			/* errno */
 };
 
-#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/*
+ * Hacks are here since types between compat_timeval (= pair of s32) and
+ * ia64-native timeval (= pair of s64) are not compatible, at least a file
+ * arch/ia64/ia32/../../../fs/binfmt_elf.c will get warnings from compiler on
+ * use of cputime_to_timeval(), which usually an alias of jiffies_to_timeval().
+ */
+#define cputime_to_timeval(a,b) \
+	do { (b)->tv_usec = 0; (b)->tv_sec = (a)/NSEC_PER_SEC; } while(0)
+#else
+#define jiffies_to_timeval(a,b) \
+	do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; } while(0)
+#endif
 
 struct elf_prstatus
 {
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index b1bf51fe97b..7e028ceb93b 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -38,6 +38,7 @@
 #include <linux/eventpoll.h>
 #include <linux/personality.h>
 #include <linux/ptrace.h>
+#include <linux/regset.h>
 #include <linux/stat.h>
 #include <linux/ipc.h>
 #include <linux/capability.h>
@@ -2387,16 +2388,45 @@ get_free_idx (void)
 	return -ESRCH;
 }
 
+static void set_tls_desc(struct task_struct *p, int idx,
+		const struct ia32_user_desc *info, int n)
+{
+	struct thread_struct *t = &p->thread;
+	struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
+	int cpu;
+
+	/*
+	 * We must not get preempted while modifying the TLS.
+	 */
+	cpu = get_cpu();
+
+	while (n-- > 0) {
+		if (LDT_empty(info)) {
+			desc->a = 0;
+			desc->b = 0;
+		} else {
+			desc->a = LDT_entry_a(info);
+			desc->b = LDT_entry_b(info);
+		}
+
+		++info;
+		++desc;
+	}
+
+	if (t == &current->thread)
+		load_TLS(t, cpu);
+
+	put_cpu();
+}
+
 /*
  * Set a given TLS descriptor:
  */
 asmlinkage int
 sys32_set_thread_area (struct ia32_user_desc __user *u_info)
 {
-	struct thread_struct *t = &current->thread;
 	struct ia32_user_desc info;
-	struct desc_struct *desc;
-	int cpu, idx;
+	int idx;
 
 	if (copy_from_user(&info, u_info, sizeof(info)))
 		return -EFAULT;
@@ -2416,18 +2446,7 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
 	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
-
-	cpu = smp_processor_id();
-
-	if (LDT_empty(&info)) {
-		desc->a = 0;
-		desc->b = 0;
-	} else {
-		desc->a = LDT_entry_a(&info);
-		desc->b = LDT_entry_b(&info);
-	}
-	load_TLS(t, cpu);
+	set_tls_desc(current, idx, &info, 1);
 	return 0;
 }
 
@@ -2451,6 +2470,20 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
 #define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
 #define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
 
+static void fill_user_desc(struct ia32_user_desc *info, int idx,
+		const struct desc_struct *desc)
+{
+	info->entry_number = idx;
+	info->base_addr = GET_BASE(desc);
+	info->limit = GET_LIMIT(desc);
+	info->seg_32bit = GET_32BIT(desc);
+	info->contents = GET_CONTENTS(desc);
+	info->read_exec_only = !GET_WRITABLE(desc);
+	info->limit_in_pages = GET_LIMIT_PAGES(desc);
+	info->seg_not_present = !GET_PRESENT(desc);
+	info->useable = GET_USEABLE(desc);
+}
+
 asmlinkage int
 sys32_get_thread_area (struct ia32_user_desc __user *u_info)
 {
@@ -2464,22 +2497,588 @@ sys32_get_thread_area (struct ia32_user_desc __user *u_info)
 		return -EINVAL;
 
 	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-
-	info.entry_number = idx;
-	info.base_addr = GET_BASE(desc);
-	info.limit = GET_LIMIT(desc);
-	info.seg_32bit = GET_32BIT(desc);
-	info.contents = GET_CONTENTS(desc);
-	info.read_exec_only = !GET_WRITABLE(desc);
-	info.limit_in_pages = GET_LIMIT_PAGES(desc);
-	info.seg_not_present = !GET_PRESENT(desc);
-	info.useable = GET_USEABLE(desc);
+	fill_user_desc(&info, idx, desc);
 
 	if (copy_to_user(u_info, &info, sizeof(info)))
 		return -EFAULT;
 	return 0;
 }
 
+struct regset_get {
+	void *kbuf;
+	void __user *ubuf;
+};
+
+struct regset_set {
+	const void *kbuf;
+	const void __user *ubuf;
+};
+
+struct regset_getset {
+	struct task_struct *target;
+	const struct user_regset *regset;
+	union {
+		struct regset_get get;
+		struct regset_set set;
+	} u;
+	unsigned int pos;
+	unsigned int count;
+	int ret;
+};
+
+static void getfpreg(struct task_struct *task, int regno, int *val)
+{
+	switch (regno / sizeof(int)) {
+	case 0:
+		*val = task->thread.fcr & 0xffff;
+		break;
+	case 1:
+		*val = task->thread.fsr & 0xffff;
+		break;
+	case 2:
+		*val = (task->thread.fsr>>16) & 0xffff;
+		break;
+	case 3:
+		*val = task->thread.fir;
+		break;
+	case 4:
+		*val = (task->thread.fir>>32) & 0xffff;
+		break;
+	case 5:
+		*val = task->thread.fdr;
+		break;
+	case 6:
+		*val = (task->thread.fdr >> 32) & 0xffff;
+		break;
+	}
+}
+
+static void setfpreg(struct task_struct *task, int regno, int val)
+{
+	switch (regno / sizeof(int)) {
+	case 0:
+		task->thread.fcr = (task->thread.fcr & (~0x1f3f))
+			| (val & 0x1f3f);
+		break;
+	case 1:
+		task->thread.fsr = (task->thread.fsr & (~0xffff)) | val;
+		break;
+	case 2:
+		task->thread.fsr = (task->thread.fsr & (~0xffff0000))
+			| (val << 16);
+		break;
+	case 3:
+		task->thread.fir = (task->thread.fir & (~0xffffffff)) | val;
+		break;
+	case 5:
+		task->thread.fdr = (task->thread.fdr & (~0xffffffff)) | val;
+		break;
+	}
+}
+
+static void access_fpreg_ia32(int regno, void *reg,
+		struct pt_regs *pt, struct switch_stack *sw,
+		int tos, int write)
+{
+	void *f;
+
+	if ((regno += tos) >= 8)
+		regno -= 8;
+	if (regno < 4)
+		f = &pt->f8 + regno;
+	else if (regno <= 7)
+		f = &sw->f12 + (regno - 4);
+	else {
+		printk(KERN_ERR "regno must be less than 7 \n");
+		 return;
+	}
+
+	if (write)
+		memcpy(f, reg, sizeof(struct _fpreg_ia32));
+	else
+		memcpy(reg, f, sizeof(struct _fpreg_ia32));
+}
+
+static void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	struct pt_regs *pt;
+	int start, end, tos;
+	char buf[80];
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+	if (dst->pos < 7 * sizeof(int)) {
+		end = min((dst->pos + dst->count),
+			(unsigned int)(7 * sizeof(int)));
+		for (start = dst->pos; start < end; start += sizeof(int))
+			getfpreg(task, start, (int *)(buf + start));
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, buf,
+				0, 7 * sizeof(int));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+	if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		end = min(dst->pos + dst->count,
+			(unsigned int)(sizeof(struct ia32_user_i387_struct)));
+		start = (dst->pos - 7 * sizeof(int)) /
+			sizeof(struct _fpreg_ia32);
+		end = (end - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32);
+		for (; start < end; start++)
+			access_fpreg_ia32(start,
+				(struct _fpreg_ia32 *)buf + start,
+				pt, info->sw, tos, 0);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				buf, 7 * sizeof(int),
+				sizeof(struct ia32_user_i387_struct));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+}
+
+static void do_fpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	struct pt_regs *pt;
+	char buf[80];
+	int end, start, tos;
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+
+	if (dst->pos < 7 * sizeof(int)) {
+		start = dst->pos;
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, buf,
+				0, 7 * sizeof(int));
+		if (dst->ret)
+			return;
+		for (; start < dst->pos; start += sizeof(int))
+			setfpreg(task, start, *((int *)(buf + start)));
+		if (dst->count == 0)
+			return;
+	}
+	if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
+		start = (dst->pos - 7 * sizeof(int)) /
+			sizeof(struct _fpreg_ia32);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				buf, 7 * sizeof(int),
+				sizeof(struct ia32_user_i387_struct));
+		if (dst->ret)
+			return;
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		end = (dst->pos - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32);
+		for (; start < end; start++)
+			access_fpreg_ia32(start,
+				(struct _fpreg_ia32 *)buf + start,
+				pt, info->sw, tos, 1);
+		if (dst->count == 0)
+			return;
+	}
+}
+
+#define OFFSET(member) ((int)(offsetof(struct ia32_user_fxsr_struct, member)))
+static void getfpxreg(struct task_struct *task, int start, int end, char *buf)
+{
+	int min_val;
+
+	min_val = min(end, OFFSET(fop));
+	while (start < min_val) {
+		if (start == OFFSET(cwd))
+			*((short *)buf) = task->thread.fcr & 0xffff;
+		else if (start == OFFSET(swd))
+			*((short *)buf) = task->thread.fsr & 0xffff;
+		else if (start == OFFSET(twd))
+			*((short *)buf) = (task->thread.fsr>>16) & 0xffff;
+		buf += 2;
+		start += 2;
+	}
+	/* skip fop element */
+	if (start == OFFSET(fop)) {
+		start += 2;
+		buf += 2;
+	}
+	while (start < end) {
+		if (start == OFFSET(fip))
+			*((int *)buf) = task->thread.fir;
+		else if (start == OFFSET(fcs))
+			*((int *)buf) = (task->thread.fir>>32) & 0xffff;
+		else if (start == OFFSET(foo))
+			*((int *)buf) = task->thread.fdr;
+		else if (start == OFFSET(fos))
+			*((int *)buf) = (task->thread.fdr>>32) & 0xffff;
+		else if (start == OFFSET(mxcsr))
+			*((int *)buf) = ((task->thread.fcr>>32) & 0xff80)
+					 | ((task->thread.fsr>>32) & 0x3f);
+		buf += 4;
+		start += 4;
+	}
+}
+
+static void setfpxreg(struct task_struct *task, int start, int end, char *buf)
+{
+	int min_val, num32;
+	short num;
+	unsigned long num64;
+
+	min_val = min(end, OFFSET(fop));
+	while (start < min_val) {
+		num = *((short *)buf);
+		if (start == OFFSET(cwd)) {
+			task->thread.fcr = (task->thread.fcr & (~0x1f3f))
+						| (num & 0x1f3f);
+		} else if (start == OFFSET(swd)) {
+			task->thread.fsr = (task->thread.fsr & (~0xffff)) | num;
+		} else if (start == OFFSET(twd)) {
+			task->thread.fsr = (task->thread.fsr & (~0xffff0000))
+				| (((int)num) << 16);
+		}
+		buf += 2;
+		start += 2;
+	}
+	/* skip fop element */
+	if (start == OFFSET(fop)) {
+		start += 2;
+		buf += 2;
+	}
+	while (start < end) {
+		num32 = *((int *)buf);
+		if (start == OFFSET(fip))
+			task->thread.fir = (task->thread.fir & (~0xffffffff))
+						 | num32;
+		else if (start == OFFSET(foo))
+			task->thread.fdr = (task->thread.fdr & (~0xffffffff))
+						 | num32;
+		else if (start == OFFSET(mxcsr)) {
+			num64 = num32 & 0xff10;
+			task->thread.fcr = (task->thread.fcr &
+				(~0xff1000000000UL)) | (num64<<32);
+			num64 = num32 & 0x3f;
+			task->thread.fsr = (task->thread.fsr &
+				(~0x3f00000000UL)) | (num64<<32);
+		}
+		buf += 4;
+		start += 4;
+	}
+}
+
+static void do_fpxregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	struct pt_regs *pt;
+	char buf[128];
+	int start, end, tos;
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+	if (dst->pos < OFFSET(st_space[0])) {
+		end = min(dst->pos + dst->count, (unsigned int)32);
+		getfpxreg(task, dst->pos, end, buf);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, buf,
+				0, OFFSET(st_space[0]));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(xmm_space[0])) {
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		end = min(dst->pos + dst->count,
+				(unsigned int)OFFSET(xmm_space[0]));
+		start = (dst->pos - OFFSET(st_space[0])) / 16;
+		end = (end - OFFSET(st_space[0])) / 16;
+		for (; start < end; start++)
+			access_fpreg_ia32(start, buf + 16 * start, pt,
+						info->sw, tos, 0);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(padding[0]))
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				&info->sw->f16, OFFSET(xmm_space[0]),
+				OFFSET(padding[0]));
+}
+
+static void do_fpxregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	char buf[128];
+	int start, end;
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+
+	if (dst->pos < OFFSET(st_space[0])) {
+		start = dst->pos;
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				buf, 0, OFFSET(st_space[0]));
+		if (dst->ret)
+			return;
+		setfpxreg(task, start, dst->pos, buf);
+		if (dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(xmm_space[0])) {
+		struct pt_regs *pt;
+		int tos;
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		start = (dst->pos - OFFSET(st_space[0])) / 16;
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
+		if (dst->ret)
+			return;
+		end = (dst->pos - OFFSET(st_space[0])) / 16;
+		for (; start < end; start++)
+			access_fpreg_ia32(start, buf + 16 * start, pt, info->sw,
+						 tos, 1);
+		if (dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(padding[0]))
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				&info->sw->f16, OFFSET(xmm_space[0]),
+				 OFFSET(padding[0]));
+}
+#undef OFFSET
+
+static int do_regset_call(void (*call)(struct unw_frame_info *, void *),
+		struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	struct regset_getset info = { .target = target, .regset = regset,
+		.pos = pos, .count = count,
+		.u.set = { .kbuf = kbuf, .ubuf = ubuf },
+		.ret = 0 };
+
+	if (target == current)
+		unw_init_running(call, &info);
+	else {
+		struct unw_frame_info ufi;
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, target);
+		(*call)(&ufi, &info);
+	}
+
+	return info.ret;
+}
+
+static int ia32_fpregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_fpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_fpxregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_fpxregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_fpxregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_fpxregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_genregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	if (kbuf) {
+		u32 *kp = kbuf;
+		while (count > 0) {
+			*kp++ = getreg(target, pos);
+			pos += 4;
+			count -= 4;
+		}
+	} else {
+		u32 __user *up = ubuf;
+		while (count > 0) {
+			if (__put_user(getreg(target, pos), up++))
+				return -EFAULT;
+			pos += 4;
+			count -= 4;
+		}
+	}
+	return 0;
+}
+
+static int ia32_genregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	int ret = 0;
+
+	if (kbuf) {
+		const u32 *kp = kbuf;
+		while (!ret && count > 0) {
+			putreg(target, pos, *kp++);
+			pos += 4;
+			count -= 4;
+		}
+	} else {
+		const u32 __user *up = ubuf;
+		u32 val;
+		while (!ret && count > 0) {
+			ret = __get_user(val, up++);
+			if (!ret)
+				putreg(target, pos, val);
+			pos += 4;
+			count -= 4;
+		}
+	}
+	return ret;
+}
+
+static int ia32_tls_active(struct task_struct *target,
+		const struct user_regset *regset)
+{
+	struct thread_struct *t = &target->thread;
+	int n = GDT_ENTRY_TLS_ENTRIES;
+	while (n > 0 && desc_empty(&t->tls_array[n -1]))
+		--n;
+	return n;
+}
+
+static int ia32_tls_get(struct task_struct *target,
+		const struct user_regset *regset, unsigned int pos,
+		unsigned int count, void *kbuf, void __user *ubuf)
+{
+	const struct desc_struct *tls;
+
+	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) ||
+			(pos % sizeof(struct ia32_user_desc)) != 0 ||
+			(count % sizeof(struct ia32_user_desc)) != 0)
+		return -EINVAL;
+
+	pos /= sizeof(struct ia32_user_desc);
+	count /= sizeof(struct ia32_user_desc);
+
+	tls = &target->thread.tls_array[pos];
+
+	if (kbuf) {
+		struct ia32_user_desc *info = kbuf;
+		while (count-- > 0)
+			fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++,
+					tls++);
+	} else {
+		struct ia32_user_desc __user *u_info = ubuf;
+		while (count-- > 0) {
+			struct ia32_user_desc info;
+			fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++);
+			if (__copy_to_user(u_info++, &info, sizeof(info)))
+				return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
+static int ia32_tls_set(struct task_struct *target,
+		const struct user_regset *regset, unsigned int pos,
+		unsigned int count, const void *kbuf, const void __user *ubuf)
+{
+	struct ia32_user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
+	const struct ia32_user_desc *info;
+
+	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) ||
+			(pos % sizeof(struct ia32_user_desc)) != 0 ||
+			(count % sizeof(struct ia32_user_desc)) != 0)
+		return -EINVAL;
+
+	if (kbuf)
+		info = kbuf;
+	else if (__copy_from_user(infobuf, ubuf, count))
+		return -EFAULT;
+	else
+		info = infobuf;
+
+	set_tls_desc(target,
+		GDT_ENTRY_TLS_MIN + (pos / sizeof(struct ia32_user_desc)),
+		info, count / sizeof(struct ia32_user_desc));
+
+	return 0;
+}
+
+/*
+ * This should match arch/i386/kernel/ptrace.c:native_regsets.
+ * XXX ioperm? vm86?
+ */
+static const struct user_regset ia32_regsets[] = {
+	{
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(struct user_regs_struct32)/4,
+		.size = 4, .align = 4,
+		.get = ia32_genregs_get, .set = ia32_genregs_set
+	},
+	{
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(struct ia32_user_i387_struct) / 4,
+		.size = 4, .align = 4,
+		.get = ia32_fpregs_get, .set = ia32_fpregs_set
+	},
+	{
+		.core_note_type = NT_PRXFPREG,
+		.n = sizeof(struct ia32_user_fxsr_struct) / 4,
+		.size = 4, .align = 4,
+		.get = ia32_fpxregs_get, .set = ia32_fpxregs_set
+	},
+	{
+		.core_note_type = NT_386_TLS,
+		.n = GDT_ENTRY_TLS_ENTRIES,
+		.bias = GDT_ENTRY_TLS_MIN,
+		.size = sizeof(struct ia32_user_desc),
+		.align = sizeof(struct ia32_user_desc),
+		.active = ia32_tls_active,
+		.get = ia32_tls_get, .set = ia32_tls_set,
+	},
+};
+
+const struct user_regset_view user_ia32_view = {
+	.name = "i386", .e_machine = EM_386,
+	.regsets = ia32_regsets, .n = ARRAY_SIZE(ia32_regsets)
+};
+
 long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, 
 			__u32 len_low, __u32 len_high, int advice)
 { 
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 78f28d825f3..c7467f863c7 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -423,6 +423,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
 #define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
 #define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
 static struct acpi_table_slit __initdata *slit_table;
+cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
 
 static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
 {
@@ -482,6 +483,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	    (pa->apic_id << 8) | (pa->local_sapic_eid);
 	/* nid should be overridden as logical node id later */
 	node_cpuid[srat_num_cpus].nid = pxm;
+	cpu_set(srat_num_cpus, early_cpu_possible_map);
 	srat_num_cpus++;
 }
 
@@ -559,7 +561,7 @@ void __init acpi_numa_arch_fixup(void)
 	}
 
 	/* set logical node id in cpu structure */
-	for (i = 0; i < srat_num_cpus; i++)
+	for_each_possible_early_cpu(i)
 		node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
 
 	printk(KERN_INFO "Number of logical nodes in system = %d\n",
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index f7bc40dee43..230a6f92367 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -43,6 +43,12 @@ void foo(void)
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
+	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
+	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
+	DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
+#endif
 
 	BLANK();
 
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 728d7247a1a..d45f215bc8f 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -37,6 +37,7 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mca.h>
+#include <asm/tlbflush.h>
 
 #define EFI_DEBUG	0
 
@@ -403,6 +404,41 @@ efi_get_pal_addr (void)
 	return NULL;
 }
 
+
+static u8 __init palo_checksum(u8 *buffer, u32 length)
+{
+	u8 sum = 0;
+	u8 *end = buffer + length;
+
+	while (buffer < end)
+		sum = (u8) (sum + *(buffer++));
+
+	return sum;
+}
+
+/*
+ * Parse and handle PALO table which is published at:
+ * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
+ */
+static void __init handle_palo(unsigned long palo_phys)
+{
+	struct palo_table *palo = __va(palo_phys);
+	u8  checksum;
+
+	if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
+		printk(KERN_INFO "PALO signature incorrect.\n");
+		return;
+	}
+
+	checksum = palo_checksum((u8 *)palo, palo->length);
+	if (checksum) {
+		printk(KERN_INFO "PALO checksum incorrect.\n");
+		return;
+	}
+
+	setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
+}
+
 void
 efi_map_pal_code (void)
 {
@@ -432,6 +468,7 @@ efi_init (void)
 	u64 efi_desc_size;
 	char *cp, vendor[100] = "unknown";
 	int i;
+	unsigned long palo_phys;
 
 	/*
 	 * It's too early to be able to use the standard kernel command line
@@ -496,6 +533,8 @@ efi_init (void)
 	efi.hcdp       = EFI_INVALID_TABLE_ADDR;
 	efi.uga        = EFI_INVALID_TABLE_ADDR;
 
+	palo_phys      = EFI_INVALID_TABLE_ADDR;
+
 	for (i = 0; i < (int) efi.systab->nr_tables; i++) {
 		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
 			efi.mps = config_tables[i].table;
@@ -515,10 +554,17 @@ efi_init (void)
 		} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
 			efi.hcdp = config_tables[i].table;
 			printk(" HCDP=0x%lx", config_tables[i].table);
+		} else if (efi_guidcmp(config_tables[i].guid,
+			 PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
+			palo_phys = config_tables[i].table;
+			printk(" PALO=0x%lx", config_tables[i].table);
 		}
 	}
 	printk("\n");
 
+	if (palo_phys != EFI_INVALID_TABLE_ADDR)
+		handle_palo(palo_phys);
+
 	runtime = __va(efi.systab->runtime);
 	efi.get_time = phys_get_time;
 	efi.set_time = phys_set_time;
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 3c331c464b4..b0be4a28017 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -710,6 +710,16 @@ ENTRY(ia64_leave_syscall)
 (pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
 #endif
 .work_processed_syscall:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	adds r2=PT(LOADRS)+16,r12
+(pUStk)	mov.m r22=ar.itc			// fetch time at leave
+	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
+	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
+	adds r3=PT(AR_BSPSTORE)+16,r12		// deferred
+	;;
+#else
 	adds r2=PT(LOADRS)+16,r12
 	adds r3=PT(AR_BSPSTORE)+16,r12
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -718,6 +728,7 @@ ENTRY(ia64_leave_syscall)
 	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
 	nop.i 0
 	;;
+#endif
 	mov r16=ar.bsp				// M2  get existing backing store pointer
 	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
 (p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
@@ -737,12 +748,21 @@ ENTRY(ia64_leave_syscall)
 
 	ld8 r29=[r2],16		// M0|1 load cr.ipsr
 	ld8 r28=[r3],16		// M0|1 load cr.iip
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+	;;
+	ld8 r30=[r2],16		// M0|1 load cr.ifs
+	ld8 r25=[r3],16		// M0|1 load ar.unat
+(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+#else
 	mov r22=r0		// A    clear r22
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
 	ld8 r25=[r3],16		// M0|1 load ar.unat
 (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
 	;;
+#endif
 	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
 (pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
 	nop 0
@@ -759,7 +779,11 @@ ENTRY(ia64_leave_syscall)
 	ld8.fill r1=[r3],16			// M0|1 load r1
 (pUStk) mov r17=1				// A
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) st1 [r15]=r17				// M2|3
+#else
 (pUStk) st1 [r14]=r17				// M2|3
+#endif
 	ld8.fill r13=[r3],16			// M0|1
 	mov f8=f0				// F    clear f8
 	;;
@@ -775,12 +799,22 @@ ENTRY(ia64_leave_syscall)
 	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
 	cover				// B    add current frame into dirty partition & set cr.ifs
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov r19=ar.bsp			// M2   get new backing store pointer
+	st8 [r14]=r22			// M	save time at leave
+	mov f10=f0			// F    clear f10
+
+	mov r22=r0			// A	clear r22
+	movl r14=__kernel_syscall_via_epc // X
+	;;
+#else
 	mov r19=ar.bsp			// M2   get new backing store pointer
 	mov f10=f0			// F    clear f10
 
 	nop.m 0
 	movl r14=__kernel_syscall_via_epc // X
 	;;
+#endif
 	mov.m ar.csd=r0			// M2   clear ar.csd
 	mov.m ar.ccv=r0			// M2   clear ar.ccv
 	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
@@ -913,10 +947,18 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	.pred.rel.mutex pUStk,pKStk
+(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
+(pUStk)	mov.m r22=ar.itc	// M  fetch time at leave
+	nop.i 0
+	;;
+#else
 (pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
 	nop.i 0
 	nop.i 0
 	;;
+#endif
 	ld8 r29=[r16],16	// load cr.ipsr
 	ld8 r28=[r17],16	// load cr.iip
 	;;
@@ -938,15 +980,37 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	;;
 	ld8.fill r12=[r16],16
 	ld8.fill r13=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
+#else
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+#endif
 	;;
 	ld8 r20=[r16],16	// ar.fpsr
 	ld8.fill r15=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
+#endif
 	;;
 	ld8.fill r14=[r16],16
 	ld8.fill r2=[r17]
 (pUStk)	mov r17=1
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
+	//  mib  :  mov add br        ->  mib  : ld8 add br
+	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
+	//
+	//  no one require bsp in r16 if (pKStk) branch is selected.
+(pUStk)	st8 [r3]=r22		// save time at leave
+(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
+	shr.u r18=r19,16	// get byte size of existing "dirty" partition
+	;;
+	ld8.fill r3=[r16]	// deferred
+	LOAD_PHYS_STACK_REG_SIZE(r17)
+(pKStk)	br.cond.dpnt skip_rbs_switch
+	mov r16=ar.bsp		// get existing backing store pointer
+#else
 	ld8.fill r3=[r16]
 (pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
 	shr.u r18=r19,16	// get byte size of existing "dirty" partition
@@ -954,6 +1018,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	mov r16=ar.bsp		// get existing backing store pointer
 	LOAD_PHYS_STACK_REG_SIZE(r17)
 (pKStk)	br.cond.dpnt skip_rbs_switch
+#endif
 
 	/*
 	 * Restore user backing store.
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 3f570e6fcd9..c1625c7e177 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -236,27 +236,25 @@ ENTRY(fsys_gettimeofday)
 	// Note that instructions are optimized for McKinley. McKinley can
 	// process two bundles simultaneously and therefore we continuously
 	// try to feed the CPU two bundles and then a stop.
-	//
-	// Additional note that code has changed a lot. Optimization is TBD.
-	// Comments begin with "?" are maybe outdated.
-	tnat.nz p6,p0 = r31	// ? branch deferred to fit later bundle
-	mov pr = r30,0xc000	// Set predicates according to function
+
 	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+	tnat.nz p6,p0 = r31		// guard against Nat argument
+(p6)	br.cond.spnt.few .fail_einval
 	movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
 	;;
+	ld4 r2 = [r2]			// process work pending flags
 	movl r29 = itc_jitter_data	// itc_jitter
 	add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20	// wall_time
-	ld4 r2 = [r2]		// process work pending flags
-	;;
-(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
 	add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
-	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+	mov pr = r30,0xc000	// Set predicates according to function
+	;;
 	and r2 = TIF_ALLWORK_MASK,r2
-(p6)    br.cond.spnt.few .fail_einval	// ? deferred branch
+	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
 	;;
-	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
+	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20	// clksrc_cycle_last
 	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
-(p6)    br.cond.spnt.many fsys_fallback_syscall
+(p6)	br.cond.spnt.many fsys_fallback_syscall
 	;;
 	// Begin critical section
 .time_redo:
@@ -284,7 +282,6 @@ ENTRY(fsys_gettimeofday)
 (p8)	mov r2 = ar.itc		// CPU_TIMER. 36 clocks latency!!!
 (p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
 (p13)	ld8 r25 = [r19]		// get itc_lastcycle value
-	;;		// ? could be removed by moving the last add upward
 	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
 	;;
 	ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_nsec
@@ -311,13 +308,12 @@ ENTRY(fsys_gettimeofday)
 EX(.fail_efault, probe.w.fault r31, 3)
 	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
 	;;
-	// ? simulate tbit.nz.or p7,p0 = r28,0
 	getf.sig r2 = f8
 	mf
 	;;
 	ld4 r10 = [r20]		// gtod_lock.sequence
 	shr.u r2 = r2,r23	// shift by factor
-	;;		// ? overloaded 3 bundles!
+	;;
 	add r8 = r8,r2		// Add xtime.nsecs
 	cmp4.ne p7,p0 = r28,r10
 (p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
@@ -345,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
 EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
 (p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it
 	;;
-	mov r8 = r0
 (p14)	getf.sig r2 = f8
 	;;
+	mov r8 = r0
 (p14)	shr.u r21 = r2, 4
 	;;
 EX(.fail_efault, st8 [r31] = r9)
@@ -686,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	nop.i 0
 	;;
 	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov.m r30=ar.itc			// M    get cycle for accounting
+#else
 	nop.m 0
+#endif
 	nop.i 0
 	;;
 	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
@@ -708,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	// mov.m r30=ar.itc is called in advance
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave kernel
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r30,r19				// elapsed time in user mode
+	;;
+	add r20=r20,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r20				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+#endif
 	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
 	mov rp=r14				// I0   set the real return addr
 	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index d3a41d5f8d1..ddeab4e36fd 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1002,6 +1002,26 @@ GLOBAL_ENTRY(sched_clock)
 	br.ret.sptk.many rp
 END(sched_clock)
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+GLOBAL_ENTRY(cycle_to_cputime)
+	alloc r16=ar.pfs,1,0,0,0
+	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+	;;
+	ldf8 f8=[r8]
+	;;
+	setf.sig f9=r32
+	;;
+	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
+	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
+	;;
+	getf.sig r8=f10		//						(5 cyc)
+	getf.sig r9=f11
+	;;
+	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+	br.ret.sptk.many rp
+END(cycle_to_cputime)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
 GLOBAL_ENTRY(start_kernel_thread)
 	.prologue
 	.save rp, r0				// this is the end of the call-chain
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 34f44d8be00..6678c49daba 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -805,8 +805,13 @@ ENTRY(break_fault)
 
 (p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
 (p9)	adds r8=1,r8				// A    increment ei to next slot
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	;;
+	mov b6=r30				// I0   setup syscall handler branch reg early
+#else
 	nop.i 0
 	;;
+#endif
 
 	mov.m r25=ar.unat			// M2 (5 cyc)
 	dep r29=r8,r29,41,2			// I0   insert new ei into cr.ipsr
@@ -817,7 +822,11 @@ ENTRY(break_fault)
 	//
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov.m r30=ar.itc			// M    get cycle for accounting
+#else
 	mov b6=r30				// I0   setup syscall handler branch reg early
+#endif
 	cmp.eq pKStk,pUStk=r0,r17		// A    were we on kernel stacks already?
 
 	and r9=_TIF_SYSCALL_TRACEAUDIT,r9	// A    mask trace or audit
@@ -829,6 +838,30 @@ ENTRY(break_fault)
 	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 1:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	// mov.m r30=ar.itc is called in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13	// A
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13	// A
+(pKStk)	br.cond.spnt .skip_accounting		// B	unlikely skip
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// M  get last stamp
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// M  time at leave
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// M  cumulated stime
+	ld8 r21=[r17]				// M  cumulated utime
+	sub r22=r19,r18				// A  stime before leave
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// M  update stamp
+	sub r18=r30,r19				// A  elapsed time in user
+	;;
+	add r20=r20,r22				// A  sum stime
+	add r21=r21,r18				// A  sum utime
+	;;
+	st8 [r16]=r20				// M  update stime
+	st8 [r17]=r21				// M  update utime
+	;;
+.skip_accounting:
+#endif
 	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
 	nop 0
 	bsw.1					// B (6 cyc) regs are saved, switch to bank 1
@@ -928,6 +961,7 @@ END(interrupt)
 	 *	- r27: saved ar.rsc
 	 *	- r28: saved cr.iip
 	 *	- r29: saved cr.ipsr
+	 *	- r30: ar.itc for accounting (don't touch)
 	 *	- r31: saved pr
 	 *	-  b0: original contents (to be saved)
 	 * On exit:
@@ -1090,6 +1124,41 @@ END(dispatch_illegal_op_fault)
 	DBG_FAULT(16)
 	FAULT(16)
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	/*
+	 * There is no particular reason for this code to be here, other than
+	 * that there happens to be space here that would go unused otherwise.
+	 * If this fault ever gets "unreserved", simply moved the following
+	 * code to a more suitable spot...
+	 *
+	 * account_sys_enter is called from SAVE_MIN* macros if accounting is
+	 * enabled and if the macro is entered from user mode.
+	 */
+ENTRY(account_sys_enter)
+	// mov.m r20=ar.itc is called in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at left from kernel
+        ;;
+	ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r20,r19				// elapsed time in user mode
+	;;
+	add r23=r23,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r23				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+	br.ret.sptk.many rp
+END(account_sys_enter)
+#endif
+
 	.org ia64_ivt+0x4400
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x4400 Entry 17 (size 64 bundles) Reserved
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 1ae51291087..e51bced3b0f 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -98,6 +98,7 @@
 
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
+#include <asm/tlb.h>
 
 #include "mca_drv.h"
 #include "entry.h"
@@ -113,6 +114,7 @@ DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
 DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
 DEFINE_PER_CPU(u64, ia64_mca_pal_pte);	    /* PTE to map PAL code */
 DEFINE_PER_CPU(u64, ia64_mca_pal_base);    /* vaddr PAL code granule */
+DEFINE_PER_CPU(u64, ia64_mca_tr_reload);   /* Flag for TR reload */
 
 unsigned long __per_cpu_mca[NR_CPUS];
 
@@ -1184,6 +1186,49 @@ all_in:
 	return;
 }
 
+/*  mca_insert_tr
+ *
+ *  Switch rid when TR reload and needed!
+ *  iord: 1: itr, 2: itr;
+ *
+*/
+static void mca_insert_tr(u64 iord)
+{
+
+	int i;
+	u64 old_rr;
+	struct ia64_tr_entry *p;
+	unsigned long psr;
+	int cpu = smp_processor_id();
+
+	psr = ia64_clear_ic();
+	for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
+		p = &__per_cpu_idtrs[cpu][iord-1][i];
+		if (p->pte & 0x1) {
+			old_rr = ia64_get_rr(p->ifa);
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, p->rr);
+				ia64_srlz_d();
+			}
+			ia64_ptr(iord, p->ifa, p->itir >> 2);
+			ia64_srlz_i();
+			if (iord & 0x1) {
+				ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (iord & 0x2) {
+				ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, old_rr);
+				ia64_srlz_d();
+			}
+		}
+	}
+	ia64_set_psr(psr);
+}
+
 /*
  * ia64_mca_handler
  *
@@ -1270,6 +1315,11 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 		ia64_mlogbuf_finish(1);
 	}
 
+	if (__get_cpu_var(ia64_mca_tr_reload)) {
+		mca_insert_tr(0x1); /*Reload dynamic itrs*/
+		mca_insert_tr(0x2); /*Reload dynamic itrs*/
+	}
+
 	if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
 			== NOTIFY_STOP)
 		ia64_mca_spin(__func__);
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 8bc7d259e0c..a06d46548ff 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -219,8 +219,13 @@ ia64_reload_tr:
 	mov r20=IA64_TR_CURRENT_STACK
 	;;
 	itr.d dtr[r20]=r16
+	GET_THIS_PADDR(r2, ia64_mca_tr_reload)
+	mov r18 = 1
 	;;
 	srlz.d
+	;;
+	st8 [r2] =r18
+	;;
 
 done_tlb_purge_and_reload:
 
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index c9ac8bada78..7c548ac52bb 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -3,6 +3,18 @@
 
 #include "entry.h"
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define ACCOUNT_GET_STAMP				\
+(pUStk) mov.m r20=ar.itc;
+#define ACCOUNT_SYS_ENTER				\
+(pUStk) br.call.spnt rp=account_sys_enter		\
+	;;
+#else
+#define ACCOUNT_GET_STAMP
+#define ACCOUNT_SYS_ENTER
+#endif
+
 /*
  * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
  * the minimum state necessary that allows us to turn psr.ic back
@@ -122,11 +134,13 @@
 	;;											\
 .mem.offset 0,0; st8.spill [r16]=r2,16;								\
 .mem.offset 8,0; st8.spill [r17]=r3,16;								\
+	ACCOUNT_GET_STAMP									\
 	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
 	;;											\
 	EXTRA;											\
 	movl r1=__gp;		/* establish kernel global pointer */				\
 	;;											\
+	ACCOUNT_SYS_ENTER									\
 	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
 	;;
 
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index a78b45f5fe2..c93420c9740 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void)
 	for(node=0; node < MAX_NUMNODES; node++)
 		cpus_clear(node_to_cpu_mask[node]);
 
-	for(cpu = 0; cpu < NR_CPUS; ++cpu) {
+	for_each_possible_early_cpu(cpu) {
 		node = -1;
 		for (i = 0; i < NR_CPUS; ++i)
 			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index 2cb9425e042..e0dca8743db 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -135,10 +135,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 
 	while (offp < (s32 *) end) {
 		wp = (u64 *) ia64_imva((char *) offp + *offp);
-		wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
-		wp[1] = 0x0004000000000200UL;
-		wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
-		wp[3] = 0x0084006880000200UL;
+		wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+		wp[1] = 0x0084006880000200UL;
+		wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+		wp[3] = 0x0004000000000200UL;
 		ia64_fc(wp); ia64_fc(wp + 2);
 		++offp;
 	}
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 49937a383b2..a5ea817cbcb 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -625,21 +625,6 @@ do_dump_fpu (struct unw_frame_info *info, void *arg)
 	do_dump_task_fpu(current, info, arg);
 }
 
-int
-dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
-{
-	struct unw_frame_info tcore_info;
-
-	if (current == task) {
-		unw_init_running(do_copy_regs, regs);
-	} else {
-		memset(&tcore_info, 0, sizeof(tcore_info));
-		unw_init_from_blocked_task(&tcore_info, task);
-		do_copy_task_regs(task, &tcore_info, regs);
-	}
-	return 1;
-}
-
 void
 ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
 {
@@ -647,21 +632,6 @@ ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
 }
 
 int
-dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
-{
-	struct unw_frame_info tcore_info;
-
-	if (current == task) {
-		unw_init_running(do_dump_fpu, dst);
-	} else {
-		memset(&tcore_info, 0, sizeof(tcore_info));
-		unw_init_from_blocked_task(&tcore_info, task);
-		do_dump_task_fpu(task, &tcore_info, dst);
-	}
-	return 1;
-}
-
-int
 dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
 {
 	unw_init_running(do_dump_fpu, dst);
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index ab784ec4319..2a9943b5947 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -3,6 +3,9 @@
  *
  * Copyright (C) 1999-2005 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2006 Intel Co
+ *  2006-08-12	- IA64 Native Utrace implementation support added by
+ *	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  *
  * Derived from the x86 and Alpha versions.
  */
@@ -17,6 +20,8 @@
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -740,25 +745,6 @@ ia64_sync_fph (struct task_struct *task)
 	psr->dfh = 1;
 }
 
-static int
-access_fr (struct unw_frame_info *info, int regnum, int hi,
-	   unsigned long *data, int write_access)
-{
-	struct ia64_fpreg fpval;
-	int ret;
-
-	ret = unw_get_fr(info, regnum, &fpval);
-	if (ret < 0)
-		return ret;
-
-	if (write_access) {
-		fpval.u.bits[hi] = *data;
-		ret = unw_set_fr(info, regnum, fpval);
-	} else
-		*data = fpval.u.bits[hi];
-	return ret;
-}
-
 /*
  * Change the machine-state of CHILD such that it will return via the normal
  * kernel exit-path, rather than the syscall-exit path.
@@ -860,309 +846,7 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt,
 
 static int
 access_uarea (struct task_struct *child, unsigned long addr,
-	      unsigned long *data, int write_access)
-{
-	unsigned long *ptr, regnum, urbs_end, cfm;
-	struct switch_stack *sw;
-	struct pt_regs *pt;
-#	define pt_reg_addr(pt, reg)	((void *)			    \
-					 ((unsigned long) (pt)		    \
-					  + offsetof(struct pt_regs, reg)))
-
-
-	pt = task_pt_regs(child);
-	sw = (struct switch_stack *) (child->thread.ksp + 16);
-
-	if ((addr & 0x7) != 0) {
-		dprintk("ptrace: unaligned register address 0x%lx\n", addr);
-		return -1;
-	}
-
-	if (addr < PT_F127 + 16) {
-		/* accessing fph */
-		if (write_access)
-			ia64_sync_fph(child);
-		else
-			ia64_flush_fph(child);
-		ptr = (unsigned long *)
-			((unsigned long) &child->thread.fph + addr);
-	} else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
-		/* scratch registers untouched by kernel (saved in pt_regs) */
-		ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
-	} else if (addr >= PT_F12 && addr < PT_F15 + 16) {
-		/*
-		 * Scratch registers untouched by kernel (saved in
-		 * switch_stack).
-		 */
-		ptr = (unsigned long *) ((long) sw
-					 + (addr - PT_NAT_BITS - 32));
-	} else if (addr < PT_AR_LC + 8) {
-		/* preserved state: */
-		struct unw_frame_info info;
-		char nat = 0;
-		int ret;
-
-		unw_init_from_blocked_task(&info, child);
-		if (unw_unwind_to_user(&info) < 0)
-			return -1;
-
-		switch (addr) {
-		      case PT_NAT_BITS:
-			return access_nat_bits(child, pt, &info,
-					       data, write_access);
-
-		      case PT_R4: case PT_R5: case PT_R6: case PT_R7:
-			if (write_access) {
-				/* read NaT bit first: */
-				unsigned long dummy;
-
-				ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
-						 &dummy, &nat);
-				if (ret < 0)
-					return ret;
-			}
-			return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
-					     &nat, write_access);
-
-		      case PT_B1: case PT_B2: case PT_B3:
-		      case PT_B4: case PT_B5:
-			return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
-					     write_access);
-
-		      case PT_AR_EC:
-			return unw_access_ar(&info, UNW_AR_EC, data,
-					     write_access);
-
-		      case PT_AR_LC:
-			return unw_access_ar(&info, UNW_AR_LC, data,
-					     write_access);
-
-		      default:
-			if (addr >= PT_F2 && addr < PT_F5 + 16)
-				return access_fr(&info, (addr - PT_F2)/16 + 2,
-						 (addr & 8) != 0, data,
-						 write_access);
-			else if (addr >= PT_F16 && addr < PT_F31 + 16)
-				return access_fr(&info,
-						 (addr - PT_F16)/16 + 16,
-						 (addr & 8) != 0,
-						 data, write_access);
-			else {
-				dprintk("ptrace: rejecting access to register "
-					"address 0x%lx\n", addr);
-				return -1;
-			}
-		}
-	} else if (addr < PT_F9+16) {
-		/* scratch state */
-		switch (addr) {
-		      case PT_AR_BSP:
-			/*
-			 * By convention, we use PT_AR_BSP to refer to
-			 * the end of the user-level backing store.
-			 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
-			 * to get the real value of ar.bsp at the time
-			 * the kernel was entered.
-			 *
-			 * Furthermore, when changing the contents of
-			 * PT_AR_BSP (or PT_CFM) while the task is
-			 * blocked in a system call, convert the state
-			 * so that the non-system-call exit
-			 * path is used.  This ensures that the proper
-			 * state will be picked up when resuming
-			 * execution.  However, it *also* means that
-			 * once we write PT_AR_BSP/PT_CFM, it won't be
-			 * possible to modify the syscall arguments of
-			 * the pending system call any longer.  This
-			 * shouldn't be an issue because modifying
-			 * PT_AR_BSP/PT_CFM generally implies that
-			 * we're either abandoning the pending system
-			 * call or that we defer it's re-execution
-			 * (e.g., due to GDB doing an inferior
-			 * function call).
-			 */
-			urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
-			if (write_access) {
-				if (*data != urbs_end) {
-					if (in_syscall(pt))
-						convert_to_non_syscall(child,
-								       pt,
-								       cfm);
-					/*
-					 * Simulate user-level write
-					 * of ar.bsp:
-					 */
-					pt->loadrs = 0;
-					pt->ar_bspstore = *data;
-				}
-			} else
-				*data = urbs_end;
-			return 0;
-
-		      case PT_CFM:
-			urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
-			if (write_access) {
-				if (((cfm ^ *data) & PFM_MASK) != 0) {
-					if (in_syscall(pt))
-						convert_to_non_syscall(child,
-								       pt,
-								       cfm);
-					pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
-						      | (*data & PFM_MASK));
-				}
-			} else
-				*data = cfm;
-			return 0;
-
-		      case PT_CR_IPSR:
-			if (write_access) {
-				unsigned long tmp = *data;
-				/* psr.ri==3 is a reserved value: SDM 2:25 */
-				if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
-					tmp &= ~IA64_PSR_RI;
-				pt->cr_ipsr = ((tmp & IPSR_MASK)
-					       | (pt->cr_ipsr & ~IPSR_MASK));
-			} else
-				*data = (pt->cr_ipsr & IPSR_MASK);
-			return 0;
-
-		      case PT_AR_RSC:
-			if (write_access)
-				pt->ar_rsc = *data | (3 << 2); /* force PL3 */
-			else
-				*data = pt->ar_rsc;
-			return 0;
-
-		      case PT_AR_RNAT:
-			ptr = pt_reg_addr(pt, ar_rnat);
-			break;
-		      case PT_R1:
-			ptr = pt_reg_addr(pt, r1);
-			break;
-		      case PT_R2:  case PT_R3:
-			ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
-			break;
-		      case PT_R8:  case PT_R9:  case PT_R10: case PT_R11:
-			ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
-			break;
-		      case PT_R12: case PT_R13:
-			ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
-			break;
-		      case PT_R14:
-			ptr = pt_reg_addr(pt, r14);
-			break;
-		      case PT_R15:
-			ptr = pt_reg_addr(pt, r15);
-			break;
-		      case PT_R16: case PT_R17: case PT_R18: case PT_R19:
-		      case PT_R20: case PT_R21: case PT_R22: case PT_R23:
-		      case PT_R24: case PT_R25: case PT_R26: case PT_R27:
-		      case PT_R28: case PT_R29: case PT_R30: case PT_R31:
-			ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
-			break;
-		      case PT_B0:
-			ptr = pt_reg_addr(pt, b0);
-			break;
-		      case PT_B6:
-			ptr = pt_reg_addr(pt, b6);
-			break;
-		      case PT_B7:
-			ptr = pt_reg_addr(pt, b7);
-			break;
-		      case PT_F6:  case PT_F6+8: case PT_F7: case PT_F7+8:
-		      case PT_F8:  case PT_F8+8: case PT_F9: case PT_F9+8:
-			ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
-			break;
-		      case PT_AR_BSPSTORE:
-			ptr = pt_reg_addr(pt, ar_bspstore);
-			break;
-		      case PT_AR_UNAT:
-			ptr = pt_reg_addr(pt, ar_unat);
-			break;
-		      case PT_AR_PFS:
-			ptr = pt_reg_addr(pt, ar_pfs);
-			break;
-		      case PT_AR_CCV:
-			ptr = pt_reg_addr(pt, ar_ccv);
-			break;
-		      case PT_AR_FPSR:
-			ptr = pt_reg_addr(pt, ar_fpsr);
-			break;
-		      case PT_CR_IIP:
-			ptr = pt_reg_addr(pt, cr_iip);
-			break;
-		      case PT_PR:
-			ptr = pt_reg_addr(pt, pr);
-			break;
-			/* scratch register */
-
-		      default:
-			/* disallow accessing anything else... */
-			dprintk("ptrace: rejecting access to register "
-				"address 0x%lx\n", addr);
-			return -1;
-		}
-	} else if (addr <= PT_AR_SSD) {
-		ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
-	} else {
-		/* access debug registers */
-
-		if (addr >= PT_IBR) {
-			regnum = (addr - PT_IBR) >> 3;
-			ptr = &child->thread.ibr[0];
-		} else {
-			regnum = (addr - PT_DBR) >> 3;
-			ptr = &child->thread.dbr[0];
-		}
-
-		if (regnum >= 8) {
-			dprintk("ptrace: rejecting access to register "
-				"address 0x%lx\n", addr);
-			return -1;
-		}
-#ifdef CONFIG_PERFMON
-		/*
-		 * Check if debug registers are used by perfmon. This
-		 * test must be done once we know that we can do the
-		 * operation, i.e. the arguments are all valid, but
-		 * before we start modifying the state.
-		 *
-		 * Perfmon needs to keep a count of how many processes
-		 * are trying to modify the debug registers for system
-		 * wide monitoring sessions.
-		 *
-		 * We also include read access here, because they may
-		 * cause the PMU-installed debug register state
-		 * (dbr[], ibr[]) to be reset. The two arrays are also
-		 * used by perfmon, but we do not use
-		 * IA64_THREAD_DBG_VALID. The registers are restored
-		 * by the PMU context switch code.
-		 */
-		if (pfm_use_debug_registers(child)) return -1;
-#endif
-
-		if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
-			child->thread.flags |= IA64_THREAD_DBG_VALID;
-			memset(child->thread.dbr, 0,
-			       sizeof(child->thread.dbr));
-			memset(child->thread.ibr, 0,
-			       sizeof(child->thread.ibr));
-		}
-
-		ptr += regnum;
-
-		if ((regnum & 1) && write_access) {
-			/* don't let the user set kernel-level breakpoints: */
-			*ptr = *data & ~(7UL << 56);
-			return 0;
-		}
-	}
-	if (write_access)
-		*ptr = *data;
-	else
-		*data = *ptr;
-	return 0;
-}
+	      unsigned long *data, int write_access);
 
 static long
 ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
@@ -1626,3 +1310,892 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 	if (test_thread_flag(TIF_RESTORE_RSE))
 		ia64_sync_krbs();
 }
+
+/* Utrace implementation starts here */
+struct regset_get {
+	void *kbuf;
+	void __user *ubuf;
+};
+
+struct regset_set {
+	const void *kbuf;
+	const void __user *ubuf;
+};
+
+struct regset_getset {
+	struct task_struct *target;
+	const struct user_regset *regset;
+	union {
+		struct regset_get get;
+		struct regset_set set;
+	} u;
+	unsigned int pos;
+	unsigned int count;
+	int ret;
+};
+
+static int
+access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long *ptr = NULL;
+	int ret;
+	char nat = 0;
+
+	pt = task_pt_regs(target);
+	switch (addr) {
+	case ELF_GR_OFFSET(1):
+		ptr = &pt->r1;
+		break;
+	case ELF_GR_OFFSET(2):
+	case ELF_GR_OFFSET(3):
+		ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2));
+		break;
+	case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7):
+		if (write_access) {
+			/* read NaT bit first: */
+			unsigned long dummy;
+
+			ret = unw_get_gr(info, addr/8, &dummy, &nat);
+			if (ret < 0)
+				return ret;
+		}
+		return unw_access_gr(info, addr/8, data, &nat, write_access);
+	case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11):
+		ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8);
+		break;
+	case ELF_GR_OFFSET(12):
+	case ELF_GR_OFFSET(13):
+		ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12);
+		break;
+	case ELF_GR_OFFSET(14):
+		ptr = &pt->r14;
+		break;
+	case ELF_GR_OFFSET(15):
+		ptr = &pt->r15;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static int
+access_elf_breg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long *ptr = NULL;
+
+	pt = task_pt_regs(target);
+	switch (addr) {
+	case ELF_BR_OFFSET(0):
+		ptr = &pt->b0;
+		break;
+	case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5):
+		return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8,
+				     data, write_access);
+	case ELF_BR_OFFSET(6):
+		ptr = &pt->b6;
+		break;
+	case ELF_BR_OFFSET(7):
+		ptr = &pt->b7;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static int
+access_elf_areg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long cfm, urbs_end;
+	unsigned long *ptr = NULL;
+
+	pt = task_pt_regs(target);
+	if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) {
+		switch (addr) {
+		case ELF_AR_RSC_OFFSET:
+			/* force PL3 */
+			if (write_access)
+				pt->ar_rsc = *data | (3 << 2);
+			else
+				*data = pt->ar_rsc;
+			return 0;
+		case ELF_AR_BSP_OFFSET:
+			/*
+			 * By convention, we use PT_AR_BSP to refer to
+			 * the end of the user-level backing store.
+			 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
+			 * to get the real value of ar.bsp at the time
+			 * the kernel was entered.
+			 *
+			 * Furthermore, when changing the contents of
+			 * PT_AR_BSP (or PT_CFM) while the task is
+			 * blocked in a system call, convert the state
+			 * so that the non-system-call exit
+			 * path is used.  This ensures that the proper
+			 * state will be picked up when resuming
+			 * execution.  However, it *also* means that
+			 * once we write PT_AR_BSP/PT_CFM, it won't be
+			 * possible to modify the syscall arguments of
+			 * the pending system call any longer.  This
+			 * shouldn't be an issue because modifying
+			 * PT_AR_BSP/PT_CFM generally implies that
+			 * we're either abandoning the pending system
+			 * call or that we defer it's re-execution
+			 * (e.g., due to GDB doing an inferior
+			 * function call).
+			 */
+			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+			if (write_access) {
+				if (*data != urbs_end) {
+					if (in_syscall(pt))
+						convert_to_non_syscall(target,
+								       pt,
+								       cfm);
+					/*
+					 * Simulate user-level write
+					 * of ar.bsp:
+					 */
+					pt->loadrs = 0;
+					pt->ar_bspstore = *data;
+				}
+			} else
+				*data = urbs_end;
+			return 0;
+		case ELF_AR_BSPSTORE_OFFSET:
+			ptr = &pt->ar_bspstore;
+			break;
+		case ELF_AR_RNAT_OFFSET:
+			ptr = &pt->ar_rnat;
+			break;
+		case ELF_AR_CCV_OFFSET:
+			ptr = &pt->ar_ccv;
+			break;
+		case ELF_AR_UNAT_OFFSET:
+			ptr = &pt->ar_unat;
+			break;
+		case ELF_AR_FPSR_OFFSET:
+			ptr = &pt->ar_fpsr;
+			break;
+		case ELF_AR_PFS_OFFSET:
+			ptr = &pt->ar_pfs;
+			break;
+		case ELF_AR_LC_OFFSET:
+			return unw_access_ar(info, UNW_AR_LC, data,
+					     write_access);
+		case ELF_AR_EC_OFFSET:
+			return unw_access_ar(info, UNW_AR_EC, data,
+					     write_access);
+		case ELF_AR_CSD_OFFSET:
+			ptr = &pt->ar_csd;
+			break;
+		case ELF_AR_SSD_OFFSET:
+			ptr = &pt->ar_ssd;
+		}
+	} else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) {
+		switch (addr) {
+		case ELF_CR_IIP_OFFSET:
+			ptr = &pt->cr_iip;
+			break;
+		case ELF_CFM_OFFSET:
+			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+			if (write_access) {
+				if (((cfm ^ *data) & PFM_MASK) != 0) {
+					if (in_syscall(pt))
+						convert_to_non_syscall(target,
+								       pt,
+								       cfm);
+					pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
+						      | (*data & PFM_MASK));
+				}
+			} else
+				*data = cfm;
+			return 0;
+		case ELF_CR_IPSR_OFFSET:
+			if (write_access) {
+				unsigned long tmp = *data;
+				/* psr.ri==3 is a reserved value: SDM 2:25 */
+				if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
+					tmp &= ~IA64_PSR_RI;
+				pt->cr_ipsr = ((tmp & IPSR_MASK)
+					       | (pt->cr_ipsr & ~IPSR_MASK));
+			} else
+				*data = (pt->cr_ipsr & IPSR_MASK);
+			return 0;
+		}
+	} else if (addr == ELF_NAT_OFFSET)
+		return access_nat_bits(target, pt, info,
+				       data, write_access);
+	else if (addr == ELF_PR_OFFSET)
+		ptr = &pt->pr;
+	else
+		return -1;
+
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+
+	return 0;
+}
+
+static int
+access_elf_reg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15))
+		return access_elf_gpreg(target, info, addr, data, write_access);
+	else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7))
+		return access_elf_breg(target, info, addr, data, write_access);
+	else
+		return access_elf_areg(target, info, addr, data, write_access);
+}
+
+void do_gpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	struct regset_getset *dst = arg;
+	elf_greg_t tmp[16];
+	unsigned int i, index, min_copy;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/*
+	 * coredump format:
+	 *      r0-r31
+	 *      NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+	 *      predicate registers (p0-p63)
+	 *      b0-b7
+	 *      ip cfm user-mask
+	 *      ar.rsc ar.bsp ar.bspstore ar.rnat
+	 *      ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+	 */
+
+
+	/* Skip r0 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+		dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+						      &dst->u.get.kbuf,
+						      &dst->u.get.ubuf,
+						      0, ELF_GR_OFFSET(1));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* gr1 - gr15 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+		index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+		min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_GR_OFFSET(16);
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* r16-r31 */
+	if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+		pt = task_pt_regs(dst->target);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16,
+				ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* nat, pr, b0 - b7 */
+	if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+		index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+		min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_CR_IIP_OFFSET;
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+	 * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+	 */
+	if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+		index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+		min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_AR_END_OFFSET;
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+	}
+}
+
+void do_gpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	struct regset_getset *dst = arg;
+	elf_greg_t tmp[16];
+	unsigned int i, index;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip r0 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+		dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+						       &dst->u.set.kbuf,
+						       &dst->u.set.ubuf,
+						       0, ELF_GR_OFFSET(1));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* gr1-gr15 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+		i = dst->pos;
+		index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+		if (dst->ret)
+			return;
+		for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		if (dst->count == 0)
+			return;
+	}
+
+	/* gr16-gr31 */
+	if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+		pt = task_pt_regs(dst->target);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16,
+				ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* nat, pr, b0 - b7 */
+	if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+		i = dst->pos;
+		index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+		if (dst->ret)
+			return;
+		for (; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		if (dst->count == 0)
+			return;
+	}
+
+	/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+	 * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+	 */
+	if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+		i = dst->pos;
+		index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+		if (dst->ret)
+			return;
+		for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+	}
+}
+
+#define ELF_FP_OFFSET(i)	(i * sizeof(elf_fpreg_t))
+
+void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	elf_fpreg_t tmp[30];
+	int index, min_copy, i;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip pos 0 and 1 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+		dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+						      &dst->u.get.kbuf,
+						      &dst->u.get.ubuf,
+						      0, ELF_FP_OFFSET(2));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fr2-fr31 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+		index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t);
+
+		min_copy = min(((unsigned int)ELF_FP_OFFSET(32)),
+				dst->pos + dst->count);
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t),
+				index++)
+			if (unw_get_fr(info, i / sizeof(elf_fpreg_t),
+					 &tmp[index])) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fph */
+	if (dst->count > 0) {
+		ia64_flush_fph(dst->target);
+		if (task->thread.flags & IA64_THREAD_FPH_VALID)
+			dst->ret = user_regset_copyout(
+				&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				&dst->target->thread.fph,
+				ELF_FP_OFFSET(32), -1);
+		else
+			/* Zero fill instead.  */
+			dst->ret = user_regset_copyout_zero(
+				&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				ELF_FP_OFFSET(32), -1);
+	}
+}
+
+void do_fpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	elf_fpreg_t fpreg, tmp[30];
+	int index, start, end;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip pos 0 and 1 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+		dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+						       &dst->u.set.kbuf,
+						       &dst->u.set.ubuf,
+						       0, ELF_FP_OFFSET(2));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fr2-fr31 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+		start = dst->pos;
+		end = min(((unsigned int)ELF_FP_OFFSET(32)),
+			 dst->pos + dst->count);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+		if (dst->ret)
+			return;
+
+		if (start & 0xF) { /* only write high part */
+			if (unw_get_fr(info, start / sizeof(elf_fpreg_t),
+					 &fpreg)) {
+				dst->ret = -EIO;
+				return;
+			}
+			tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0]
+				= fpreg.u.bits[0];
+			start &= ~0xFUL;
+		}
+		if (end & 0xF) { /* only write low part */
+			if (unw_get_fr(info, end / sizeof(elf_fpreg_t),
+					&fpreg)) {
+				dst->ret = -EIO;
+				return;
+			}
+			tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1]
+				= fpreg.u.bits[1];
+			end = (end + 0xF) & ~0xFUL;
+		}
+
+		for ( ;	start < end ; start += sizeof(elf_fpreg_t)) {
+			index = start / sizeof(elf_fpreg_t);
+			if (unw_set_fr(info, index, tmp[index - 2])) {
+				dst->ret = -EIO;
+				return;
+			}
+		}
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* fph */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) {
+		ia64_sync_fph(dst->target);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+						&dst->u.set.kbuf,
+						&dst->u.set.ubuf,
+						&dst->target->thread.fph,
+						ELF_FP_OFFSET(32), -1);
+	}
+}
+
+static int
+do_regset_call(void (*call)(struct unw_frame_info *, void *),
+	       struct task_struct *target,
+	       const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf)
+{
+	struct regset_getset info = { .target = target, .regset = regset,
+				 .pos = pos, .count = count,
+				 .u.set = { .kbuf = kbuf, .ubuf = ubuf },
+				 .ret = 0 };
+
+	if (target == current)
+		unw_init_running(call, &info);
+	else {
+		struct unw_frame_info ufi;
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, target);
+		(*call)(&ufi, &info);
+	}
+
+	return info.ret;
+}
+
+static int
+gpregs_get(struct task_struct *target,
+	   const struct user_regset *regset,
+	   unsigned int pos, unsigned int count,
+	   void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_gpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int gpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_gpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static void do_gpregs_writeback(struct unw_frame_info *info, void *arg)
+{
+	do_sync_rbs(info, ia64_sync_user_rbs);
+}
+
+/*
+ * This is called to write back the register backing store.
+ * ptrace does this before it stops, so that a tracer reading the user
+ * memory after the thread stops will get the current register data.
+ */
+static int
+gpregs_writeback(struct task_struct *target,
+		 const struct user_regset *regset,
+		 int now)
+{
+	if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE))
+		return 0;
+	tsk_set_notify_resume(target);
+	return do_regset_call(do_gpregs_writeback, target, regset, 0, 0,
+		NULL, NULL);
+}
+
+static int
+fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+	return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32;
+}
+
+static int fpregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int fpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int
+access_uarea(struct task_struct *child, unsigned long addr,
+	      unsigned long *data, int write_access)
+{
+	unsigned int pos = -1; /* an invalid value */
+	int ret;
+	unsigned long *ptr, regnum;
+
+	if ((addr & 0x7) != 0) {
+		dprintk("ptrace: unaligned register address 0x%lx\n", addr);
+		return -1;
+	}
+	if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) ||
+		(addr >= PT_R7 + 8 && addr < PT_B1) ||
+		(addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) ||
+		(addr >= PT_AR_SSD + 8 && addr < PT_DBR)) {
+		dprintk("ptrace: rejecting access to register "
+					"address 0x%lx\n", addr);
+		return -1;
+	}
+
+	switch (addr) {
+	case PT_F32 ... (PT_F127 + 15):
+		pos = addr - PT_F32 + ELF_FP_OFFSET(32);
+		break;
+	case PT_F2 ... (PT_F5 + 15):
+		pos = addr - PT_F2 + ELF_FP_OFFSET(2);
+		break;
+	case PT_F10 ... (PT_F31 + 15):
+		pos = addr - PT_F10 + ELF_FP_OFFSET(10);
+		break;
+	case PT_F6 ... (PT_F9 + 15):
+		pos = addr - PT_F6 + ELF_FP_OFFSET(6);
+		break;
+	}
+
+	if (pos != -1) {
+		if (write_access)
+			ret = fpregs_set(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		else
+			ret = fpregs_get(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		if (ret != 0)
+			return -1;
+		return 0;
+	}
+
+	switch (addr) {
+	case PT_NAT_BITS:
+		pos = ELF_NAT_OFFSET;
+		break;
+	case PT_R4 ... PT_R7:
+		pos = addr - PT_R4 + ELF_GR_OFFSET(4);
+		break;
+	case PT_B1 ... PT_B5:
+		pos = addr - PT_B1 + ELF_BR_OFFSET(1);
+		break;
+	case PT_AR_EC:
+		pos = ELF_AR_EC_OFFSET;
+		break;
+	case PT_AR_LC:
+		pos = ELF_AR_LC_OFFSET;
+		break;
+	case PT_CR_IPSR:
+		pos = ELF_CR_IPSR_OFFSET;
+		break;
+	case PT_CR_IIP:
+		pos = ELF_CR_IIP_OFFSET;
+		break;
+	case PT_CFM:
+		pos = ELF_CFM_OFFSET;
+		break;
+	case PT_AR_UNAT:
+		pos = ELF_AR_UNAT_OFFSET;
+		break;
+	case PT_AR_PFS:
+		pos = ELF_AR_PFS_OFFSET;
+		break;
+	case PT_AR_RSC:
+		pos = ELF_AR_RSC_OFFSET;
+		break;
+	case PT_AR_RNAT:
+		pos = ELF_AR_RNAT_OFFSET;
+		break;
+	case PT_AR_BSPSTORE:
+		pos = ELF_AR_BSPSTORE_OFFSET;
+		break;
+	case PT_PR:
+		pos = ELF_PR_OFFSET;
+		break;
+	case PT_B6:
+		pos = ELF_BR_OFFSET(6);
+		break;
+	case PT_AR_BSP:
+		pos = ELF_AR_BSP_OFFSET;
+		break;
+	case PT_R1 ... PT_R3:
+		pos = addr - PT_R1 + ELF_GR_OFFSET(1);
+		break;
+	case PT_R12 ... PT_R15:
+		pos = addr - PT_R12 + ELF_GR_OFFSET(12);
+		break;
+	case PT_R8 ... PT_R11:
+		pos = addr - PT_R8 + ELF_GR_OFFSET(8);
+		break;
+	case PT_R16 ... PT_R31:
+		pos = addr - PT_R16 + ELF_GR_OFFSET(16);
+		break;
+	case PT_AR_CCV:
+		pos = ELF_AR_CCV_OFFSET;
+		break;
+	case PT_AR_FPSR:
+		pos = ELF_AR_FPSR_OFFSET;
+		break;
+	case PT_B0:
+		pos = ELF_BR_OFFSET(0);
+		break;
+	case PT_B7:
+		pos = ELF_BR_OFFSET(7);
+		break;
+	case PT_AR_CSD:
+		pos = ELF_AR_CSD_OFFSET;
+		break;
+	case PT_AR_SSD:
+		pos = ELF_AR_SSD_OFFSET;
+		break;
+	}
+
+	if (pos != -1) {
+		if (write_access)
+			ret = gpregs_set(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		else
+			ret = gpregs_get(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		if (ret != 0)
+			return -1;
+		return 0;
+	}
+
+	/* access debug registers */
+	if (addr >= PT_IBR) {
+		regnum = (addr - PT_IBR) >> 3;
+		ptr = &child->thread.ibr[0];
+	} else {
+		regnum = (addr - PT_DBR) >> 3;
+		ptr = &child->thread.dbr[0];
+	}
+
+	if (regnum >= 8) {
+		dprintk("ptrace: rejecting access to register "
+				"address 0x%lx\n", addr);
+		return -1;
+	}
+#ifdef CONFIG_PERFMON
+	/*
+	 * Check if debug registers are used by perfmon. This
+	 * test must be done once we know that we can do the
+	 * operation, i.e. the arguments are all valid, but
+	 * before we start modifying the state.
+	 *
+	 * Perfmon needs to keep a count of how many processes
+	 * are trying to modify the debug registers for system
+	 * wide monitoring sessions.
+	 *
+	 * We also include read access here, because they may
+	 * cause the PMU-installed debug register state
+	 * (dbr[], ibr[]) to be reset. The two arrays are also
+	 * used by perfmon, but we do not use
+	 * IA64_THREAD_DBG_VALID. The registers are restored
+	 * by the PMU context switch code.
+	 */
+	if (pfm_use_debug_registers(child))
+		return -1;
+#endif
+
+	if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+		child->thread.flags |= IA64_THREAD_DBG_VALID;
+		memset(child->thread.dbr, 0,
+				sizeof(child->thread.dbr));
+		memset(child->thread.ibr, 0,
+				sizeof(child->thread.ibr));
+	}
+
+	ptr += regnum;
+
+	if ((regnum & 1) && write_access) {
+		/* don't let the user set kernel-level breakpoints: */
+		*ptr = *data & ~(7UL << 56);
+		return 0;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static const struct user_regset native_regsets[] = {
+	{
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t),
+		.get = gpregs_get, .set = gpregs_set,
+		.writeback = gpregs_writeback
+	},
+	{
+		.core_note_type = NT_PRFPREG,
+		.n = ELF_NFPREG,
+		.size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t),
+		.get = fpregs_get, .set = fpregs_set, .active = fpregs_active
+	},
+};
+
+static const struct user_regset_view user_ia64_view = {
+	.name = "ia64",
+	.e_machine = EM_IA_64,
+	.regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *tsk)
+{
+#ifdef CONFIG_IA32_SUPPORT
+	extern const struct user_regset_view user_ia32_view;
+	if (IS_IA32_PROCESS(task_pt_regs(tsk)))
+		return &user_ia32_view;
+#endif
+	return &user_ia64_view;
+}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c85b7dd6ef3..5015ca1275c 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -59,6 +59,7 @@
 #include <asm/setup.h>
 #include <asm/smp.h>
 #include <asm/system.h>
+#include <asm/tlbflush.h>
 #include <asm/unistd.h>
 #include <asm/hpsim.h>
 
@@ -516,6 +517,8 @@ setup_arch (char **cmdline_p)
 	acpi_table_init();
 # ifdef CONFIG_ACPI_NUMA
 	acpi_numa_init();
+	per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
+		32 : cpus_weight(early_cpu_possible_map)), additional_cpus);
 # endif
 #else
 # ifdef CONFIG_SMP
@@ -969,9 +972,10 @@ cpu_init (void)
 #endif
 
 	/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
-	if (ia64_pal_vm_summary(NULL, &vmi) == 0)
+	if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
 		max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
-	else {
+		setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL);
+	} else {
 		printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
 		max_ctx = (1U << 15) - 1;	/* use architected minimum */
 	}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 4e446aa5f4a..9a9d4c48933 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -213,6 +213,19 @@ send_IPI_allbutself (int op)
  * Called with preemption disabled.
  */
 static inline void
+send_IPI_mask(cpumask_t mask, int op)
+{
+	unsigned int cpu;
+
+	for_each_cpu_mask(cpu, mask) {
+			send_IPI_single(cpu, op);
+	}
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
 send_IPI_all (int op)
 {
 	int i;
@@ -401,6 +414,75 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
 }
 EXPORT_SYMBOL(smp_call_function_single);
 
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * <mask>	The set of cpus to run on.  Must not include the current cpu.
+ * <func> 	The function to run. This must be fast and non-blocking.
+ * <info>	An arbitrary pointer to pass to the function.
+ * <wait>	If true, wait (atomically) until function
+ *		has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function_mask(cpumask_t mask,
+			   void (*func)(void *), void *info,
+			   int wait)
+{
+	struct call_data_struct data;
+	cpumask_t allbutself;
+	int cpus;
+
+	spin_lock(&call_lock);
+	allbutself = cpu_online_map;
+	cpu_clear(smp_processor_id(), allbutself);
+
+	cpus_and(mask, mask, allbutself);
+	cpus = cpus_weight(mask);
+	if (!cpus) {
+		spin_unlock(&call_lock);
+		return 0;
+	}
+
+	/* Can deadlock when called with interrupts disabled */
+	WARN_ON(irqs_disabled());
+
+	data.func = func;
+	data.info = info;
+	atomic_set(&data.started, 0);
+	data.wait = wait;
+	if (wait)
+		atomic_set(&data.finished, 0);
+
+	call_data = &data;
+	mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC*/
+
+	/* Send a message to other CPUs */
+	if (cpus_equal(mask, allbutself))
+		send_IPI_allbutself(IPI_CALL_FUNC);
+	else
+		send_IPI_mask(mask, IPI_CALL_FUNC);
+
+	/* Wait for response */
+	while (atomic_read(&data.started) != cpus)
+		cpu_relax();
+
+	if (wait)
+		while (atomic_read(&data.finished) != cpus)
+			cpu_relax();
+	call_data = NULL;
+
+	spin_unlock(&call_lock);
+	return 0;
+
+}
+EXPORT_SYMBOL(smp_call_function_mask);
+
 /*
  * this function sends a 'generic call function' IPI to all other CPUs
  * in the system.
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 17fda5293c6..48e15a51782 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -59,6 +59,84 @@ static struct clocksource clocksource_itc = {
 };
 static struct clocksource *itc_clocksource;
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+
+#include <linux/kernel_stat.h>
+
+extern cputime_t cycle_to_cputime(u64 cyc);
+
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
+{
+	struct thread_info *pi = task_thread_info(prev);
+	struct thread_info *ni = task_thread_info(next);
+	cputime_t delta_stime, delta_utime;
+	__u64 now;
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
+	account_system_time(prev, 0, delta_stime);
+	account_system_time_scaled(prev, delta_stime);
+
+	if (pi->ac_utime) {
+		delta_utime = cycle_to_cputime(pi->ac_utime);
+		account_user_time(prev, delta_utime);
+		account_user_time_scaled(prev, delta_utime);
+	}
+
+	pi->ac_stamp = ni->ac_stamp = now;
+	ni->ac_stime = ni->ac_utime = 0;
+}
+
+/*
+ * Account time for a transition between system, hard irq or soft irq state.
+ * Note that this function is called with interrupts enabled.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	unsigned long flags;
+	cputime_t delta_stime;
+	__u64 now;
+
+	local_irq_save(flags);
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
+	account_system_time(tsk, 0, delta_stime);
+	account_system_time_scaled(tsk, delta_stime);
+	ti->ac_stime = 0;
+
+	ti->ac_stamp = now;
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Called from the timer interrupt handler to charge accumulated user time
+ * to the current process.  Must be called with interrupts disabled.
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+	struct thread_info *ti = task_thread_info(p);
+	cputime_t delta_utime;
+
+	if (ti->ac_utime) {
+		delta_utime = cycle_to_cputime(ti->ac_utime);
+		account_user_time(p, delta_utime);
+		account_user_time_scaled(p, delta_utime);
+		ti->ac_utime = 0;
+	}
+}
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id)
 {
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 96d5fbfa44a..544dc420c65 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -104,7 +104,7 @@ static int __meminit early_nr_cpus_node(int node)
 {
 	int cpu, n = 0;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
+	for_each_possible_early_cpu(cpu)
 		if (node == node_cpuid[cpu].nid)
 			n++;
 
@@ -124,6 +124,7 @@ static unsigned long __meminit compute_pernodesize(int node)
 	pernodesize += node * L1_CACHE_BYTES;
 	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
 	pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
 	pernodesize = PAGE_ALIGN(pernodesize);
 	return pernodesize;
 }
@@ -142,7 +143,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
 #ifdef CONFIG_SMP
 	int cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_early_cpu(cpu) {
 		if (node == node_cpuid[cpu].nid) {
 			memcpy(__va(cpu_data), __phys_per_cpu_start,
 			       __per_cpu_end - __per_cpu_start);
@@ -345,7 +346,7 @@ static void __init initialize_pernode_data(void)
 
 #ifdef CONFIG_SMP
 	/* Set the node_data pointer for each per-cpu struct */
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_early_cpu(cpu) {
 		node = node_cpuid[cpu].nid;
 		per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
 	}
@@ -493,13 +494,9 @@ void __cpuinit *per_cpu_init(void)
 	int cpu;
 	static int first_time = 1;
 
-
-	if (smp_processor_id() != 0)
-		return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
-
 	if (first_time) {
 		first_time = 0;
-		for (cpu = 0; cpu < NR_CPUS; cpu++)
+		for_each_possible_early_cpu(cpu)
 			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
 	}
 
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 7807fc5c042..b73bf1838e5 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -27,7 +27,9 @@
  */
 int num_node_memblks;
 struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
-struct node_cpuid_s node_cpuid[NR_CPUS];
+struct node_cpuid_s node_cpuid[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
+
 /*
  * This is a matrix with "distances" between nodes, they should be
  * proportional to the memory access latency ratios.
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 655da240d13..d52ec4e8340 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -11,6 +11,9 @@
  * Rohit Seth <rohit.seth@intel.com>
  * Ken Chen <kenneth.w.chen@intel.com>
  * Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation
+ * Copyright (C) 2007 Intel Corp
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *	Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
  */
 #include <linux/module.h>
 #include <linux/init.h>
@@ -26,6 +29,9 @@
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
 #include <asm/dma.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/tlb.h>
 
 static struct {
 	unsigned long mask;	/* mask of supported purge page-sizes */
@@ -39,6 +45,10 @@ struct ia64_ctx ia64_ctx = {
 };
 
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
+DEFINE_PER_CPU(u8, ia64_tr_num);  /*Number of TR slots in current processor*/
+DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/
+
+struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX];
 
 /*
  * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
@@ -84,14 +94,140 @@ wrap_mmu_context (struct mm_struct *mm)
 	local_flush_tlb_all();
 }
 
+/*
+ * Implement "spinaphores" ... like counting semaphores, but they
+ * spin instead of sleeping.  If there are ever any other users for
+ * this primitive it can be moved up to a spinaphore.h header.
+ */
+struct spinaphore {
+	atomic_t	cur;
+};
+
+static inline void spinaphore_init(struct spinaphore *ss, int val)
+{
+	atomic_set(&ss->cur, val);
+}
+
+static inline void down_spin(struct spinaphore *ss)
+{
+	while (unlikely(!atomic_add_unless(&ss->cur, -1, 0)))
+		while (atomic_read(&ss->cur) == 0)
+			cpu_relax();
+}
+
+static inline void up_spin(struct spinaphore *ss)
+{
+	atomic_add(1, &ss->cur);
+}
+
+static struct spinaphore ptcg_sem;
+static u16 nptcg = 1;
+static int need_ptcg_sem = 1;
+static int toolatetochangeptcgsem = 0;
+
+/*
+ * Kernel parameter "nptcg=" overrides max number of concurrent global TLB
+ * purges which is reported from either PAL or SAL PALO.
+ *
+ * We don't have sanity checking for nptcg value. It's the user's responsibility
+ * for valid nptcg value on the platform. Otherwise, kernel may hang in some
+ * cases.
+ */
+static int __init
+set_nptcg(char *str)
+{
+	int value = 0;
+
+	get_option(&str, &value);
+	setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER);
+
+	return 1;
+}
+
+__setup("nptcg=", set_nptcg);
+
+/*
+ * Maximum number of simultaneous ptc.g purges in the system can
+ * be defined by PAL_VM_SUMMARY (in which case we should take
+ * the smallest value for any cpu in the system) or by the PAL
+ * override table (in which case we should ignore the value from
+ * PAL_VM_SUMMARY).
+ *
+ * Kernel parameter "nptcg=" overrides maximum number of simultanesous ptc.g
+ * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case,
+ * we should ignore the value from either PAL_VM_SUMMARY or PAL override table.
+ *
+ * Complicating the logic here is the fact that num_possible_cpus()
+ * isn't fully setup until we start bringing cpus online.
+ */
+void
+setup_ptcg_sem(int max_purges, int nptcg_from)
+{
+	static int kp_override;
+	static int palo_override;
+	static int firstcpu = 1;
+
+	if (toolatetochangeptcgsem) {
+		BUG_ON(max_purges < nptcg);
+		return;
+	}
+
+	if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
+		kp_override = 1;
+		nptcg = max_purges;
+		goto resetsema;
+	}
+	if (kp_override) {
+		need_ptcg_sem = num_possible_cpus() > nptcg;
+		return;
+	}
+
+	if (nptcg_from == NPTCG_FROM_PALO) {
+		palo_override = 1;
+
+		/* In PALO max_purges == 0 really means it! */
+		if (max_purges == 0)
+			panic("Whoa! Platform does not support global TLB purges.\n");
+		nptcg = max_purges;
+		if (nptcg == PALO_MAX_TLB_PURGES) {
+			need_ptcg_sem = 0;
+			return;
+		}
+		goto resetsema;
+	}
+	if (palo_override) {
+		if (nptcg != PALO_MAX_TLB_PURGES)
+			need_ptcg_sem = (num_possible_cpus() > nptcg);
+		return;
+	}
+
+	/* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
+	if (max_purges == 0) max_purges = 1;
+
+	if (firstcpu) {
+		nptcg = max_purges;
+		firstcpu = 0;
+	}
+	if (max_purges < nptcg)
+		nptcg = max_purges;
+	if (nptcg == PAL_MAX_PURGES) {
+		need_ptcg_sem = 0;
+		return;
+	} else
+		need_ptcg_sem = (num_possible_cpus() > nptcg);
+
+resetsema:
+	spinaphore_init(&ptcg_sem, max_purges);
+}
+
 void
 ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
 		       unsigned long end, unsigned long nbits)
 {
-	static DEFINE_SPINLOCK(ptcg_lock);
-
 	struct mm_struct *active_mm = current->active_mm;
 
+	toolatetochangeptcgsem = 1;
+
 	if (mm != active_mm) {
 		/* Restore region IDs for mm */
 		if (mm && active_mm) {
@@ -102,19 +238,20 @@ ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
 		}
 	}
 
-	/* HW requires global serialization of ptc.ga.  */
-	spin_lock(&ptcg_lock);
-	{
-		do {
-			/*
-			 * Flush ALAT entries also.
-			 */
-			ia64_ptcga(start, (nbits<<2));
-			ia64_srlz_i();
-			start += (1UL << nbits);
-		} while (start < end);
-	}
-	spin_unlock(&ptcg_lock);
+	if (need_ptcg_sem)
+		down_spin(&ptcg_sem);
+
+	do {
+		/*
+		 * Flush ALAT entries also.
+		 */
+		ia64_ptcga(start, (nbits << 2));
+		ia64_srlz_i();
+		start += (1UL << nbits);
+	} while (start < end);
+
+	if (need_ptcg_sem)
+		up_spin(&ptcg_sem);
 
         if (mm != active_mm) {
                 activate_context(active_mm);
@@ -190,6 +327,9 @@ ia64_tlb_init (void)
 	ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
 	unsigned long tr_pgbits;
 	long status;
+	pal_vm_info_1_u_t vm_info_1;
+	pal_vm_info_2_u_t vm_info_2;
+	int cpu = smp_processor_id();
 
 	if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
 		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
@@ -206,4 +346,191 @@ ia64_tlb_init (void)
 	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
 
 	local_flush_tlb_all();	/* nuke left overs from bootstrapping... */
+	status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);
+
+	if (status) {
+		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
+		per_cpu(ia64_tr_num, cpu) = 8;
+		return;
+	}
+	per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
+	if (per_cpu(ia64_tr_num, cpu) >
+				(vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
+		per_cpu(ia64_tr_num, cpu) =
+				vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
+	if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
+		per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
+		printk(KERN_DEBUG "TR register number exceeds IA64_TR_ALLOC_MAX!"
+			"IA64_TR_ALLOC_MAX should be extended\n");
+	}
+}
+
+/*
+ * is_tr_overlap
+ *
+ * Check overlap with inserted TRs.
+ */
+static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
+{
+	u64 tr_log_size;
+	u64 tr_end;
+	u64 va_rr = ia64_get_rr(va);
+	u64 va_rid = RR_TO_RID(va_rr);
+	u64 va_end = va + (1<<log_size) - 1;
+
+	if (va_rid != RR_TO_RID(p->rr))
+		return 0;
+	tr_log_size = (p->itir & 0xff) >> 2;
+	tr_end = p->ifa + (1<<tr_log_size) - 1;
+
+	if (va > tr_end || p->ifa > va_end)
+		return 0;
+	return 1;
+
+}
+
+/*
+ * ia64_insert_tr in virtual mode. Allocate a TR slot
+ *
+ * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
+ *
+ * va 	: virtual address.
+ * pte 	: pte entries inserted.
+ * log_size: range to be covered.
+ *
+ * Return value:  <0 :  error No.
+ *
+ *		  >=0 : slot number allocated for TR.
+ * Must be called with preemption disabled.
+ */
+int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
+{
+	int i, r;
+	unsigned long psr;
+	struct ia64_tr_entry *p;
+	int cpu = smp_processor_id();
+
+	r = -EINVAL;
+	/*Check overlap with existing TR entries*/
+	if (target_mask & 0x1) {
+		p = &__per_cpu_idtrs[cpu][0][0];
+		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+								i++, p++) {
+			if (p->pte & 0x1)
+				if (is_tr_overlap(p, va, log_size)) {
+					printk(KERN_DEBUG "Overlapped Entry"
+						"Inserted for TR Reigster!!\n");
+					goto out;
+			}
+		}
+	}
+	if (target_mask & 0x2) {
+		p = &__per_cpu_idtrs[cpu][1][0];
+		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+								i++, p++) {
+			if (p->pte & 0x1)
+				if (is_tr_overlap(p, va, log_size)) {
+					printk(KERN_DEBUG "Overlapped Entry"
+						"Inserted for TR Reigster!!\n");
+					goto out;
+				}
+		}
+	}
+
+	for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
+		switch (target_mask & 0x3) {
+		case 1:
+			if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1))
+				goto found;
+			continue;
+		case 2:
+			if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+				goto found;
+			continue;
+		case 3:
+			if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) &&
+				!(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+				goto found;
+			continue;
+		default:
+			r = -EINVAL;
+			goto out;
+		}
+	}
+found:
+	if (i >= per_cpu(ia64_tr_num, cpu))
+		return -EBUSY;
+
+	/*Record tr info for mca hander use!*/
+	if (i > per_cpu(ia64_tr_used, cpu))
+		per_cpu(ia64_tr_used, cpu) = i;
+
+	psr = ia64_clear_ic();
+	if (target_mask & 0x1) {
+		ia64_itr(0x1, i, va, pte, log_size);
+		ia64_srlz_i();
+		p = &__per_cpu_idtrs[cpu][0][i];
+		p->ifa = va;
+		p->pte = pte;
+		p->itir = log_size << 2;
+		p->rr = ia64_get_rr(va);
+	}
+	if (target_mask & 0x2) {
+		ia64_itr(0x2, i, va, pte, log_size);
+		ia64_srlz_i();
+		p = &__per_cpu_idtrs[cpu][1][i];
+		p->ifa = va;
+		p->pte = pte;
+		p->itir = log_size << 2;
+		p->rr = ia64_get_rr(va);
+	}
+	ia64_set_psr(psr);
+	r = i;
+out:
+	return r;
+}
+EXPORT_SYMBOL_GPL(ia64_itr_entry);
+
+/*
+ * ia64_purge_tr
+ *
+ * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr.
+ * slot: slot number to be freed.
+ *
+ * Must be called with preemption disabled.
+ */
+void ia64_ptr_entry(u64 target_mask, int slot)
+{
+	int cpu = smp_processor_id();
+	int i;
+	struct ia64_tr_entry *p;
+
+	if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
+		return;
+
+	if (target_mask & 0x1) {
+		p = &__per_cpu_idtrs[cpu][0][slot];
+		if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+			p->pte = 0;
+			ia64_ptr(0x1, p->ifa, p->itir>>2);
+			ia64_srlz_i();
+		}
+	}
+
+	if (target_mask & 0x2) {
+		p = &__per_cpu_idtrs[cpu][1][slot];
+		if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+			p->pte = 0;
+			ia64_ptr(0x2, p->ifa, p->itir>>2);
+			ia64_srlz_i();
+		}
+	}
+
+	for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
+		if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) ||
+				(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+			break;
+	}
+	per_cpu(ia64_tr_used, cpu) = i;
 }
+EXPORT_SYMBOL_GPL(ia64_ptr_entry);
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index e282c348dcd..53d0a8ee35d 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -362,7 +362,12 @@ pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
 	info.name = name;
 	acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window,
 			&info);
-
+	/*
+	 * See arch/x86/pci/acpi.c.
+	 * The desired pci bus might already be scanned in a quirk. We
+	 * should handle the case here, but it appears that IA64 hasn't
+	 * such quirk. So we just ignore the case now.
+	 */
 	pbus = pci_scan_bus_parented(NULL, bus, &pci_root_ops, controller);
 	if (pbus)
 		pcibios_setup_root_windows(pbus, controller);