From a7db50405216610c8a0d62b8b400180b6f366733 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 22 Jun 2009 08:08:07 -0600 Subject: PCI: remove pcibios_scan_all_fns() This was #define'd as 0 on all platforms, so let's get rid of it. This change makes pci_scan_slot() slightly easier to read. Cc: Yoshinori Sato Cc: Tony Luck Cc: David Howells Cc: "David S. Miller" Cc: Jeff Dike Cc: Ingo Molnar Cc: Ivan Kokshaysky Reviewed-by: Matthew Wilcox Acked-by: Russell King Acked-by: Ralf Baechle Acked-by: Kyle McMartin Acked-by: Benjamin Herrenschmidt Acked-by: Paul Mundt Acked-by: Arnd Bergmann Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- arch/mips/include/asm/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index a68d111e55e..5ebf82572ec 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -65,8 +65,6 @@ extern int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin); extern unsigned int pcibios_assign_all_busses(void); -#define pcibios_scan_all_fns(a, b) 0 - extern unsigned long PCIBIOS_MIN_IO; extern unsigned long PCIBIOS_MIN_MEM; -- cgit v1.2.3-70-g09d2 From c88d5910890ad35af283344417891344604f0438 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Sep 2009 13:50:02 +0200 Subject: sched: Merge select_task_rq_fair() and sched_balance_self() The problem with wake_idle() is that it doesn't respect things like cpu_power, which means it doesn't deal well with SMT or the recent RT interaction. To cure this, it needs to do what sched_balance_self() does, which leads to the possibility of merging select_task_rq_fair() and sched_balance_self(). Modify sched_balance_self() to: - update_shares() when walking up the domain tree (it only called it for the top domain, but it should have done this anyway), which allows us to remove this ugly bit from try_to_wake_up(). - do wake_affine() on the smallest domain that contains both this (the waking) and the prev (the wakee) cpu for WAKE invocations. Then use the top-down balance steps it had to replace wake_idle(). This leads to the disappearance of SD_WAKE_BALANCE and SD_WAKE_IDLE_FAR, with SD_WAKE_IDLE replaced by SD_BALANCE_WAKE. SD_WAKE_AFFINE needs SD_BALANCE_WAKE to be effective. Touch all topology bits to replace the old with the new SD flags -- platforms might need re-tuning; enabling SD_BALANCE_WAKE conditionally on NUMA distance seems like a good additional feature. Magny-Cours and small Nehalem systems would want this enabled; systems with slow interconnects would not.
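Condensed, the merged function walks the domain hierarchy bottom-up once, doing the affine check on the smallest domain that spans both CPUs and remembering the topmost domain carrying the requested balance flag, then reuses the old top-down find_idlest_group()/find_idlest_cpu() descent. The sketch below restates that control flow; select_task_rq_sketch() is an illustrative name, the update_shares() bookkeeping and some corner cases are elided, and the sched_fair.c hunk further down remains the authoritative version:

static int select_task_rq_sketch(struct task_struct *p, int flag, int sync)
{
	struct sched_domain *tmp, *sd = NULL;
	int cpu = smp_processor_id();		/* the waking cpu */
	int prev_cpu = task_cpu(p);		/* where the wakee last ran */
	int new_cpu = (flag & SD_BALANCE_WAKE) ? prev_cpu : cpu;

	for_each_domain(cpu, tmp) {
		/* affine check on the smallest domain spanning both cpus */
		if ((flag & SD_BALANCE_WAKE) && (tmp->flags & SD_WAKE_AFFINE) &&
		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)) &&
		    wake_affine(tmp, p, sync))
			return cpu;

		if (tmp->flags & flag)
			sd = tmp;		/* topmost domain with this flag */
	}

	/* the old sched_balance_self() top-down descent, reused for WAKE */
	while (sd) {
		struct sched_group *group;
		int weight;

		if (!(sd->flags & flag)) {
			sd = sd->child;
			continue;
		}

		group = find_idlest_group(sd, current, cpu);
		if (!group || (new_cpu = find_idlest_cpu(group, current, cpu)) == cpu) {
			sd = sd->child;		/* retry one level down */
			continue;
		}

		/* descend into the chosen cpu's own domain hierarchy */
		cpu = new_cpu;
		weight = cpumask_weight(sched_domain_span(sd));
		sd = NULL;
		for_each_domain(cpu, tmp) {
			if (weight <= cpumask_weight(sched_domain_span(tmp)))
				break;
			if (tmp->flags & flag)
				sd = tmp;
		}
	}
	return new_cpu;
}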
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 5 +- arch/mips/include/asm/mach-ip27/topology.h | 2 +- arch/powerpc/include/asm/topology.h | 5 +- arch/sh/include/asm/topology.h | 4 +- arch/sparc/include/asm/topology_64.h | 4 +- arch/x86/include/asm/topology.h | 4 +- include/linux/sched.h | 7 +- include/linux/topology.h | 16 +- kernel/sched.c | 41 +---- kernel/sched_fair.c | 233 ++++++++--------------------- 10 files changed, 84 insertions(+), 237 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 7b4c8c70b2d..cf6053b226c 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -67,6 +67,7 @@ void build_cpu_to_node_map(void); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ + | SD_BALANCE_WAKE \ | SD_WAKE_AFFINE, \ .last_balance = jiffies, \ .balance_interval = 1, \ @@ -91,8 +92,8 @@ void build_cpu_to_node_map(void); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_BALANCE_WAKE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 64, \ .nr_balance_failed = 0, \ diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 07547231e07..d8332398f5b 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -48,7 +48,7 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; .cache_nice_tries = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ - | SD_WAKE_BALANCE, \ + | SD_BALANCE_WAKE, \ .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 054a16d6808..c6343313ff5 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -62,9 +62,8 @@ static inline int pcibus_to_node(struct pci_bus *bus) .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_NEWIDLE \ - | SD_WAKE_IDLE \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_BALANCE_WAKE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index b69ee850906..dc1531e2f25 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -21,8 +21,8 @@ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_BALANCE_WAKE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index e5ea8d33242..1d091abd2d1 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -57,8 +57,8 @@ static inline int pcibus_to_node(struct pci_bus *pbus) .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_BALANCE_WAKE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ } diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 26d06e052a1..966d58dc627 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -145,14 +145,12 @@ extern unsigned long node_remap_size[]; | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 
1*SD_BALANCE_FORK \ - | 0*SD_WAKE_IDLE \ + | 1*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 1*SD_WAKE_BALANCE \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ | 0*SD_SHARE_PKG_RESOURCES \ | 1*SD_SERIALIZE \ - | 1*SD_WAKE_IDLE_FAR \ | 0*SD_PREFER_SIBLING \ , \ .last_balance = jiffies, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3b0ca66bd6c..c30bf3d516d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -803,16 +803,15 @@ enum cpu_idle_type { #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ #define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ -#define SD_WAKE_IDLE 0x0010 /* Wake to idle CPU on task wakeup */ +#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_WAKE_BALANCE 0x0040 /* Perform balancing at task wakeup */ + #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ #define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */ #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ -#define SD_WAKE_IDLE_FAR 0x0800 /* Gain latency sacrificing cache hit */ + #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ -#define SD_BALANCE_WAKE 0x2000 /* Balance on wakeup */ enum powersavings_balance_level { POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */ diff --git a/include/linux/topology.h b/include/linux/topology.h index 85e8cf7d393..6a8cd15555b 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -95,14 +95,12 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 0*SD_WAKE_IDLE \ + | 1*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 1*SD_WAKE_BALANCE \ | 1*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ | 0*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ - | 0*SD_WAKE_IDLE_FAR \ | 0*SD_PREFER_SIBLING \ , \ .last_balance = jiffies, \ @@ -129,13 +127,11 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 1*SD_WAKE_IDLE \ + | 1*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 1*SD_WAKE_BALANCE \ | 0*SD_SHARE_CPUPOWER \ | 1*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ - | 0*SD_WAKE_IDLE_FAR \ | sd_balance_for_mc_power() \ | sd_power_saving_flags() \ , \ @@ -163,13 +159,11 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 1*SD_WAKE_IDLE \ + | 1*SD_BALANCE_WAKE \ | 0*SD_WAKE_AFFINE \ - | 1*SD_WAKE_BALANCE \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ - | 0*SD_WAKE_IDLE_FAR \ | sd_balance_for_package_power() \ | sd_power_saving_flags() \ , \ @@ -191,14 +185,12 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_NEWIDLE \ | 0*SD_BALANCE_EXEC \ | 0*SD_BALANCE_FORK \ - | 0*SD_WAKE_IDLE \ + | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 0*SD_WAKE_BALANCE \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ | 0*SD_SHARE_PKG_RESOURCES \ | 1*SD_SERIALIZE \ - | 1*SD_WAKE_IDLE_FAR \ | 0*SD_PREFER_SIBLING \ , \ .last_balance = jiffies, \ diff --git a/kernel/sched.c b/kernel/sched.c index fc6fda881d2..6c819f338b1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -512,14 +512,6 @@ struct root_domain { #ifdef CONFIG_SMP struct cpupri cpupri; #endif -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) - /* - * Preferred wake 
up cpu nominated by sched_mc balance that will be - * used when most cpus are idle in the system indicating overall very - * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2) - */ - unsigned int sched_mc_preferred_wakeup_cpu; -#endif }; /* @@ -2315,22 +2307,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) if (!sched_feat(SYNC_WAKEUPS)) sync = 0; -#ifdef CONFIG_SMP - if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) { - struct sched_domain *sd; - - this_cpu = raw_smp_processor_id(); - cpu = task_cpu(p); - - for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { - update_shares(sd); - break; - } - } - } -#endif - this_cpu = get_cpu(); smp_wmb(); @@ -3533,11 +3509,6 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, *imbalance = sds->min_load_per_task; sds->busiest = sds->group_min; - if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { - cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = - group_first_cpu(sds->group_leader); - } - return 1; } @@ -7850,9 +7821,7 @@ static int sd_degenerate(struct sched_domain *sd) } /* Following flags don't use groups */ - if (sd->flags & (SD_WAKE_IDLE | - SD_WAKE_AFFINE | - SD_WAKE_BALANCE)) + if (sd->flags & (SD_WAKE_AFFINE)) return 0; return 1; @@ -7869,10 +7838,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) return 0; - /* Does parent contain flags not in child? */ - /* WAKE_BALANCE is a subset of WAKE_AFFINE */ - if (cflags & SD_WAKE_AFFINE) - pflags &= ~SD_WAKE_BALANCE; /* Flags needing groups don't count if only 1 group in parent */ if (parent->groups == parent->groups->next) { pflags &= ~(SD_LOAD_BALANCE | @@ -8558,10 +8523,10 @@ static void set_domain_attribute(struct sched_domain *sd, request = attr->relax_domain_level; if (request < sd->level) { /* turn off idle balance on this domain */ - sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE); + sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); } else { /* turn on idle balance on this domain */ - sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE); + sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); } } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f2eb5b93471..09d19f77eb3 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1062,83 +1062,6 @@ static void yield_task_fair(struct rq *rq) se->vruntime = rightmost->vruntime + 1; } -/* - * wake_idle() will wake a task on an idle cpu if task->cpu is - * not idle and an idle cpu is available. The span of cpus to - * search starts with cpus closest then further out as needed, - * so we always favor a closer, idle cpu. - * Domains may include CPUs that are not usable for migration, - * hence we need to mask them out (rq->rd->online) - * - * Returns the CPU we should wake onto. - */ -#if defined(ARCH_HAS_SCHED_WAKE_IDLE) - -#define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online) - -static int wake_idle(int cpu, struct task_struct *p) -{ - struct sched_domain *sd; - int i; - unsigned int chosen_wakeup_cpu; - int this_cpu; - struct rq *task_rq = task_rq(p); - - /* - * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu - * are idle and this is not a kernel thread and this task's affinity - * allows it to be moved to preferred cpu, then just move! 
- */ - - this_cpu = smp_processor_id(); - chosen_wakeup_cpu = - cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; - - if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && - idle_cpu(cpu) && idle_cpu(this_cpu) && - p->mm && !(p->flags & PF_KTHREAD) && - cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) - return chosen_wakeup_cpu; - - /* - * If it is idle, then it is the best cpu to run this task. - * - * This cpu is also the best, if it has more than one task already. - * Siblings must be also busy(in most cases) as they didn't already - * pickup the extra load from this cpu and hence we need not check - * sibling runqueue info. This will avoid the checks and cache miss - * penalities associated with that. - */ - if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1) - return cpu; - - for_each_domain(cpu, sd) { - if ((sd->flags & SD_WAKE_IDLE) - || ((sd->flags & SD_WAKE_IDLE_FAR) - && !task_hot(p, task_rq->clock, sd))) { - for_each_cpu_and(i, sched_domain_span(sd), - &p->cpus_allowed) { - if (cpu_rd_active(i, task_rq) && idle_cpu(i)) { - if (i != task_cpu(p)) { - schedstat_inc(p, - se.nr_wakeups_idle); - } - return i; - } - } - } else { - break; - } - } - return cpu; -} -#else /* !ARCH_HAS_SCHED_WAKE_IDLE*/ -static inline int wake_idle(int cpu, struct task_struct *p) -{ - return cpu; -} -#endif - #ifdef CONFIG_SMP #ifdef CONFIG_FAIR_GROUP_SCHED @@ -1225,21 +1148,22 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, #endif -static int -wake_affine(struct sched_domain *this_sd, struct rq *this_rq, - struct task_struct *p, int prev_cpu, int this_cpu, int sync, - int idx, unsigned long load, unsigned long this_load, - unsigned int imbalance) +static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) { - struct task_struct *curr = this_rq->curr; - struct task_group *tg; - unsigned long tl = this_load; + struct task_struct *curr = current; + unsigned long this_load, load; + int idx, this_cpu, prev_cpu; unsigned long tl_per_task; + unsigned int imbalance; + struct task_group *tg; unsigned long weight; int balanced; - if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) - return 0; + idx = sd->wake_idx; + this_cpu = smp_processor_id(); + prev_cpu = task_cpu(p); + load = source_load(prev_cpu, idx); + this_load = target_load(this_cpu, idx); if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || p->se.avg_overlap > sysctl_sched_migration_cost)) @@ -1254,24 +1178,26 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, tg = task_group(current); weight = current->se.load.weight; - tl += effective_load(tg, this_cpu, -weight, -weight); + this_load += effective_load(tg, this_cpu, -weight, -weight); load += effective_load(tg, prev_cpu, 0, -weight); } tg = task_group(p); weight = p->se.load.weight; + imbalance = 100 + (sd->imbalance_pct - 100) / 2; + /* * In low-load situations, where prev_cpu is idle and this_cpu is idle - * due to the sync cause above having dropped tl to 0, we'll always have - * an imbalance, but there's really nothing you can do about that, so - * that's good too. + * due to the sync cause above having dropped this_load to 0, we'll + * always have an imbalance, but there's really nothing you can do + * about that, so that's good too. * * Otherwise check if either cpus are near enough in load to allow this * task to be woken on this_cpu. 
*/ - balanced = !tl || - 100*(tl + effective_load(tg, this_cpu, weight, weight)) <= + balanced = !this_load || + 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <= imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); /* @@ -1285,14 +1211,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, schedstat_inc(p, se.nr_wakeups_affine_attempts); tl_per_task = cpu_avg_load_per_task(this_cpu); - if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <= - tl_per_task)) { + if (balanced || + (this_load <= load && + this_load + target_load(prev_cpu, idx) <= tl_per_task)) { /* * This domain has SD_WAKE_AFFINE and * p is cache cold in this domain, and * there is no bad imbalance. */ - schedstat_inc(this_sd, ttwu_move_affine); + schedstat_inc(sd, ttwu_move_affine); schedstat_inc(p, se.nr_wakeups_affine); return 1; @@ -1300,72 +1227,6 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, return 0; } -static int sched_balance_self(int cpu, int flag); - -static int select_task_rq_fair(struct task_struct *p, int flag, int sync) -{ - struct sched_domain *sd, *this_sd = NULL; - int prev_cpu, this_cpu, new_cpu; - unsigned long load, this_load; - struct rq *this_rq; - unsigned int imbalance; - int idx; - - prev_cpu = task_cpu(p); - this_cpu = smp_processor_id(); - this_rq = cpu_rq(this_cpu); - new_cpu = prev_cpu; - - if (flag != SD_BALANCE_WAKE) - return sched_balance_self(this_cpu, flag); - - /* - * 'this_sd' is the first domain that both - * this_cpu and prev_cpu are present in: - */ - for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { - this_sd = sd; - break; - } - } - - if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) - goto out; - - /* - * Check for affine wakeup and passive balancing possibilities. - */ - if (!this_sd) - goto out; - - idx = this_sd->wake_idx; - - imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; - - load = source_load(prev_cpu, idx); - this_load = target_load(this_cpu, idx); - - if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx, - load, this_load, imbalance)) - return this_cpu; - - /* - * Start passive balancing when half the imbalance_pct - * limit is reached. - */ - if (this_sd->flags & SD_WAKE_BALANCE) { - if (imbalance*this_load <= 100*load) { - schedstat_inc(this_sd, ttwu_move_balance); - schedstat_inc(p, se.nr_wakeups_passive); - return this_cpu; - } - } - -out: - return wake_idle(new_cpu, p); -} - /* * find_idlest_group finds and returns the least busy CPU group within the * domain. @@ -1455,10 +1316,20 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) * * preempt must be disabled. 
*/ -static int sched_balance_self(int cpu, int flag) +static int select_task_rq_fair(struct task_struct *p, int flag, int sync) { struct task_struct *t = current; struct sched_domain *tmp, *sd = NULL; + int cpu = smp_processor_id(); + int prev_cpu = task_cpu(p); + int new_cpu = cpu; + int want_affine = 0; + + if (flag & SD_BALANCE_WAKE) { + if (sched_feat(AFFINE_WAKEUPS)) + want_affine = 1; + new_cpu = prev_cpu; + } for_each_domain(cpu, tmp) { /* @@ -1466,16 +1337,38 @@ static int sched_balance_self(int cpu, int flag) */ if (tmp->flags & SD_POWERSAVINGS_BALANCE) break; - if (tmp->flags & flag) - sd = tmp; - } - if (sd) - update_shares(sd); + switch (flag) { + case SD_BALANCE_WAKE: + if (!sched_feat(LB_WAKEUP_UPDATE)) + break; + case SD_BALANCE_FORK: + case SD_BALANCE_EXEC: + if (root_task_group_empty()) + break; + update_shares(tmp); + default: + break; + } + + if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && + cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { + + if (wake_affine(tmp, p, sync)) + return cpu; + + want_affine = 0; + } + + if (!(tmp->flags & flag)) + continue; + + sd = tmp; + } while (sd) { struct sched_group *group; - int new_cpu, weight; + int weight; if (!(sd->flags & flag)) { sd = sd->child; @@ -1508,7 +1401,7 @@ static int sched_balance_self(int cpu, int flag) /* while loop will break here if sd == NULL */ } - return cpu; + return new_cpu; } #endif /* CONFIG_SMP */ -- cgit v1.2.3-70-g09d2 From 182a85f8a119c789610a9d464f4129ded9f3c107 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Sep 2009 13:24:49 +0200 Subject: sched: Disable wakeup balancing Sysbench thinks SD_BALANCE_WAKE is too aggressive and kbuild doesn't really mind too much, SD_BALANCE_NEWIDLE picks up most of the slack. On a dual-socket, quad-core, dual-thread Nehalem system: sysbench (--num_threads=16): SD_BALANCE_WAKE-: 13982 tx/s SD_BALANCE_WAKE+: 15688 tx/s kbuild (-j16): SD_BALANCE_WAKE-: 47.648295846 seconds time elapsed ( +- 0.312% ) SD_BALANCE_WAKE+: 47.608607360 seconds time elapsed ( +- 0.026% ) (same within noise) Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 2 -- arch/mips/include/asm/mach-ip27/topology.h | 1 - arch/powerpc/include/asm/topology.h | 1 - arch/sh/include/asm/topology.h | 1 - arch/sparc/include/asm/topology_64.h | 1 - arch/x86/include/asm/topology.h | 2 +- include/linux/topology.h | 6 +++--- 7 files changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 569b9dafc78..d0141fbf51d 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -68,7 +68,6 @@ void build_cpu_to_node_map(void); | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_BALANCE_WAKE \ | SD_WAKE_AFFINE, \ .last_balance = jiffies, \ .balance_interval = 1, \ @@ -94,7 +93,6 @@ void build_cpu_to_node_map(void); | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_BALANCE_WAKE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 64, \ diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index d8332398f5b..23059170700 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -48,7 +48,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; .cache_nice_tries = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ - | SD_BALANCE_WAKE, \
.last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 1a2c9eb42a0..394edcbcce7 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -63,7 +63,6 @@ static inline int pcibus_to_node(struct pci_bus *bus) | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_WAKE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index a8cc564b703..f8c40cc6505 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -21,7 +21,6 @@ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ - | SD_BALANCE_WAKE \ | SD_BALANCE_NEWIDLE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 10b979d1de2..26cd25c0839 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -57,7 +57,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus) .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ - | SD_BALANCE_WAKE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 589f12383d7..6f0695d744b 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -141,7 +141,7 @@ extern unsigned long node_remap_size[]; | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 1*SD_BALANCE_WAKE \ + | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ diff --git a/include/linux/topology.h b/include/linux/topology.h index a6614b0242a..809b26c0709 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -95,7 +95,7 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 1*SD_BALANCE_WAKE \ + | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ | 1*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ @@ -127,7 +127,7 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 1*SD_BALANCE_WAKE \ + | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ | 1*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ @@ -160,7 +160,7 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 1*SD_BALANCE_WAKE \ + | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ | 1*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ -- cgit v1.2.3-70-g09d2 From e3bf887d73309808d47c74f2f024d2497c8f7048 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 28 Aug 2009 12:29:58 +0100 Subject: MIPS: Fix potential build error in <asm/delay.h> <asm/delay.h> will break if HZ isn't defined. In 2.6.26 and later we're usually lucky ...
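The breakage is mundane: an unexpanded HZ in ordinary C is just an undeclared identifier, so any HZ-dependent helper in the header fails to compile unless an earlier include happens to drag in <linux/param.h>, which defines HZ from the kernel config. A minimal illustration (msecs_to_loops() is a hypothetical helper, not the actual file contents):

#include <linux/param.h>	/* defines HZ; this is what the patch below adds */

/* without the include above: error: 'HZ' undeclared */
static inline unsigned long msecs_to_loops(unsigned long ms)
{
	return ms * HZ / 1000;
}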
Signed-off-by: Ralf Baechle --- arch/mips/include/asm/delay.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/delay.h b/arch/mips/include/asm/delay.h index d2d8949be6b..e7cd78277c2 100644 --- a/arch/mips/include/asm/delay.h +++ b/arch/mips/include/asm/delay.h @@ -11,6 +11,8 @@ #ifndef _ASM_DELAY_H #define _ASM_DELAY_H +#include <linux/param.h> + extern void __delay(unsigned int loops); extern void __ndelay(unsigned int ns); extern void __udelay(unsigned int us); -- cgit v1.2.3-70-g09d2 From 2608441164fde82b7fc0dd050ca0c46be0df23f8 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 20 Aug 2009 12:35:53 -0700 Subject: MIPS: Octeon: Check all CCAs in cvmx_write_csr. The current code only checks CCA of 0 when deciding if a dummy read is needed. Since the kernel can (and does) use other CCAs we need to mask out the CCA bits from the address. Since the address constant now fits in 16 bits, there is an added benefit that smaller code is generated. Signed-off-by: David Daney Signed-off-by: Ralf Baechle --- arch/mips/include/asm/octeon/cvmx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h index e31e3fe14f8..9d9381e2e3d 100644 --- a/arch/mips/include/asm/octeon/cvmx.h +++ b/arch/mips/include/asm/octeon/cvmx.h @@ -271,7 +271,7 @@ static inline void cvmx_write_csr(uint64_t csr_addr, uint64_t val) * what RSL read we do, so we choose CVMX_MIO_BOOT_BIST_STAT * because it is fast and harmless. */ - if ((csr_addr >> 40) == (0x800118)) + if (((csr_addr >> 40) & 0x7ffff) == (0x118)) cvmx_read64(CVMX_MIO_BOOT_BIST_STAT); } -- cgit v1.2.3-70-g09d2 From 6de4c6f9c8ead69d2f423ea80a384ef98bb4b3f8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 12 Aug 2009 23:59:27 +0400 Subject: MIPS: Lasat: Fix compilation Header <linux/smp.h> needed for current_cpu_data, which expands to smp_processor_id(). However, linux/smp.h can't be included into asm/cpu-info.h due to horrible circular dependencies, so plug it here. Signed-off-by: Alexey Dobriyan Signed-off-by: Ralf Baechle --- arch/mips/include/asm/lasat/lasat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/lasat/lasat.h b/arch/mips/include/asm/lasat/lasat.h index caeba1e302a..a1ada1c27c1 100644 --- a/arch/mips/include/asm/lasat/lasat.h +++ b/arch/mips/include/asm/lasat/lasat.h @@ -227,6 +227,7 @@ extern void lasat_write_eeprom_info(void); * It is used for the bit-banging rtc and eeprom drivers */ #include +#include <linux/smp.h> /* calculating with the slowest board with 100 MHz clock */ #define LASAT_100_DIVIDER 20 -- cgit v1.2.3-70-g09d2 From 75f453164178a1749297fc466300bf3e34ef8103 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Fri, 28 Aug 2009 11:26:58 +0200 Subject: MIPS: Alchemy: add gpio_request/gpio_free stubs for CONFIG_GPIOLIB=n Some drivers use gpio_request/gpio_free regardless of whether gpiolib is actually built; add stubs to work around the ensuing compile failures.
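The stubs only need to satisfy the common calling convention. A hypothetical consumer that now keeps building with CONFIG_GPIOLIB=n (the GPIO number and label are made up for illustration):

static int example_init(void)
{
	int err = gpio_request(215, "card-detect");	/* stub: always returns 0 */

	if (err)
		return err;

	/* the pin itself is still driven via the Alchemy-specific accessors */

	gpio_free(215);					/* stub: no-op */
	return 0;
}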
Signed-off-by: Manuel Lauss Tested-by: Florian Fainelli Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-au1x00/gpio-au1000.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h index 127d4ed9f07..feea00148b5 100644 --- a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h +++ b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h @@ -578,6 +578,15 @@ static inline int irq_to_gpio(int irq) return alchemy_irq_to_gpio(irq); } +static inline int gpio_request(unsigned gpio, const char *label) +{ + return 0; +} + +static inline void gpio_free(unsigned gpio) +{ +} + #endif /* !CONFIG_ALCHEMY_GPIO_INDIRECT */ -- cgit v1.2.3-70-g09d2 From 99502d94c3649c5c5e6e81e323caf422a2f1591c Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Fri, 31 Jul 2009 16:58:17 -0400 Subject: MIPS: make page.h constants available to assembly. page.h includes #ifndef __ASSEMBLY__ guards, but PAGE_SIZE and some other constants are defined using "1UL", which the assembler does not support. Use the _AC macro from <linux/const.h> to make them available to assembly (and linker scripts). Signed-off-by: Nelson Elhage Signed-off-by: Tim Abbott Signed-off-by: Ralf Baechle --- arch/mips/include/asm/page.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index 4320239cf4e..f266295cce5 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -10,6 +10,7 @@ #define _ASM_PAGE_H #include +#include <linux/const.h> /* * PAGE_SHIFT determines the page size */ @@ -29,12 +30,12 @@ #ifdef CONFIG_PAGE_SIZE_64KB #define PAGE_SHIFT 16 #endif -#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) #ifdef CONFIG_HUGETLB_PAGE #define HPAGE_SHIFT (PAGE_SHIFT + PAGE_SHIFT - 3) -#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) +#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #endif /* CONFIG_HUGETLB_PAGE */ -- cgit v1.2.3-70-g09d2 From 38c9fb743f0d6e9dd8ee45e3e6247bc7d147c4de Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 15 Sep 2009 15:00:02 +0200 Subject: MIPS: Fulong: Convert reset initialization to initcall. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mips-boards/generic.h | 2 -- arch/mips/lemote/lm2e/reset.c | 7 ++++++- arch/mips/lemote/lm2e/setup.c | 3 --- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/mips-boards/generic.h b/arch/mips/include/asm/mips-boards/generic.h index c0da1a881e3..46c08563e53 100644 --- a/arch/mips/include/asm/mips-boards/generic.h +++ b/arch/mips/include/asm/mips-boards/generic.h @@ -87,8 +87,6 @@ extern int mips_revision_sconid; -extern void mips_reboot_setup(void); - #ifdef CONFIG_PCI extern void mips_pcibios_init(void); #else diff --git a/arch/mips/lemote/lm2e/reset.c b/arch/mips/lemote/lm2e/reset.c index 099387a3827..2e64bf6929e 100644 --- a/arch/mips/lemote/lm2e/reset.c +++ b/arch/mips/lemote/lm2e/reset.c @@ -7,6 +7,7 @@ * Copyright (C) 2007 Lemote, Inc.
& Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com */ +#include #include #include @@ -33,9 +34,13 @@ static void loongson2e_power_off(void) loongson2e_halt(); } -void mips_reboot_setup(void) +static int __init mips_reboot_setup(void) { _machine_restart = loongson2e_restart; _machine_halt = loongson2e_halt; pm_power_off = loongson2e_power_off; + + return 0; } + +arch_initcall(mips_reboot_setup); diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c index ebd6ceaef2f..79dae630611 100644 --- a/arch/mips/lemote/lm2e/setup.c +++ b/arch/mips/lemote/lm2e/setup.c @@ -41,8 +41,6 @@ #include #endif -extern void mips_reboot_setup(void); - unsigned long cpu_clock_freq; unsigned long bus_clock; unsigned int memsize; @@ -77,7 +75,6 @@ void __init plat_mem_setup(void) { set_io_port_base((unsigned long)ioremap(LOONGSON2E_IO_PORT_BASE, IO_SPACE_LIMIT - LOONGSON2E_PCI_IO_START + 1)); - mips_reboot_setup(); __wbflush = wbflush_loongson2e; -- cgit v1.2.3-70-g09d2 From bd92aa013e8fcd17328ec8e060477761cf3380d9 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Thu, 2 Jul 2009 23:22:36 +0800 Subject: MIPS: Loongson: Split the implementation of prom and setup parts This patch splits the old initialization and setup implementation into several files, one file per logical function. The other main changes include: 1. as script/checkpatch.pl suggests, use strict_strtol instead of simple_strtol in arch/mips/lemote/lm2e/cmdline.c (a usage sketch follows below) 2. use the existing macros in asm/mips-boards/bonito64.h as the arguments of set_io_port_base() and remove the un-needed ones from asm/mach-lemote/pci.h Signed-off-by: Wu Zhangjin Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-lemote/pci.h | 1 - arch/mips/lemote/lm2e/Makefile | 3 +- arch/mips/lemote/lm2e/cmdline.c | 53 +++++++++++++++++++ arch/mips/lemote/lm2e/env.c | 60 ++++++++++++++++++++++ arch/mips/lemote/lm2e/init.c | 34 ++++++++++++ arch/mips/lemote/lm2e/machtype.c | 15 ++++++ arch/mips/lemote/lm2e/mem.c | 13 +++++ arch/mips/lemote/lm2e/prom.c | 91 --------------------------------- arch/mips/lemote/lm2e/setup.c | 34 +----------- arch/mips/lemote/lm2e/time.c | 30 +++++++++++ 10 files changed, 208 insertions(+), 126 deletions(-) create mode 100644 arch/mips/lemote/lm2e/cmdline.c create mode 100644 arch/mips/lemote/lm2e/env.c create mode 100644 arch/mips/lemote/lm2e/init.c create mode 100644 arch/mips/lemote/lm2e/machtype.c delete mode 100644 arch/mips/lemote/lm2e/prom.c create mode 100644 arch/mips/lemote/lm2e/time.c (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/mach-lemote/pci.h b/arch/mips/include/asm/mach-lemote/pci.h index ea6aa143b78..0307e493526 100644 --- a/arch/mips/include/asm/mach-lemote/pci.h +++ b/arch/mips/include/asm/mach-lemote/pci.h @@ -25,6 +25,5 @@ #define LOONGSON2E_PCI_MEM_START 0x14000000UL #define LOONGSON2E_PCI_MEM_END 0x1fffffffUL #define LOONGSON2E_PCI_IO_START 0x00004000UL -#define LOONGSON2E_IO_PORT_BASE 0x1fd00000UL #endif /* !_LEMOTE_PCI_H_ */ diff --git a/arch/mips/lemote/lm2e/Makefile b/arch/mips/lemote/lm2e/Makefile index f19173252d6..a5bc1efc362 100644 --- a/arch/mips/lemote/lm2e/Makefile +++ b/arch/mips/lemote/lm2e/Makefile @@ -2,7 +2,8 @@ # Makefile for Lemote Fulong mini-PC board.
# -obj-y += setup.o prom.o reset.o irq.o pci.o bonito-irq.o mem.o +obj-y += setup.o init.o reset.o irq.o pci.o bonito-irq.o mem.o \ + env.o cmdline.o time.o machtype.o # # Early printk support diff --git a/arch/mips/lemote/lm2e/cmdline.c b/arch/mips/lemote/lm2e/cmdline.c new file mode 100644 index 00000000000..442b93587e8 --- /dev/null +++ b/arch/mips/lemote/lm2e/cmdline.c @@ -0,0 +1,53 @@ +/* + * Based on Ocelot Linux port, which is + * Copyright 2001 MontaVista Software Inc. + * Author: jsun@mvista.com or jsun@junsun.net + * + * Copyright 2003 ICT CAS + * Author: Michael Guo + * + * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include + +#include + +int prom_argc; +/* pmon passes arguments in 32bit pointers */ +int *_prom_argv; + +void __init prom_init_cmdline(void) +{ + int i; + long l; + + /* firmware arguments are initialized in head.S */ + prom_argc = fw_arg0; + _prom_argv = (int *)fw_arg1; + + /* arg[0] is "g", the rest is boot parameters */ + arcs_cmdline[0] = '\0'; + for (i = 1; i < prom_argc; i++) { + l = (long)_prom_argv[i]; + if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) + >= sizeof(arcs_cmdline)) + break; + strcat(arcs_cmdline, ((char *)l)); + strcat(arcs_cmdline, " "); + } + + if ((strstr(arcs_cmdline, "console=")) == NULL) + strcat(arcs_cmdline, " console=ttyS0,115200"); + if ((strstr(arcs_cmdline, "root=")) == NULL) + strcat(arcs_cmdline, " root=/dev/hda1"); +} diff --git a/arch/mips/lemote/lm2e/env.c b/arch/mips/lemote/lm2e/env.c new file mode 100644 index 00000000000..9e88409f7a3 --- /dev/null +++ b/arch/mips/lemote/lm2e/env.c @@ -0,0 +1,60 @@ +/* + * Based on Ocelot Linux port, which is + * Copyright 2001 MontaVista Software Inc. + * Author: jsun@mvista.com or jsun@junsun.net + * + * Copyright 2003 ICT CAS + * Author: Michael Guo + * + * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include +#include + +#include + +unsigned long bus_clock, cpu_clock_freq; +unsigned long memsize, highmemsize; + +/* pmon passes arguments in 32bit pointers */ +int *_prom_envp; + +#define parse_even_earlier(res, option, p) \ +do { \ + if (strncmp(option, (char *)p, strlen(option)) == 0) \ + strict_strtol((char *)p + strlen(option"="), \ + 10, &res); \ +} while (0) + +void __init prom_init_env(void) +{ + long l; + + /* firmware arguments are initialized in head.S */ + _prom_envp = (int *)fw_arg2; + + l = (long)*_prom_envp; + while (l != 0) { + parse_even_earlier(bus_clock, "busclock", l); + parse_even_earlier(cpu_clock_freq, "cpuclock", l); + parse_even_earlier(memsize, "memsize", l); + parse_even_earlier(highmemsize, "highmemsize", l); + _prom_envp++; + l = (long)*_prom_envp; + } + if (memsize == 0) + memsize = 256; + + pr_info("busclock=%ld, cpuclock=%ld, memsize=%ld, highmemsize=%ld\n", + bus_clock, cpu_clock_freq, memsize, highmemsize); +} diff --git a/arch/mips/lemote/lm2e/init.c b/arch/mips/lemote/lm2e/init.c new file mode 100644 index 00000000000..6fe624d697e --- /dev/null +++ b/arch/mips/lemote/lm2e/init.c @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include + +#include +#include + +extern void __init prom_init_cmdline(void); +extern void __init prom_init_env(void); +extern void __init prom_init_memory(void); + +void __init prom_init(void) +{ + /* init base address of io space */ + set_io_port_base((unsigned long) + ioremap(BONITO_PCIIO_BASE, BONITO_PCIIO_SIZE)); + + prom_init_cmdline(); + prom_init_env(); + prom_init_memory(); +} + +void __init prom_free_prom_memory(void) +{ +} diff --git a/arch/mips/lemote/lm2e/machtype.c b/arch/mips/lemote/lm2e/machtype.c new file mode 100644 index 00000000000..8d803eea787 --- /dev/null +++ b/arch/mips/lemote/lm2e/machtype.c @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +const char *get_system_type(void) +{ + return "lemote-fulong"; +} + diff --git a/arch/mips/lemote/lm2e/mem.c b/arch/mips/lemote/lm2e/mem.c index 16cd21587d3..f24af70b669 100644 --- a/arch/mips/lemote/lm2e/mem.c +++ b/arch/mips/lemote/lm2e/mem.c @@ -8,6 +8,19 @@ #include #include +#include + +extern unsigned long memsize, highmemsize; + +void __init prom_init_memory(void) +{ + add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); +#ifdef CONFIG_64BIT + if (highmemsize > 0) + add_memory_region(0x20000000, highmemsize << 20, BOOT_MEM_RAM); +#endif /* CONFIG_64BIT */ +} + /* override of arch/mips/mm/cache.c: __uncached_access */ int __uncached_access(struct file *file, unsigned long addr) { diff --git a/arch/mips/lemote/lm2e/prom.c b/arch/mips/lemote/lm2e/prom.c deleted file mode 100644 index d78cedf28c1..00000000000 --- a/arch/mips/lemote/lm2e/prom.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Based on Ocelot Linux port, which is - * Copyright 2001 MontaVista Software Inc. - * Author: jsun@mvista.com or jsun@junsun.net - * - * Copyright 2003 ICT CAS - * Author: Michael Guo - * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include -#include -#include - -extern unsigned long bus_clock; -extern unsigned long cpu_clock_freq; -extern unsigned int memsize, highmemsize; - -static int argc; -/* pmon passes arguments in 32bit pointers */ -static int *arg; -static int *env; - -const char *get_system_type(void) -{ - return "lemote-fulong"; -} - -void __init prom_init_cmdline(void) -{ - int i; - long l; - - /* arg[0] is "g", the rest is boot parameters */ - arcs_cmdline[0] = '\0'; - for (i = 1; i < argc; i++) { - l = (long)arg[i]; - if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) - >= sizeof(arcs_cmdline)) - break; - strcat(arcs_cmdline, ((char *)l)); - strcat(arcs_cmdline, " "); - } -} - -void __init prom_init(void) -{ - long l; - argc = fw_arg0; - arg = (int *)fw_arg1; - env = (int *)fw_arg2; - - prom_init_cmdline(); - - if ((strstr(arcs_cmdline, "console=")) == NULL) - strcat(arcs_cmdline, " console=ttyS0,115200"); - if ((strstr(arcs_cmdline, "root=")) == NULL) - strcat(arcs_cmdline, " root=/dev/hda1"); - -#define parse_even_earlier(res, option, p) \ -do { \ - if (strncmp(option, (char *)p, strlen(option)) == 0) \ - res = simple_strtol((char *)p + strlen(option"="), \ - NULL, 10); \ -} while (0) - - l = (long)*env; - while (l != 0) { - parse_even_earlier(bus_clock, "busclock", l); - parse_even_earlier(cpu_clock_freq, "cpuclock", l); - parse_even_earlier(memsize, "memsize", l); - parse_even_earlier(highmemsize, "highmemsize", l); - env++; - l = (long)*env; - } - if (memsize == 0) - memsize = 256; - - pr_info("busclock=%ld, cpuclock=%ld,memsize=%d,highmemsize=%d\n", - bus_clock, cpu_clock_freq, memsize, highmemsize); -} - -void __init prom_free_prom_memory(void) -{ -} diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c index 79dae630611..03578cf0ee7 100644 --- a/arch/mips/lemote/lm2e/setup.c +++ b/arch/mips/lemote/lm2e/setup.c @@ -26,37 +26,16 @@ * 675 Mass Ave, Cambridge, MA 02139, USA. 
* */ -#include #include -#include +#include -#include -#include -#include #include -#include #ifdef CONFIG_VT #include #include #endif -unsigned long cpu_clock_freq; -unsigned long bus_clock; -unsigned int memsize; -unsigned int highmemsize = 0; -void __init plat_time_init(void) -{ - /* setup mips r4k timer */ - mips_hpt_frequency = cpu_clock_freq / 2; -} -unsigned long read_persistent_clock(void) -{ - return mc146818_get_cmos_time(); -} - void (*__wbflush)(void); EXPORT_SYMBOL(__wbflush); @@ -73,18 +52,8 @@ static void wbflush_loongson2e(void) void __init plat_mem_setup(void) { - set_io_port_base((unsigned long)ioremap(LOONGSON2E_IO_PORT_BASE, - IO_SPACE_LIMIT - LOONGSON2E_PCI_IO_START + 1)); - __wbflush = wbflush_loongson2e; - add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); -#ifdef CONFIG_64BIT - if (highmemsize > 0) { - add_memory_region(0x20000000, highmemsize << 20, BOOT_MEM_RAM); - } -#endif - #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; @@ -104,5 +73,4 @@ void __init plat_mem_setup(void) conswitchp = &dummy_con; #endif #endif - } diff --git a/arch/mips/lemote/lm2e/time.c b/arch/mips/lemote/lm2e/time.c new file mode 100644 index 00000000000..b9d3f11b620 --- /dev/null +++ b/arch/mips/lemote/lm2e/time.c @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include + +#include +#include + +extern unsigned long cpu_clock_freq; + +void __init plat_time_init(void) +{ + /* setup mips r4k timer */ + mips_hpt_frequency = cpu_clock_freq / 2; +} + +unsigned long read_persistent_clock(void) +{ + return mc146818_get_cmos_time(); +} -- cgit v1.2.3-70-g09d2 From 5e983ff654ca3df3007b5b558b5271bb4622afa4 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Thu, 2 Jul 2009 23:23:03 +0800 Subject: MIPS: Loongson: clean up the coding style With the help of script/checkpatch.pl, I have cleaned up the coding style. 1. remove un-needed header files and tune some comments. 2. remove some un-needed { }. Add a new header file, loongson.h: 3. move some common header files to loongson.h. 4. move some common extern declarations to loongson.h. This new header file is also needed for future loongson2f support.
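On the strict_strtol() conversion flagged in the earlier split patch: unlike simple_strtol(), it rejects malformed input instead of silently returning 0, reporting success or -EINVAL through its return value and writing the result via a pointer. A small before/after sketch with hypothetical variables str and val:

	long val;

	/* old: no way to distinguish a parsed 0 from garbage */
	val = simple_strtol(str, NULL, 10);

	/* new: returns 0 on success, -EINVAL on malformed input */
	if (strict_strtol(str, 10, &val))
		val = 0;	/* handle the parse error explicitly */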
Signed-off-by: Wu Zhangjin Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-lemote/loongson.h | 36 +++++++++++++++++++++ arch/mips/include/asm/mach-lemote/pci.h | 8 +++-- arch/mips/lemote/lm2e/bonito-irq.c | 27 ++-------------- arch/mips/lemote/lm2e/cmdline.c | 5 ++- arch/mips/lemote/lm2e/early_printk.c | 4 +-- arch/mips/lemote/lm2e/env.c | 6 ++-- arch/mips/lemote/lm2e/init.c | 6 +--- arch/mips/lemote/lm2e/irq.c | 48 ++++------------------------ arch/mips/lemote/lm2e/mem.c | 2 +- arch/mips/lemote/lm2e/pci.c | 26 ++------------- arch/mips/lemote/lm2e/reset.c | 43 ------------------------- arch/mips/lemote/lm2e/setup.c | 22 ++----------- arch/mips/lemote/lm2e/time.c | 5 +-- arch/mips/pci/fixup-lm2e.c | 18 ----------- 14 files changed, 62 insertions(+), 194 deletions(-) create mode 100644 arch/mips/include/asm/mach-lemote/loongson.h delete mode 100644 arch/mips/lemote/lm2e/reset.c (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/mach-lemote/loongson.h b/arch/mips/include/asm/mach-lemote/loongson.h new file mode 100644 index 00000000000..76cc2bddfa4 --- /dev/null +++ b/arch/mips/include/asm/mach-lemote/loongson.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2009 Lemote, Inc. & Institute of Computing Technology + * Author: Wu Zhangjin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __ASM_MACH_LOONGSON_LOONGSON_H +#define __ASM_MACH_LOONGSON_LOONGSON_H + +#include +#include + +/* there is an internal bonito64-compatiable northbridge in loongson2e/2f */ +#include + +/* loongson internal northbridge initialization */ +extern void bonito_irq_init(void); + +/* loongson-based machines specific reboot setup */ +extern void mips_reboot_setup(void); + +/* environment arguments from bootloader */ +extern unsigned long bus_clock, cpu_clock_freq; +extern unsigned long memsize, highmemsize; + +/* loongson-specific command line, env and memory initialization */ +extern void __init prom_init_memory(void); +extern void __init prom_init_cmdline(void); +extern void __init prom_init_env(void); + +#endif /* __ASM_MACH_LOONGSON_LOONGSON_H */ diff --git a/arch/mips/include/asm/mach-lemote/pci.h b/arch/mips/include/asm/mach-lemote/pci.h index 0307e493526..92b2f59d890 100644 --- a/arch/mips/include/asm/mach-lemote/pci.h +++ b/arch/mips/include/asm/mach-lemote/pci.h @@ -19,11 +19,13 @@ * 02139, USA. */ -#ifndef _LEMOTE_PCI_H_ -#define _LEMOTE_PCI_H_ +#ifndef __ASM_MACH_LEMOTE_PCI_H_ +#define __ASM_MACH_LEMOTE_PCI_H_ + +extern struct pci_ops bonito64_pci_ops; #define LOONGSON2E_PCI_MEM_START 0x14000000UL #define LOONGSON2E_PCI_MEM_END 0x1fffffffUL #define LOONGSON2E_PCI_IO_START 0x00004000UL -#endif /* !_LEMOTE_PCI_H_ */ +#endif /* !__ASM_MACH_LEMOTE_PCI_H_ */ diff --git a/arch/mips/lemote/lm2e/bonito-irq.c b/arch/mips/lemote/lm2e/bonito-irq.c index 8fc3bce7075..3e31e7ad713 100644 --- a/arch/mips/lemote/lm2e/bonito-irq.c +++ b/arch/mips/lemote/lm2e/bonito-irq.c @@ -10,32 +10,10 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * */ -#include -#include -#include -#include #include -#include - -#include +#include static inline void bonito_irq_enable(unsigned int irq) { @@ -66,9 +44,8 @@ void bonito_irq_init(void) { u32 i; - for (i = BONITO_IRQ_BASE; i < BONITO_IRQ_BASE + 32; i++) { + for (i = BONITO_IRQ_BASE; i < BONITO_IRQ_BASE + 32; i++) set_irq_chip_and_handler(i, &bonito_irq_type, handle_level_irq); - } setup_irq(BONITO_IRQ_BASE + 10, &dma_timeout_irqaction); } diff --git a/arch/mips/lemote/lm2e/cmdline.c b/arch/mips/lemote/lm2e/cmdline.c index 442b93587e8..75f1b243ee4 100644 --- a/arch/mips/lemote/lm2e/cmdline.c +++ b/arch/mips/lemote/lm2e/cmdline.c @@ -17,11 +17,10 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ -#include -#include - #include +#include + int prom_argc; /* pmon passes arguments in 32bit pointers */ int *_prom_argv; diff --git a/arch/mips/lemote/lm2e/early_printk.c b/arch/mips/lemote/lm2e/early_printk.c index 811c7dec1ed..3e0a6eaa404 100644 --- a/arch/mips/lemote/lm2e/early_printk.c +++ b/arch/mips/lemote/lm2e/early_printk.c @@ -9,11 +9,9 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ -#include -#include #include -#include +#include #define UART_BASE (BONITO_PCIIO_BASE + 0x3f8) diff --git a/arch/mips/lemote/lm2e/env.c b/arch/mips/lemote/lm2e/env.c index 9e88409f7a3..b9ef5038554 100644 --- a/arch/mips/lemote/lm2e/env.c +++ b/arch/mips/lemote/lm2e/env.c @@ -17,12 +17,10 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ - -#include -#include - #include +#include + unsigned long bus_clock, cpu_clock_freq; unsigned long memsize, highmemsize; diff --git a/arch/mips/lemote/lm2e/init.c b/arch/mips/lemote/lm2e/init.c index 6fe624d697e..3abe927422a 100644 --- a/arch/mips/lemote/lm2e/init.c +++ b/arch/mips/lemote/lm2e/init.c @@ -8,15 +8,11 @@ * option) any later version. */ -#include #include #include -#include -extern void __init prom_init_cmdline(void); -extern void __init prom_init_env(void); -extern void __init prom_init_memory(void); +#include void __init prom_init(void) { diff --git a/arch/mips/lemote/lm2e/irq.c b/arch/mips/lemote/lm2e/irq.c index 1d0a09f3b83..fb7643a2561 100644 --- a/arch/mips/lemote/lm2e/irq.c +++ b/arch/mips/lemote/lm2e/irq.c @@ -6,35 +6,14 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * */ #include -#include -#include #include -#include #include #include -#include -#include - +#include /* * the first level int-handler will jump here if it is a bonito irq */ @@ -67,27 +46,24 @@ static void i8259_irqdispatch(void) int irq; irq = i8259_irq(); - if (irq >= 0) { + if (irq >= 0) do_IRQ(irq); - } else { + else spurious_interrupt(); - } - } asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; - if (pending & CAUSEF_IP7) { + if (pending & CAUSEF_IP7) do_IRQ(MIPS_CPU_IRQ_BASE + 7); - } else if (pending & CAUSEF_IP5) { + else if (pending & CAUSEF_IP5) i8259_irqdispatch(); - } else if (pending & CAUSEF_IP2) { + else if (pending & CAUSEF_IP2) bonito_irqdispatch(); - } else { + else spurious_interrupt(); - } } static struct irqaction cascade_irqaction = { @@ -97,8 +73,6 @@ static struct irqaction cascade_irqaction = { void __init arch_init_irq(void) { - extern void bonito_irq_init(void); - /* * Clear all of the interrupts while we change the able around a bit. * int-handler is not on bootstrap @@ -128,16 +102,8 @@ void __init arch_init_irq(void) init_i8259_irqs(); bonito_irq_init(); - /* - printk("GPIODATA=%x, GPIOIE=%x\n", BONITO_GPIODATA, BONITO_GPIOIE); - printk("INTEN=%x, INTSET=%x, INTCLR=%x, INTISR=%x\n", - BONITO_INTEN, BONITO_INTENSET, - BONITO_INTENCLR, BONITO_INTISR); - */ - /* bonito irq at IP2 */ setup_irq(MIPS_CPU_IRQ_BASE + 2, &cascade_irqaction); /* 8259 irq at IP5 */ setup_irq(MIPS_CPU_IRQ_BASE + 5, &cascade_irqaction); - } diff --git a/arch/mips/lemote/lm2e/mem.c b/arch/mips/lemote/lm2e/mem.c index f24af70b669..6a7feb178fa 100644 --- a/arch/mips/lemote/lm2e/mem.c +++ b/arch/mips/lemote/lm2e/mem.c @@ -10,7 +10,7 @@ #include -extern unsigned long memsize, highmemsize; +#include void __init prom_init_memory(void) { diff --git a/arch/mips/lemote/lm2e/pci.c b/arch/mips/lemote/lm2e/pci.c index 152efb65bba..bee846e6d8a 100644 --- a/arch/mips/lemote/lm2e/pci.c +++ b/arch/mips/lemote/lm2e/pci.c @@ -1,6 +1,4 @@ /* - * pci.c - * * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * @@ -8,31 +6,11 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * */ -#include #include -#include -#include -#include -#include -extern struct pci_ops bonito64_pci_ops; +#include +#include static struct resource loongson2e_pci_mem_resource = { .name = "LOONGSON2E PCI MEM", diff --git a/arch/mips/lemote/lm2e/reset.c b/arch/mips/lemote/lm2e/reset.c deleted file mode 100644 index 0faa140f403..00000000000 --- a/arch/mips/lemote/lm2e/reset.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * Copyright (C) 2009 Lemote, Inc. & Institute of Computing Technology - * Author: Zhangjin Wu, wuzj@lemote.com - */ -#include -#include -#include - -#include -#include - -static void loongson2e_restart(char *command) -{ - /* do preparation for reboot */ - BONITO_BONGENCFG &= ~(1 << 2); - BONITO_BONGENCFG |= (1 << 2); - - /* reboot via jumping to boot base address */ - ((void (*)(void))ioremap_nocache(BONITO_BOOT_BASE, 4)) (); -} - -static void loongson2e_halt(void) -{ - while (1) ; -} - -static int __init mips_reboot_setup(void) -{ - _machine_restart = loongson2e_restart; - _machine_halt = loongson2e_halt; - pm_power_off = loongson2e_halt; - - return 0; -} - -arch_initcall(mips_reboot_setup); diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c index 03578cf0ee7..66390215973 100644 --- a/arch/mips/lemote/lm2e/setup.c +++ b/arch/mips/lemote/lm2e/setup.c @@ -1,7 +1,4 @@ /* - * BRIEF MODULE DESCRIPTION - * setup.c - board dependent boot routines - * * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * @@ -9,28 +6,13 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * */ -#include #include #include +#include + #ifdef CONFIG_VT #include #include #endif diff --git a/arch/mips/lemote/lm2e/time.c b/arch/mips/lemote/lm2e/time.c index b9d3f11b620..b13d1717465 100644 --- a/arch/mips/lemote/lm2e/time.c +++ b/arch/mips/lemote/lm2e/time.c @@ -10,13 +10,10 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ - -#include - #include #include -extern unsigned long cpu_clock_freq; +#include void __init plat_time_init(void) { diff --git a/arch/mips/pci/fixup-lm2e.c b/arch/mips/pci/fixup-lm2e.c index e18ae4f574c..0c4c7a81213 100644 --- a/arch/mips/pci/fixup-lm2e.c +++ b/arch/mips/pci/fixup-lm2e.c @@ -1,6 +1,4 @@ /* - * fixup-lm2e.c - * * Copyright (C) 2004 ICT CAS * Author: Li xiaoyu, ICT CAS * lixy@ict.ac.cn @@ -12,22 +10,6 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * */ #include #include -- cgit v1.2.3-70-g09d2 From f7face03c66cea12159191ba8d2a9ee735da0b0d Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Thu, 2 Jul 2009 23:23:30 +0800 Subject: MIPS: Loongson: PCI: Clean up pcimap setup Fix the wrong original comment on pcimap and make the source code easier to understand. Some additional considerations are also folded in.
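As a reading aid for the window registers programmed below (an editor's sketch, not part of this patch): each of the three 64 MB CPU windows starting at 256 MB is redirected to the 64 MB PCI region selected by a 6-bit pcimap_lo field, in units of 64 MB. Assuming that register layout (lo0/lo1/lo2 in bits 5-0, 11-6, 17-12; the pcimap_2 bit is ignored here), the hypothetical helper cpu_to_pci() models the translation in plain C:

#include <stdint.h>
#include <stdio.h>

#define WIN_SIZE 0x04000000u	/* 64 MB per pcimap_lo window */
#define CPU_WIN0 0x10000000u	/* first CPU window starts at 256 MB */

/* Translate a CPU physical address in [256M, 448M) to a PCI bus address,
 * given the pcimap register value. Illustrative names, not kernel API. */
static uint32_t cpu_to_pci(uint32_t cpu_addr, uint32_t pcimap)
{
	uint32_t win = (cpu_addr - CPU_WIN0) / WIN_SIZE;	/* window 0, 1 or 2 */
	uint32_t lo = (pcimap >> (win * 6)) & 0x3f;		/* pcimap_lo[win] */

	return lo * WIN_SIZE + (cpu_addr & (WIN_SIZE - 1));
}

int main(void)
{
	uint32_t pcimap = (6u << 12) | (5u << 6) | 0u;	/* lo2=6, lo1=5, lo0=0 */

	printf("0x10000000 -> 0x%08x\n", cpu_to_pci(0x10000000u, pcimap));	/* 0x00000000 */
	printf("0x14000000 -> 0x%08x\n", cpu_to_pci(0x14000000u, pcimap));	/* 0x14000000 */
	return 0;
}

With the values this patch programs, pcimap_lo = {0, 5, 6}, CPU address 0x10000000 (256M) translates to PCI 0x00000000, while 0x14000000 (320M) and 0x18000000 (384M) pass straight through, matching the [0M,64M], [320M,384M], [384M,448M] table in the new comment.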
Signed-off-by: Wu Zhangjin Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-lemote/loongson.h | 17 ++++++++++++ arch/mips/include/asm/mach-lemote/pci.h | 4 +-- arch/mips/lemote/lm2e/pci.c | 41 ++++++++++++++++++---------- 3 files changed, 45 insertions(+), 17 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/mach-lemote/loongson.h b/arch/mips/include/asm/mach-lemote/loongson.h index 76cc2bddfa4..916eacec94e 100644 --- a/arch/mips/include/asm/mach-lemote/loongson.h +++ b/arch/mips/include/asm/mach-lemote/loongson.h @@ -33,4 +33,21 @@ extern void __init prom_init_memory(void); extern void __init prom_init_cmdline(void); extern void __init prom_init_env(void); +/* PCI Configuration Registers */ +#define LOONGSON_PCI_ISR4C BONITO_PCI_REG(0x4c) + +/* PCI_Hit*_Sel_* */ + +#define LOONGSON_PCI_HIT0_SEL_L BONITO(BONITO_REGBASE + 0x50) +#define LOONGSON_PCI_HIT0_SEL_H BONITO(BONITO_REGBASE + 0x54) +#define LOONGSON_PCI_HIT1_SEL_L BONITO(BONITO_REGBASE + 0x58) +#define LOONGSON_PCI_HIT1_SEL_H BONITO(BONITO_REGBASE + 0x5c) +#define LOONGSON_PCI_HIT2_SEL_L BONITO(BONITO_REGBASE + 0x60) +#define LOONGSON_PCI_HIT2_SEL_H BONITO(BONITO_REGBASE + 0x64) + +/* PXArb Config & Status */ + +#define LOONGSON_PXARB_CFG BONITO(BONITO_REGBASE + 0x68) +#define LOONGSON_PXARB_STATUS BONITO(BONITO_REGBASE + 0x6c) + #endif /* __ASM_MACH_LOONGSON_LOONGSON_H */ diff --git a/arch/mips/include/asm/mach-lemote/pci.h b/arch/mips/include/asm/mach-lemote/pci.h index 92b2f59d890..3e6b1300afb 100644 --- a/arch/mips/include/asm/mach-lemote/pci.h +++ b/arch/mips/include/asm/mach-lemote/pci.h @@ -24,8 +24,8 @@ extern struct pci_ops bonito64_pci_ops; -#define LOONGSON2E_PCI_MEM_START 0x14000000UL -#define LOONGSON2E_PCI_MEM_END 0x1fffffffUL +#define LOONGSON2E_PCI_MEM_START BONITO_PCILO1_BASE +#define LOONGSON2E_PCI_MEM_END (BONITO_PCILO1_BASE + 0x04000000 * 2) #define LOONGSON2E_PCI_IO_START 0x00004000UL #endif /* !__ASM_MACH_LEMOTE_PCI_H_ */ diff --git a/arch/mips/lemote/lm2e/pci.c b/arch/mips/lemote/lm2e/pci.c index bee846e6d8a..9812c30cc6e 100644 --- a/arch/mips/lemote/lm2e/pci.c +++ b/arch/mips/lemote/lm2e/pci.c @@ -34,33 +34,44 @@ static struct pci_controller loongson2e_pci_controller = { .io_offset = 0x00000000UL, }; -static void __init ict_pcimap(void) +static void __init setup_pcimap(void) { /* - * local to PCI mapping: [256M,512M] -> [256M,512M]; differ from PMON - * + * local to PCI mapping for CPU accessing PCI space * CPU address space [256M,448M] is window for accessing pci space - * we set pcimap_lo[0,1,2] to map it to pci space [256M,448M] - * pcimap: bit18,pcimap_2; bit[17-12],lo2;bit[11-6],lo1;bit[5-0],lo0 + * we set pcimap_lo[0,1,2] to map it to pci space[0M,64M], [320M,448M] + * + * pcimap: PCI_MAP2 PCI_Mem_Lo2 PCI_Mem_Lo1 PCI_Mem_Lo0 + * [<2G] [384M,448M] [320M,384M] [0M,64M] */ - /* 1,00 0110 ,0001 01,00 0000 */ - BONITO_PCIMAP = 0x46140; - - /* 1, 00 0010, 0000,01, 00 0000 */ - /* BONITO_PCIMAP = 0x42040; */ + BONITO_PCIMAP = BONITO_PCIMAP_PCIMAP_2 | + BONITO_PCIMAP_WIN(2, BONITO_PCILO2_BASE) | + BONITO_PCIMAP_WIN(1, BONITO_PCILO1_BASE) | + BONITO_PCIMAP_WIN(0, 0); /* - * PCI to local mapping: [2G,2G+256M] -> [0,256M] + * PCI-DMA to local mapping: [2G,2G+256M] -> [0M,256M] */ - BONITO_PCIBASE0 = 0x80000000; - BONITO_PCIBASE1 = 0x00800000; - BONITO_PCIBASE2 = 0x90000000; + BONITO_PCIBASE0 = 0x80000000ul; /* base: 2G -> mmap: 0M */ + /* size: 256M, burst transmission, pre-fetch enable, 64bit */ + LOONGSON_PCI_HIT0_SEL_L = 0xc000000cul; + LOONGSON_PCI_HIT0_SEL_H = 
0xfffffffful; + LOONGSON_PCI_HIT1_SEL_L = 0x00000006ul; /* set this BAR as invalid */ + LOONGSON_PCI_HIT1_SEL_H = 0x00000000ul; + LOONGSON_PCI_HIT2_SEL_L = 0x00000006ul; /* set this BAR as invalid */ + LOONGSON_PCI_HIT2_SEL_H = 0x00000000ul; + + /* avoid deadlock of PCI reading/writing lock operation */ + LOONGSON_PCI_ISR4C = 0xd2000001ul; + /* can not change gnt to break pci transfer when device's gnt not + deassert for some broken device */ + LOONGSON_PXARB_CFG = 0x00fe0105ul; } static int __init pcibios_init(void) { - ict_pcimap(); + setup_pcimap(); loongson2e_pci_controller.io_map_base = mips_io_port_base; -- cgit v1.2.3-70-g09d2 From 67b35e5d01aba7a83f2161b0c90acb08afa01e3e Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Thu, 2 Jul 2009 23:25:46 +0800 Subject: MIPS: Loongson: Add oprofile support This kernel support is needed by the user-space oprofile tool to profile the Linux kernel or applications via the Loongson2 performance counters. You can enable this driver via CONFIG_OPROFILE=y or =m. Loongson2 has two performance counters, each of which can count one of 16 events. When either performance counter overflows, an interrupt is generated and routed to IRQ MIPS_CPU_IRQ_BASE + 6. Signed-off-by: Yanhua Signed-off-by: Wu Zhangjin Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-lemote/loongson.h | 3 + arch/mips/lemote/lm2e/irq.c | 2 + arch/mips/oprofile/Makefile | 1 + arch/mips/oprofile/common.c | 4 + arch/mips/oprofile/op_model_loongson2.c | 177 +++++++++++++++++++++ 5 files changed, 187 insertions(+) create mode 100644 arch/mips/oprofile/op_model_loongson2.c (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/mach-lemote/loongson.h b/arch/mips/include/asm/mach-lemote/loongson.h index 916eacec94e..95ee4c8f2d5 100644 --- a/arch/mips/include/asm/mach-lemote/loongson.h +++ b/arch/mips/include/asm/mach-lemote/loongson.h @@ -50,4 +50,7 @@ extern void __init prom_init_env(void); #define LOONGSON_PXARB_CFG BONITO(BONITO_REGBASE + 0x68) #define LOONGSON_PXARB_STATUS BONITO(BONITO_REGBASE + 0x6c) +/* loongson2-specific perf counter IRQ */ +#define LOONGSON2_PERFCNT_IRQ (MIPS_CPU_IRQ_BASE + 6) + #endif /* __ASM_MACH_LOONGSON_LOONGSON_H */ diff --git a/arch/mips/lemote/lm2e/irq.c b/arch/mips/lemote/lm2e/irq.c index fb7643a2561..9585f5aa7cc 100644 --- a/arch/mips/lemote/lm2e/irq.c +++ b/arch/mips/lemote/lm2e/irq.c @@ -58,6 +58,8 @@ asmlinkage void plat_irq_dispatch(void) if (pending & CAUSEF_IP7) do_IRQ(MIPS_CPU_IRQ_BASE + 7); + else if (pending & CAUSEF_IP6) /* perf counter loverflow */ + do_IRQ(LOONGSON2_PERFCNT_IRQ); else if (pending & CAUSEF_IP5) i8259_irqdispatch(); else if (pending & CAUSEF_IP2) diff --git a/arch/mips/oprofile/Makefile b/arch/mips/oprofile/Makefile index bf3be6fcf7f..02cc65e52d1 100644 --- a/arch/mips/oprofile/Makefile +++ b/arch/mips/oprofile/Makefile @@ -15,3 +15,4 @@ oprofile-$(CONFIG_CPU_MIPS64) += op_model_mipsxx.o oprofile-$(CONFIG_CPU_R10000) += op_model_mipsxx.o oprofile-$(CONFIG_CPU_SB1) += op_model_mipsxx.o oprofile-$(CONFIG_CPU_RM9000) += op_model_rm9000.o +oprofile-$(CONFIG_CPU_LOONGSON2) += op_model_loongson2.o diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index 3bf3354547f..7832ad257a1 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -16,6 +16,7 @@ extern struct op_mips_model op_model_mipsxx_ops __attribute__((weak)); extern struct op_mips_model op_model_rm9000_ops __attribute__((weak)); +extern struct op_mips_model op_model_loongson2_ops
__attribute__((weak)); static struct op_mips_model *model; @@ -93,6 +94,9 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) case CPU_RM9000: lmodel = &op_model_rm9000_ops; break; + case CPU_LOONGSON2: + lmodel = &op_model_loongson2_ops; + break; }; if (!lmodel) diff --git a/arch/mips/oprofile/op_model_loongson2.c b/arch/mips/oprofile/op_model_loongson2.c new file mode 100644 index 00000000000..655cb8dec34 --- /dev/null +++ b/arch/mips/oprofile/op_model_loongson2.c @@ -0,0 +1,177 @@ +/* + * Loongson2 performance counter driver for oprofile + * + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Yanhua + * Author: Wu Zhangjin + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + */ +#include +#include +#include + +#include /* LOONGSON2_PERFCNT_IRQ */ +#include "op_impl.h" + +/* + * a patch should be sent to oprofile with the loongson-specific support. + * otherwise, the oprofile tool will not recognize this and complain about + * "cpu_type 'unset' is not valid". + */ +#define LOONGSON2_CPU_TYPE "mips/godson2" + +#define LOONGSON2_COUNTER1_EVENT(event) ((event & 0x0f) << 5) +#define LOONGSON2_COUNTER2_EVENT(event) ((event & 0x0f) << 9) + +#define LOONGSON2_PERFCNT_EXL (1UL << 0) +#define LOONGSON2_PERFCNT_KERNEL (1UL << 1) +#define LOONGSON2_PERFCNT_SUPERVISOR (1UL << 2) +#define LOONGSON2_PERFCNT_USER (1UL << 3) +#define LOONGSON2_PERFCNT_INT_EN (1UL << 4) +#define LOONGSON2_PERFCNT_OVERFLOW (1ULL << 31) + +/* Loongson2 performance counter register */ +#define read_c0_perfctrl() __read_64bit_c0_register($24, 0) +#define write_c0_perfctrl(val) __write_64bit_c0_register($24, 0, val) +#define read_c0_perfcnt() __read_64bit_c0_register($25, 0) +#define write_c0_perfcnt(val) __write_64bit_c0_register($25, 0, val) + +static struct loongson2_register_config { + unsigned int ctrl; + unsigned long long reset_counter1; + unsigned long long reset_counter2; + int cnt1_enalbed, cnt2_enalbed; +} reg; + +DEFINE_SPINLOCK(sample_lock); + +static char *oprofid = "LoongsonPerf"; +static irqreturn_t loongson2_perfcount_handler(int irq, void *dev_id); +/* Compute all of the registers in preparation for enabling profiling. */ + +static void loongson2_reg_setup(struct op_counter_config *cfg) +{ + unsigned int ctrl = 0; + + reg.reset_counter1 = 0; + reg.reset_counter2 = 0; + /* Compute the performance counter ctrl word. */ + /* For now count kernel and user mode */ + if (cfg[0].enabled) { + ctrl |= LOONGSON2_COUNTER1_EVENT(cfg[0].event); + reg.reset_counter1 = 0x80000000ULL - cfg[0].count; + } + + if (cfg[1].enabled) { + ctrl |= LOONGSON2_COUNTER2_EVENT(cfg[1].event); + reg.reset_counter2 = (0x80000000ULL - cfg[1].count); + } + + if (cfg[0].enabled || cfg[1].enabled) { + ctrl |= LOONGSON2_PERFCNT_EXL | LOONGSON2_PERFCNT_INT_EN; + if (cfg[0].kernel || cfg[1].kernel) + ctrl |= LOONGSON2_PERFCNT_KERNEL; + if (cfg[0].user || cfg[1].user) + ctrl |= LOONGSON2_PERFCNT_USER; + } + + reg.ctrl = ctrl; + + reg.cnt1_enalbed = cfg[0].enabled; + reg.cnt2_enalbed = cfg[1].enabled; + +} + +/* Program all of the registers in preparation for enabling profiling. 
*/ + +static void loongson2_cpu_setup(void *args) +{ + uint64_t perfcount; + + perfcount = (reg.reset_counter2 << 32) | reg.reset_counter1; + write_c0_perfcnt(perfcount); +} + +static void loongson2_cpu_start(void *args) +{ + /* Start all counters on current CPU */ + if (reg.cnt1_enalbed || reg.cnt2_enalbed) + write_c0_perfctrl(reg.ctrl); +} + +static void loongson2_cpu_stop(void *args) +{ + /* Stop all counters on current CPU */ + write_c0_perfctrl(0); + memset(&reg, 0, sizeof(reg)); +} + +static irqreturn_t loongson2_perfcount_handler(int irq, void *dev_id) +{ + uint64_t counter, counter1, counter2; + struct pt_regs *regs = get_irq_regs(); + int enabled; + unsigned long flags; + + /* + * LOONGSON2 defines two 32-bit performance counters. + * To avoid a race updating the registers we need to stop the counters + * while we're messing with + * them ... + */ + + /* Check whether the irq belongs to me */ + enabled = reg.cnt1_enalbed | reg.cnt2_enalbed; + if (!enabled) + return IRQ_NONE; + + counter = read_c0_perfcnt(); + counter1 = counter & 0xffffffff; + counter2 = counter >> 32; + + spin_lock_irqsave(&sample_lock, flags); + + if (counter1 & LOONGSON2_PERFCNT_OVERFLOW) { + if (reg.cnt1_enalbed) + oprofile_add_sample(regs, 0); + counter1 = reg.reset_counter1; + } + if (counter2 & LOONGSON2_PERFCNT_OVERFLOW) { + if (reg.cnt2_enalbed) + oprofile_add_sample(regs, 1); + counter2 = reg.reset_counter2; + } + + spin_unlock_irqrestore(&sample_lock, flags); + + write_c0_perfcnt((counter2 << 32) | counter1); + + return IRQ_HANDLED; +} + +static int __init loongson2_init(void) +{ + return request_irq(LOONGSON2_PERFCNT_IRQ, loongson2_perfcount_handler, + IRQF_SHARED, "Perfcounter", oprofid); +} + +static void loongson2_exit(void) +{ + write_c0_perfctrl(0); + free_irq(LOONGSON2_PERFCNT_IRQ, oprofid); +} + +struct op_mips_model op_model_loongson2_ops = { + .reg_setup = loongson2_reg_setup, + .cpu_setup = loongson2_cpu_setup, + .init = loongson2_init, + .exit = loongson2_exit, + .cpu_start = loongson2_cpu_start, + .cpu_stop = loongson2_cpu_stop, + .cpu_type = LOONGSON2_CPU_TYPE, + .num_counters = 2 +}; -- cgit v1.2.3-70-g09d2
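One detail of the oprofile model above deserves a standalone illustration. loongson2_reg_setup() seeds each counter with 0x80000000 - count, so bit 31 (the LOONGSON2_PERFCNT_OVERFLOW flag) becomes set after exactly count events, and the interrupt handler then re-arms the counter by writing the reset value back. Below is a minimal, self-contained C sketch of that arithmetic alone; the names and the simulated increment loop are an editor's illustration, not kernel code:

#include <stdint.h>
#include <stdio.h>

#define PERFCNT_OVERFLOW (1u << 31)	/* mirrors LOONGSON2_PERFCNT_OVERFLOW */

int main(void)
{
	uint32_t count = 1000;			/* desired sampling period */
	uint32_t counter = 0x80000000u - count;	/* reset value, as in loongson2_reg_setup() */
	uint32_t events = 0;

	/* stand-in for the hardware incrementing the counter once per event */
	while (!(counter & PERFCNT_OVERFLOW)) {
		counter++;
		events++;
	}

	printf("overflow after %u events\n", events);	/* prints: overflow after 1000 events */
	return 0;
}

This is also why loongson2_perfcount_handler() writes reg.reset_counter1/reg.reset_counter2 back into the counter register after taking a sample: each overflow interrupt starts the next sampling period.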
From 8e4971175acc910eb4258df82a6bd8f2c4e4e5b5 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Thu, 2 Jul 2009 23:26:08 +0800 Subject: MIPS: Loongson: Change naming methods To make the Loongson source code shareable with machines made by other companies (such as the Gdium), we rename arch/mips/lemote to arch/mips/loongson, asm/mach-lemote to asm/mach-loongson, and lm2e to the name of the machine: fuloong-2e. Accordingly, FULONG is renamed to FULOONG2E to distinguish it from the future FULOONG2F. Some other related tuning is also needed. Signed-off-by: Wu Zhangjin Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 8 +- arch/mips/Makefile | 8 +- .../asm/mach-lemote/cpu-feature-overrides.h | 59 ------ arch/mips/include/asm/mach-lemote/dma-coherence.h | 68 ------- arch/mips/include/asm/mach-lemote/loongson.h | 56 ------ arch/mips/include/asm/mach-lemote/mc146818rtc.h | 36 ---- arch/mips/include/asm/mach-lemote/pci.h | 31 --- arch/mips/include/asm/mach-lemote/war.h | 25 --- .../asm/mach-loongson/cpu-feature-overrides.h | 59 ++++++ .../mips/include/asm/mach-loongson/dma-coherence.h | 68 +++++++ arch/mips/include/asm/mach-loongson/loongson.h | 53 +++++ arch/mips/include/asm/mach-loongson/mc146818rtc.h | 36 ++++ arch/mips/include/asm/mach-loongson/pci.h | 31 +++ arch/mips/include/asm/mach-loongson/war.h | 25 +++ arch/mips/include/asm/mips-boards/bonito64.h | 2 +- arch/mips/lemote/lm2e/Makefile | 13 -- arch/mips/lemote/lm2e/bonito-irq.c | 51 ----- arch/mips/lemote/lm2e/cmdline.c | 52 ----- arch/mips/lemote/lm2e/early_printk.c | 39 ---- arch/mips/lemote/lm2e/env.c | 58 ------ arch/mips/lemote/lm2e/init.c | 30 --- arch/mips/lemote/lm2e/irq.c | 111 ---------- arch/mips/lemote/lm2e/machtype.c | 15 -- arch/mips/lemote/lm2e/mem.c | 36 ---- arch/mips/lemote/lm2e/pci.c | 83 -------- arch/mips/lemote/lm2e/setup.c | 58 ------ arch/mips/lemote/lm2e/time.c | 27 --- arch/mips/loongson/fuloong-2e/Makefile | 13 ++ arch/mips/loongson/fuloong-2e/bonito-irq.c | 51 +++++ arch/mips/loongson/fuloong-2e/cmdline.c | 52 +++++ arch/mips/loongson/fuloong-2e/early_printk.c | 39 ++++ arch/mips/loongson/fuloong-2e/env.c | 58 ++++++ arch/mips/loongson/fuloong-2e/init.c | 30 +++ arch/mips/loongson/fuloong-2e/irq.c | 111 ++++++++++ arch/mips/loongson/fuloong-2e/machtype.c | 15 ++ arch/mips/loongson/fuloong-2e/mem.c | 36 ++++ arch/mips/loongson/fuloong-2e/pci.c | 83 ++++++++ arch/mips/loongson/fuloong-2e/reset.c | 44 ++++ arch/mips/loongson/fuloong-2e/time.c | 27 +++ arch/mips/pci/Makefile | 2 +- arch/mips/pci/fixup-fuloong2e.c | 224 +++++++++++++++++++++ arch/mips/pci/fixup-lm2e.c | 224 --------------------- arch/mips/pci/ops-bonito64.c | 4 +- 43 files changed, 1067 insertions(+), 1084 deletions(-) delete mode 100644 arch/mips/include/asm/mach-lemote/cpu-feature-overrides.h delete mode 100644 arch/mips/include/asm/mach-lemote/dma-coherence.h delete mode 100644 arch/mips/include/asm/mach-lemote/loongson.h delete mode 100644 arch/mips/include/asm/mach-lemote/mc146818rtc.h delete mode 100644 arch/mips/include/asm/mach-lemote/pci.h delete mode 100644 arch/mips/include/asm/mach-lemote/war.h delete mode 100644 arch/mips/lemote/lm2e/Makefile delete mode 100644 arch/mips/lemote/lm2e/bonito-irq.c delete mode 100644 arch/mips/lemote/lm2e/cmdline.c delete mode 100644 arch/mips/lemote/lm2e/early_printk.c delete mode 100644 arch/mips/lemote/lm2e/env.c delete mode 100644 arch/mips/lemote/lm2e/init.c delete mode 100644 arch/mips/lemote/lm2e/irq.c delete mode 100644 arch/mips/lemote/lm2e/machtype.c delete mode 100644 arch/mips/lemote/lm2e/mem.c delete mode 100644 arch/mips/lemote/lm2e/pci.c delete mode 100644 arch/mips/lemote/lm2e/setup.c delete mode 100644
arch/mips/lemote/lm2e/time.c create mode 100644 arch/mips/loongson/fuloong-2e/Makefile create mode 100644 arch/mips/loongson/fuloong-2e/bonito-irq.c create mode 100644 arch/mips/loongson/fuloong-2e/cmdline.c create mode 100644 arch/mips/loongson/fuloong-2e/early_printk.c create mode 100644 arch/mips/loongson/fuloong-2e/env.c create mode 100644 arch/mips/loongson/fuloong-2e/init.c create mode 100644 arch/mips/loongson/fuloong-2e/irq.c create mode 100644 arch/mips/loongson/fuloong-2e/machtype.c create mode 100644 arch/mips/loongson/fuloong-2e/mem.c create mode 100644 arch/mips/loongson/fuloong-2e/pci.c create mode 100644 arch/mips/loongson/fuloong-2e/reset.c create mode 100644 arch/mips/loongson/fuloong-2e/time.c create mode 100644 arch/mips/pci/fixup-fuloong2e.c delete mode 100644 arch/mips/pci/fixup-lm2e.c (limited to 'arch/mips/include/asm') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index a383dac8101..3414e230182 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -6,7 +6,7 @@ config MIPS select HAVE_ARCH_KGDB # Horrible source of confusion. Die, die, die ... select EMBEDDED - select RTC_LIB if !LEMOTE_FULONG + select RTC_LIB if !LEMOTE_FULOONG2E mainmenu "Linux/MIPS Kernel Configuration" @@ -174,8 +174,8 @@ config LASAT select SYS_SUPPORTS_64BIT_KERNEL if BROKEN select SYS_SUPPORTS_LITTLE_ENDIAN -config LEMOTE_FULONG - bool "Lemote Fulong mini-PC" +config LEMOTE_FULOONG2E + bool "Lemote Fuloong2e mini-PC" select ARCH_SPARSEMEM_ENABLE select CEVT_R4K select CSRC_R4K @@ -196,7 +196,7 @@ config LEMOTE_FULONG select GENERIC_ISA_DMA_SUPPORT_BROKEN select CPU_HAS_WB help - Lemote Fulong mini-PC board based on the Chinese Loongson-2E CPU and + Lemote Fuloong2e mini-PC board based on the Chinese Loongson-2E CPU and an FPGA northbridge config MIPS_MALTA diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 861da514a46..7754cbbbf4e 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -314,11 +314,11 @@ cflags-$(CONFIG_WR_PPMC) += -I$(srctree)/arch/mips/include/asm/mach-wrppmc load-$(CONFIG_WR_PPMC) += 0xffffffff80100000 # -# lemote fulong mini-PC board +# lemote fuloong2e mini-PC board # -core-$(CONFIG_LEMOTE_FULONG) +=arch/mips/lemote/lm2e/ -load-$(CONFIG_LEMOTE_FULONG) +=0xffffffff80100000 -cflags-$(CONFIG_LEMOTE_FULONG) += -I$(srctree)/arch/mips/include/asm/mach-lemote +core-$(CONFIG_LEMOTE_FULOONG2E) +=arch/mips/loongson/fuloong-2e/ +load-$(CONFIG_LEMOTE_FULOONG2E) +=0xffffffff80100000 +cflags-$(CONFIG_LEMOTE_FULOONG2E) += -I$(srctree)/arch/mips/include/asm/mach-loongson/ # # MIPS Malta board diff --git a/arch/mips/include/asm/mach-lemote/cpu-feature-overrides.h b/arch/mips/include/asm/mach-lemote/cpu-feature-overrides.h deleted file mode 100644 index 550a10dc9db..00000000000 --- a/arch/mips/include/asm/mach-lemote/cpu-feature-overrides.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2009 Wu Zhangjin - * Copyright (C) 2009 Philippe Vachon - * Copyright (C) 2009 Zhang Le - * - * reference: /proc/cpuinfo, - * arch/mips/kernel/cpu-probe.c(cpu_probe_legacy), - * arch/mips/kernel/proc.c(show_cpuinfo), - * loongson2f user manual. 
- */ - -#ifndef __ASM_MACH_LEMOTE_CPU_FEATURE_OVERRIDES_H -#define __ASM_MACH_LEMOTE_CPU_FEATURE_OVERRIDES_H - -#define cpu_dcache_line_size() 32 -#define cpu_icache_line_size() 32 -#define cpu_scache_line_size() 32 - - -#define cpu_has_32fpr 1 -#define cpu_has_3k_cache 0 -#define cpu_has_4k_cache 1 -#define cpu_has_4kex 1 -#define cpu_has_64bits 1 -#define cpu_has_cache_cdex_p 0 -#define cpu_has_cache_cdex_s 0 -#define cpu_has_counter 1 -#define cpu_has_dc_aliases 1 -#define cpu_has_divec 0 -#define cpu_has_dsp 0 -#define cpu_has_ejtag 0 -#define cpu_has_fpu 1 -#define cpu_has_ic_fills_f_dc 0 -#define cpu_has_inclusive_pcaches 1 -#define cpu_has_llsc 1 -#define cpu_has_mcheck 0 -#define cpu_has_mdmx 0 -#define cpu_has_mips16 0 -#define cpu_has_mips32r1 0 -#define cpu_has_mips32r2 0 -#define cpu_has_mips3d 0 -#define cpu_has_mips64r1 0 -#define cpu_has_mips64r2 0 -#define cpu_has_mipsmt 0 -#define cpu_has_prefetch 0 -#define cpu_has_smartmips 0 -#define cpu_has_tlb 1 -#define cpu_has_tx39_cache 0 -#define cpu_has_userlocal 0 -#define cpu_has_vce 0 -#define cpu_has_vtag_icache 0 -#define cpu_has_watch 1 -#define cpu_icache_snoops_remote_store 1 - -#endif /* __ASM_MACH_LEMOTE_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-lemote/dma-coherence.h b/arch/mips/include/asm/mach-lemote/dma-coherence.h deleted file mode 100644 index c8de5e75077..00000000000 --- a/arch/mips/include/asm/mach-lemote/dma-coherence.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2006, 07 Ralf Baechle - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - */ -#ifndef __ASM_MACH_LEMOTE_DMA_COHERENCE_H -#define __ASM_MACH_LEMOTE_DMA_COHERENCE_H - -struct device; - -static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr, - size_t size) -{ - return virt_to_phys(addr) | 0x80000000; -} - -static inline dma_addr_t plat_map_dma_mem_page(struct device *dev, - struct page *page) -{ - return page_to_phys(page) | 0x80000000; -} - -static inline unsigned long plat_dma_addr_to_phys(struct device *dev, - dma_addr_t dma_addr) -{ - return dma_addr & 0x7fffffff; -} - -static inline void plat_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction) -{ -} - -static inline int plat_dma_supported(struct device *dev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - */ - if (mask < DMA_BIT_MASK(24)) - return 0; - - return 1; -} - -static inline void plat_extra_sync_for_device(struct device *dev) -{ - return; -} - -static inline int plat_dma_mapping_error(struct device *dev, - dma_addr_t dma_addr) -{ - return 0; -} - -static inline int plat_device_is_coherent(struct device *dev) -{ - return 0; -} - -#endif /* __ASM_MACH_LEMOTE_DMA_COHERENCE_H */ diff --git a/arch/mips/include/asm/mach-lemote/loongson.h b/arch/mips/include/asm/mach-lemote/loongson.h deleted file mode 100644 index 95ee4c8f2d5..00000000000 --- a/arch/mips/include/asm/mach-lemote/loongson.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2009 Lemote, Inc. 
& Institute of Computing Technology - * Author: Wu Zhangjin - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ - -#ifndef __ASM_MACH_LOONGSON_LOONGSON_H -#define __ASM_MACH_LOONGSON_LOONGSON_H - -#include -#include - -/* there is an internal bonito64-compatiable northbridge in loongson2e/2f */ -#include - -/* loongson internal northbridge initialization */ -extern void bonito_irq_init(void); - -/* loongson-based machines specific reboot setup */ -extern void mips_reboot_setup(void); - -/* environment arguments from bootloader */ -extern unsigned long bus_clock, cpu_clock_freq; -extern unsigned long memsize, highmemsize; - -/* loongson-specific command line, env and memory initialization */ -extern void __init prom_init_memory(void); -extern void __init prom_init_cmdline(void); -extern void __init prom_init_env(void); - -/* PCI Configuration Registers */ -#define LOONGSON_PCI_ISR4C BONITO_PCI_REG(0x4c) - -/* PCI_Hit*_Sel_* */ - -#define LOONGSON_PCI_HIT0_SEL_L BONITO(BONITO_REGBASE + 0x50) -#define LOONGSON_PCI_HIT0_SEL_H BONITO(BONITO_REGBASE + 0x54) -#define LOONGSON_PCI_HIT1_SEL_L BONITO(BONITO_REGBASE + 0x58) -#define LOONGSON_PCI_HIT1_SEL_H BONITO(BONITO_REGBASE + 0x5c) -#define LOONGSON_PCI_HIT2_SEL_L BONITO(BONITO_REGBASE + 0x60) -#define LOONGSON_PCI_HIT2_SEL_H BONITO(BONITO_REGBASE + 0x64) - -/* PXArb Config & Status */ - -#define LOONGSON_PXARB_CFG BONITO(BONITO_REGBASE + 0x68) -#define LOONGSON_PXARB_STATUS BONITO(BONITO_REGBASE + 0x6c) - -/* loongson2-specific perf counter IRQ */ -#define LOONGSON2_PERFCNT_IRQ (MIPS_CPU_IRQ_BASE + 6) - -#endif /* __ASM_MACH_LOONGSON_LOONGSON_H */ diff --git a/arch/mips/include/asm/mach-lemote/mc146818rtc.h b/arch/mips/include/asm/mach-lemote/mc146818rtc.h deleted file mode 100644 index ed5147e1108..00000000000 --- a/arch/mips/include/asm/mach-lemote/mc146818rtc.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998, 2001, 03, 07 by Ralf Baechle (ralf@linux-mips.org) - * - * RTC routines for PC style attached Dallas chip. - */ -#ifndef __ASM_MACH_LEMOTE_MC146818RTC_H -#define __ASM_MACH_LEMOTE_MC146818RTC_H - -#include - -#define RTC_PORT(x) (0x70 + (x)) -#define RTC_IRQ 8 - -static inline unsigned char CMOS_READ(unsigned long addr) -{ - outb_p(addr, RTC_PORT(0)); - return inb_p(RTC_PORT(1)); -} - -static inline void CMOS_WRITE(unsigned char data, unsigned long addr) -{ - outb_p(addr, RTC_PORT(0)); - outb_p(data, RTC_PORT(1)); -} - -#define RTC_ALWAYS_BCD 0 - -#ifndef mc146818_decode_year -#define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970) -#endif - -#endif /* __ASM_MACH_LEMOTE_MC146818RTC_H */ diff --git a/arch/mips/include/asm/mach-lemote/pci.h b/arch/mips/include/asm/mach-lemote/pci.h deleted file mode 100644 index 3e6b1300afb..00000000000 --- a/arch/mips/include/asm/mach-lemote/pci.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2008 Zhang Le - * - * This program is free software; you can redistribute it - * and/or modify it under the terms of the GNU General - * Public License as published by the Free Software - * Foundation; either version 2 of the License, or (at your - * option) any later version. 
- * - * This program is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA - * 02139, USA. - */ - -#ifndef __ASM_MACH_LEMOTE_PCI_H_ -#define __ASM_MACH_LEMOTE_PCI_H_ - -extern struct pci_ops bonito64_pci_ops; - -#define LOONGSON2E_PCI_MEM_START BONITO_PCILO1_BASE -#define LOONGSON2E_PCI_MEM_END (BONITO_PCILO1_BASE + 0x04000000 * 2) -#define LOONGSON2E_PCI_IO_START 0x00004000UL - -#endif /* !__ASM_MACH_LEMOTE_PCI_H_ */ diff --git a/arch/mips/include/asm/mach-lemote/war.h b/arch/mips/include/asm/mach-lemote/war.h deleted file mode 100644 index 05f89e0f2a1..00000000000 --- a/arch/mips/include/asm/mach-lemote/war.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2002, 2004, 2007 by Ralf Baechle - */ -#ifndef __ASM_MIPS_MACH_LEMOTE_WAR_H -#define __ASM_MIPS_MACH_LEMOTE_WAR_H - -#define R4600_V1_INDEX_ICACHEOP_WAR 0 -#define R4600_V1_HIT_CACHEOP_WAR 0 -#define R4600_V2_HIT_CACHEOP_WAR 0 -#define R5432_CP0_INTERRUPT_WAR 0 -#define BCM1250_M3_WAR 0 -#define SIBYTE_1956_WAR 0 -#define MIPS4K_ICACHE_REFILL_WAR 0 -#define MIPS_CACHE_SYNC_WAR 0 -#define TX49XX_ICACHE_INDEX_INV_WAR 0 -#define RM9000_CDEX_SMP_WAR 0 -#define ICACHE_REFILLS_WORKAROUND_WAR 0 -#define R10000_LLSC_WAR 0 -#define MIPS34K_MISSED_ITLB_WAR 0 - -#endif /* __ASM_MIPS_MACH_LEMOTE_WAR_H */ diff --git a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h new file mode 100644 index 00000000000..ce5b6e270e3 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h @@ -0,0 +1,59 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2009 Wu Zhangjin + * Copyright (C) 2009 Philippe Vachon + * Copyright (C) 2009 Zhang Le + * + * reference: /proc/cpuinfo, + * arch/mips/kernel/cpu-probe.c(cpu_probe_legacy), + * arch/mips/kernel/proc.c(show_cpuinfo), + * loongson2f user manual. 
+ */ + +#ifndef __ASM_MACH_LOONGSON_CPU_FEATURE_OVERRIDES_H +#define __ASM_MACH_LOONGSON_CPU_FEATURE_OVERRIDES_H + +#define cpu_dcache_line_size() 32 +#define cpu_icache_line_size() 32 +#define cpu_scache_line_size() 32 + + +#define cpu_has_32fpr 1 +#define cpu_has_3k_cache 0 +#define cpu_has_4k_cache 1 +#define cpu_has_4kex 1 +#define cpu_has_64bits 1 +#define cpu_has_cache_cdex_p 0 +#define cpu_has_cache_cdex_s 0 +#define cpu_has_counter 1 +#define cpu_has_dc_aliases 1 +#define cpu_has_divec 0 +#define cpu_has_dsp 0 +#define cpu_has_ejtag 0 +#define cpu_has_fpu 1 +#define cpu_has_ic_fills_f_dc 0 +#define cpu_has_inclusive_pcaches 1 +#define cpu_has_llsc 1 +#define cpu_has_mcheck 0 +#define cpu_has_mdmx 0 +#define cpu_has_mips16 0 +#define cpu_has_mips32r1 0 +#define cpu_has_mips32r2 0 +#define cpu_has_mips3d 0 +#define cpu_has_mips64r1 0 +#define cpu_has_mips64r2 0 +#define cpu_has_mipsmt 0 +#define cpu_has_prefetch 0 +#define cpu_has_smartmips 0 +#define cpu_has_tlb 1 +#define cpu_has_tx39_cache 0 +#define cpu_has_userlocal 0 +#define cpu_has_vce 0 +#define cpu_has_vtag_icache 0 +#define cpu_has_watch 1 +#define cpu_icache_snoops_remote_store 1 + +#endif /* __ASM_MACH_LOONGSON_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-loongson/dma-coherence.h b/arch/mips/include/asm/mach-loongson/dma-coherence.h new file mode 100644 index 00000000000..71a6851ba83 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson/dma-coherence.h @@ -0,0 +1,68 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006, 07 Ralf Baechle + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + */ +#ifndef __ASM_MACH_LOONGSON_DMA_COHERENCE_H +#define __ASM_MACH_LOONGSON_DMA_COHERENCE_H + +struct device; + +static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr, + size_t size) +{ + return virt_to_phys(addr) | 0x80000000; +} + +static inline dma_addr_t plat_map_dma_mem_page(struct device *dev, + struct page *page) +{ + return page_to_phys(page) | 0x80000000; +} + +static inline unsigned long plat_dma_addr_to_phys(struct device *dev, + dma_addr_t dma_addr) +{ + return dma_addr & 0x7fffffff; +} + +static inline void plat_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction) +{ +} + +static inline int plat_dma_supported(struct device *dev, u64 mask) +{ + /* + * we fall back to GFP_DMA when the mask isn't all 1s, + * so we can't guarantee allocations that must be + * within a tighter range than GFP_DMA.. + */ + if (mask < DMA_BIT_MASK(24)) + return 0; + + return 1; +} + +static inline void plat_extra_sync_for_device(struct device *dev) +{ + return; +} + +static inline int plat_dma_mapping_error(struct device *dev, + dma_addr_t dma_addr) +{ + return 0; +} + +static inline int plat_device_is_coherent(struct device *dev) +{ + return 0; +} + +#endif /* __ASM_MACH_LOONGSON_DMA_COHERENCE_H */ diff --git a/arch/mips/include/asm/mach-loongson/loongson.h b/arch/mips/include/asm/mach-loongson/loongson.h new file mode 100644 index 00000000000..e9f74dee24e --- /dev/null +++ b/arch/mips/include/asm/mach-loongson/loongson.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2009 Lemote, Inc. 
& Institute of Computing Technology + * Author: Wu Zhangjin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __ASM_MACH_LOONGSON_LOONGSON_H +#define __ASM_MACH_LOONGSON_LOONGSON_H + +#include +#include + +/* there is an internal bonito64-compatiable northbridge in loongson2e/2f */ +#include + +/* loongson internal northbridge initialization */ +extern void bonito_irq_init(void); + +/* environment arguments from bootloader */ +extern unsigned long bus_clock, cpu_clock_freq; +extern unsigned long memsize, highmemsize; + +/* loongson-specific command line, env and memory initialization */ +extern void __init prom_init_memory(void); +extern void __init prom_init_cmdline(void); +extern void __init prom_init_env(void); + +/* PCI Configuration Registers */ +#define LOONGSON_PCI_ISR4C BONITO_PCI_REG(0x4c) + +/* PCI_Hit*_Sel_* */ + +#define LOONGSON_PCI_HIT0_SEL_L BONITO(BONITO_REGBASE + 0x50) +#define LOONGSON_PCI_HIT0_SEL_H BONITO(BONITO_REGBASE + 0x54) +#define LOONGSON_PCI_HIT1_SEL_L BONITO(BONITO_REGBASE + 0x58) +#define LOONGSON_PCI_HIT1_SEL_H BONITO(BONITO_REGBASE + 0x5c) +#define LOONGSON_PCI_HIT2_SEL_L BONITO(BONITO_REGBASE + 0x60) +#define LOONGSON_PCI_HIT2_SEL_H BONITO(BONITO_REGBASE + 0x64) + +/* PXArb Config & Status */ + +#define LOONGSON_PXARB_CFG BONITO(BONITO_REGBASE + 0x68) +#define LOONGSON_PXARB_STATUS BONITO(BONITO_REGBASE + 0x6c) + +/* loongson2-specific perf counter IRQ */ +#define LOONGSON2_PERFCNT_IRQ (MIPS_CPU_IRQ_BASE + 6) + +#endif /* __ASM_MACH_LOONGSON_LOONGSON_H */ diff --git a/arch/mips/include/asm/mach-loongson/mc146818rtc.h b/arch/mips/include/asm/mach-loongson/mc146818rtc.h new file mode 100644 index 00000000000..ed7fe978335 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson/mc146818rtc.h @@ -0,0 +1,36 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998, 2001, 03, 07 by Ralf Baechle (ralf@linux-mips.org) + * + * RTC routines for PC style attached Dallas chip. + */ +#ifndef __ASM_MACH_LOONGSON_MC146818RTC_H +#define __ASM_MACH_LOONGSON_MC146818RTC_H + +#include + +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_IRQ 8 + +static inline unsigned char CMOS_READ(unsigned long addr) +{ + outb_p(addr, RTC_PORT(0)); + return inb_p(RTC_PORT(1)); +} + +static inline void CMOS_WRITE(unsigned char data, unsigned long addr) +{ + outb_p(addr, RTC_PORT(0)); + outb_p(data, RTC_PORT(1)); +} + +#define RTC_ALWAYS_BCD 0 + +#ifndef mc146818_decode_year +#define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970) +#endif + +#endif /* __ASM_MACH_LOONGSON_MC146818RTC_H */ diff --git a/arch/mips/include/asm/mach-loongson/pci.h b/arch/mips/include/asm/mach-loongson/pci.h new file mode 100644 index 00000000000..e229b2904cc --- /dev/null +++ b/arch/mips/include/asm/mach-loongson/pci.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2008 Zhang Le + * + * This program is free software; you can redistribute it + * and/or modify it under the terms of the GNU General + * Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * This program is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA + * 02139, USA. + */ + +#ifndef __ASM_MACH_LOONGSON_PCI_H_ +#define __ASM_MACH_LOONGSON_PCI_H_ + +extern struct pci_ops bonito64_pci_ops; + +#define LOONGSON2E_PCI_MEM_START BONITO_PCILO1_BASE +#define LOONGSON2E_PCI_MEM_END (BONITO_PCILO1_BASE + 0x04000000 * 2) +#define LOONGSON2E_PCI_IO_START 0x00004000UL + +#endif /* !__ASM_MACH_LOONGSON_PCI_H_ */ diff --git a/arch/mips/include/asm/mach-loongson/war.h b/arch/mips/include/asm/mach-loongson/war.h new file mode 100644 index 00000000000..4b971c3ffd8 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson/war.h @@ -0,0 +1,25 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2002, 2004, 2007 by Ralf Baechle + */ +#ifndef __ASM_MACH_LOONGSON_WAR_H +#define __ASM_MACH_LOONGSON_WAR_H + +#define R4600_V1_INDEX_ICACHEOP_WAR 0 +#define R4600_V1_HIT_CACHEOP_WAR 0 +#define R4600_V2_HIT_CACHEOP_WAR 0 +#define R5432_CP0_INTERRUPT_WAR 0 +#define BCM1250_M3_WAR 0 +#define SIBYTE_1956_WAR 0 +#define MIPS4K_ICACHE_REFILL_WAR 0 +#define MIPS_CACHE_SYNC_WAR 0 +#define TX49XX_ICACHE_INDEX_INV_WAR 0 +#define RM9000_CDEX_SMP_WAR 0 +#define ICACHE_REFILLS_WORKAROUND_WAR 0 +#define R10000_LLSC_WAR 0 +#define MIPS34K_MISSED_ITLB_WAR 0 + +#endif /* __ASM_MACH_LEMOTE_WAR_H */ diff --git a/arch/mips/include/asm/mips-boards/bonito64.h b/arch/mips/include/asm/mips-boards/bonito64.h index a0f04bb99c9..a576ce044c3 100644 --- a/arch/mips/include/asm/mips-boards/bonito64.h +++ b/arch/mips/include/asm/mips-boards/bonito64.h @@ -26,7 +26,7 @@ /* offsets from base register */ #define BONITO(x) (x) -#elif defined(CONFIG_LEMOTE_FULONG) +#elif defined(CONFIG_LEMOTE_FULOONG2E) #define BONITO(x) (*(volatile u32 *)((char *)CKSEG1ADDR(BONITO_REG_BASE) + (x))) #define BONITO_IRQ_BASE 32 diff --git a/arch/mips/lemote/lm2e/Makefile b/arch/mips/lemote/lm2e/Makefile deleted file mode 100644 index a5bc1efc362..00000000000 --- a/arch/mips/lemote/lm2e/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# -# Makefile for Lemote Fulong mini-PC board. -# - -obj-y += setup.o init.o reset.o irq.o pci.o bonito-irq.o mem.o \ - env.o cmdline.o time.o machtype.o - -# -# Early printk support -# -obj-$(CONFIG_EARLY_PRINTK) += early_printk.o - -EXTRA_CFLAGS += -Werror diff --git a/arch/mips/lemote/lm2e/bonito-irq.c b/arch/mips/lemote/lm2e/bonito-irq.c deleted file mode 100644 index 3e31e7ad713..00000000000 --- a/arch/mips/lemote/lm2e/bonito-irq.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2001 MontaVista Software Inc. - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org) - * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- */ -#include - -#include - -static inline void bonito_irq_enable(unsigned int irq) -{ - BONITO_INTENSET = (1 << (irq - BONITO_IRQ_BASE)); - mmiowb(); -} - -static inline void bonito_irq_disable(unsigned int irq) -{ - BONITO_INTENCLR = (1 << (irq - BONITO_IRQ_BASE)); - mmiowb(); -} - -static struct irq_chip bonito_irq_type = { - .name = "bonito_irq", - .ack = bonito_irq_disable, - .mask = bonito_irq_disable, - .mask_ack = bonito_irq_disable, - .unmask = bonito_irq_enable, -}; - -static struct irqaction dma_timeout_irqaction = { - .handler = no_action, - .name = "dma_timeout", -}; - -void bonito_irq_init(void) -{ - u32 i; - - for (i = BONITO_IRQ_BASE; i < BONITO_IRQ_BASE + 32; i++) - set_irq_chip_and_handler(i, &bonito_irq_type, handle_level_irq); - - setup_irq(BONITO_IRQ_BASE + 10, &dma_timeout_irqaction); -} diff --git a/arch/mips/lemote/lm2e/cmdline.c b/arch/mips/lemote/lm2e/cmdline.c deleted file mode 100644 index 75f1b243ee4..00000000000 --- a/arch/mips/lemote/lm2e/cmdline.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Based on Ocelot Linux port, which is - * Copyright 2001 MontaVista Software Inc. - * Author: jsun@mvista.com or jsun@junsun.net - * - * Copyright 2003 ICT CAS - * Author: Michael Guo - * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include - -#include - -int prom_argc; -/* pmon passes arguments in 32bit pointers */ -int *_prom_argv; - -void __init prom_init_cmdline(void) -{ - int i; - long l; - - /* firmware arguments are initialized in head.S */ - prom_argc = fw_arg0; - _prom_argv = (int *)fw_arg1; - - /* arg[0] is "g", the rest is boot parameters */ - arcs_cmdline[0] = '\0'; - for (i = 1; i < prom_argc; i++) { - l = (long)_prom_argv[i]; - if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) - >= sizeof(arcs_cmdline)) - break; - strcat(arcs_cmdline, ((char *)l)); - strcat(arcs_cmdline, " "); - } - - if ((strstr(arcs_cmdline, "console=")) == NULL) - strcat(arcs_cmdline, " console=ttyS0,115200"); - if ((strstr(arcs_cmdline, "root=")) == NULL) - strcat(arcs_cmdline, " root=/dev/hda1"); -} diff --git a/arch/mips/lemote/lm2e/early_printk.c b/arch/mips/lemote/lm2e/early_printk.c deleted file mode 100644 index 3e0a6eaa404..00000000000 --- a/arch/mips/lemote/lm2e/early_printk.c +++ /dev/null @@ -1,39 +0,0 @@ -/* early printk support - * - * Copyright (c) 2009 Philippe Vachon - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- */ -#include - -#include - -#define UART_BASE (BONITO_PCIIO_BASE + 0x3f8) - -#define PORT(base, offset) (u8 *)(base + offset) - -static inline unsigned int serial_in(phys_addr_t base, int offset) -{ - return readb(PORT(base, offset)); -} - -static inline void serial_out(phys_addr_t base, int offset, int value) -{ - writeb(value, PORT(base, offset)); -} - -void prom_putchar(char c) -{ - phys_addr_t uart_base = - (phys_addr_t) ioremap_nocache(UART_BASE, 8); - - while ((serial_in(uart_base, UART_LSR) & UART_LSR_THRE) == 0) - ; - - serial_out(uart_base, UART_TX, c); -} diff --git a/arch/mips/lemote/lm2e/env.c b/arch/mips/lemote/lm2e/env.c deleted file mode 100644 index b9ef5038554..00000000000 --- a/arch/mips/lemote/lm2e/env.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Based on Ocelot Linux port, which is - * Copyright 2001 MontaVista Software Inc. - * Author: jsun@mvista.com or jsun@junsun.net - * - * Copyright 2003 ICT CAS - * Author: Michael Guo - * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include - -#include - -unsigned long bus_clock, cpu_clock_freq; -unsigned long memsize, highmemsize; - -/* pmon passes arguments in 32bit pointers */ -int *_prom_envp; - -#define parse_even_earlier(res, option, p) \ -do { \ - if (strncmp(option, (char *)p, strlen(option)) == 0) \ - strict_strtol((char *)p + strlen(option"="), \ - 10, &res); \ -} while (0) - -void __init prom_init_env(void) -{ - long l; - - /* firmware arguments are initialized in head.S */ - _prom_envp = (int *)fw_arg2; - - l = (long)*_prom_envp; - while (l != 0) { - parse_even_earlier(bus_clock, "busclock", l); - parse_even_earlier(cpu_clock_freq, "cpuclock", l); - parse_even_earlier(memsize, "memsize", l); - parse_even_earlier(highmemsize, "highmemsize", l); - _prom_envp++; - l = (long)*_prom_envp; - } - if (memsize == 0) - memsize = 256; - - pr_info("busclock=%ld, cpuclock=%ld, memsize=%ld, highmemsize=%ld\n", - bus_clock, cpu_clock_freq, memsize, highmemsize); -} diff --git a/arch/mips/lemote/lm2e/init.c b/arch/mips/lemote/lm2e/init.c deleted file mode 100644 index 3abe927422a..00000000000 --- a/arch/mips/lemote/lm2e/init.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include - -#include - -#include - -void __init prom_init(void) -{ - /* init base address of io space */ - set_io_port_base((unsigned long) - ioremap(BONITO_PCIIO_BASE, BONITO_PCIIO_SIZE)); - - prom_init_cmdline(); - prom_init_env(); - prom_init_memory(); -} - -void __init prom_free_prom_memory(void) -{ -} diff --git a/arch/mips/lemote/lm2e/irq.c b/arch/mips/lemote/lm2e/irq.c deleted file mode 100644 index 9585f5aa7cc..00000000000 --- a/arch/mips/lemote/lm2e/irq.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2007 Lemote Inc. 
& Insititute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include -#include - -#include -#include - -#include -/* - * the first level int-handler will jump here if it is a bonito irq - */ -static void bonito_irqdispatch(void) -{ - u32 int_status; - int i; - - /* workaround the IO dma problem: let cpu looping to allow DMA finish */ - int_status = BONITO_INTISR; - if (int_status & (1 << 10)) { - while (int_status & (1 << 10)) { - udelay(1); - int_status = BONITO_INTISR; - } - } - - /* Get pending sources, masked by current enables */ - int_status = BONITO_INTISR & BONITO_INTEN; - - if (int_status != 0) { - i = __ffs(int_status); - int_status &= ~(1 << i); - do_IRQ(BONITO_IRQ_BASE + i); - } -} - -static void i8259_irqdispatch(void) -{ - int irq; - - irq = i8259_irq(); - if (irq >= 0) - do_IRQ(irq); - else - spurious_interrupt(); -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; - - if (pending & CAUSEF_IP7) - do_IRQ(MIPS_CPU_IRQ_BASE + 7); - else if (pending & CAUSEF_IP6) /* perf counter loverflow */ - do_IRQ(LOONGSON2_PERFCNT_IRQ); - else if (pending & CAUSEF_IP5) - i8259_irqdispatch(); - else if (pending & CAUSEF_IP2) - bonito_irqdispatch(); - else - spurious_interrupt(); -} - -static struct irqaction cascade_irqaction = { - .handler = no_action, - .name = "cascade", -}; - -void __init arch_init_irq(void) -{ - /* - * Clear all of the interrupts while we change the able around a bit. - * int-handler is not on bootstrap - */ - clear_c0_status(ST0_IM | ST0_BEV); - local_irq_disable(); - - /* most bonito irq should be level triggered */ - BONITO_INTEDGE = BONITO_ICU_SYSTEMERR | BONITO_ICU_MASTERERR | - BONITO_ICU_RETRYERR | BONITO_ICU_MBOXES; - BONITO_INTSTEER = 0; - - /* - * Mask out all interrupt by writing "1" to all bit position in - * the interrupt reset reg. - */ - BONITO_INTENCLR = ~0; - - /* init all controller - * 0-15 ------> i8259 interrupt - * 16-23 ------> mips cpu interrupt - * 32-63 ------> bonito irq - */ - - /* Sets the first-level interrupt dispatcher. */ - mips_cpu_irq_init(); - init_i8259_irqs(); - bonito_irq_init(); - - /* bonito irq at IP2 */ - setup_irq(MIPS_CPU_IRQ_BASE + 2, &cascade_irqaction); - /* 8259 irq at IP5 */ - setup_irq(MIPS_CPU_IRQ_BASE + 5, &cascade_irqaction); -} diff --git a/arch/mips/lemote/lm2e/machtype.c b/arch/mips/lemote/lm2e/machtype.c deleted file mode 100644 index 8d803eea787..00000000000 --- a/arch/mips/lemote/lm2e/machtype.c +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- */ - -const char *get_system_type(void) -{ - return "lemote-fulong"; -} - diff --git a/arch/mips/lemote/lm2e/mem.c b/arch/mips/lemote/lm2e/mem.c deleted file mode 100644 index 6a7feb178fa..00000000000 --- a/arch/mips/lemote/lm2e/mem.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include -#include -#include - -#include - -#include - -void __init prom_init_memory(void) -{ - add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); -#ifdef CONFIG_64BIT - if (highmemsize > 0) - add_memory_region(0x20000000, highmemsize << 20, BOOT_MEM_RAM); -#endif /* CONFIG_64BIT */ -} - -/* override of arch/mips/mm/cache.c: __uncached_access */ -int __uncached_access(struct file *file, unsigned long addr) -{ - if (file->f_flags & O_SYNC) - return 1; - - /* - * On the Lemote Loongson 2e system, the peripheral registers - * reside between 0x1000:0000 and 0x2000:0000. - */ - return addr >= __pa(high_memory) || - ((addr >= 0x10000000) && (addr < 0x20000000)); -} diff --git a/arch/mips/lemote/lm2e/pci.c b/arch/mips/lemote/lm2e/pci.c deleted file mode 100644 index 9812c30cc6e..00000000000 --- a/arch/mips/lemote/lm2e/pci.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include - -#include -#include - -static struct resource loongson2e_pci_mem_resource = { - .name = "LOONGSON2E PCI MEM", - .start = LOONGSON2E_PCI_MEM_START, - .end = LOONGSON2E_PCI_MEM_END, - .flags = IORESOURCE_MEM, -}; - -static struct resource loongson2e_pci_io_resource = { - .name = "LOONGSON2E PCI IO MEM", - .start = LOONGSON2E_PCI_IO_START, - .end = IO_SPACE_LIMIT, - .flags = IORESOURCE_IO, -}; - -static struct pci_controller loongson2e_pci_controller = { - .pci_ops = &bonito64_pci_ops, - .io_resource = &loongson2e_pci_io_resource, - .mem_resource = &loongson2e_pci_mem_resource, - .mem_offset = 0x00000000UL, - .io_offset = 0x00000000UL, -}; - -static void __init setup_pcimap(void) -{ - /* - * local to PCI mapping for CPU accessing PCI space - * CPU address space [256M,448M] is window for accessing pci space - * we set pcimap_lo[0,1,2] to map it to pci space[0M,64M], [320M,448M] - * - * pcimap: PCI_MAP2 PCI_Mem_Lo2 PCI_Mem_Lo1 PCI_Mem_Lo0 - * [<2G] [384M,448M] [320M,384M] [0M,64M] - */ - BONITO_PCIMAP = BONITO_PCIMAP_PCIMAP_2 | - BONITO_PCIMAP_WIN(2, BONITO_PCILO2_BASE) | - BONITO_PCIMAP_WIN(1, BONITO_PCILO1_BASE) | - BONITO_PCIMAP_WIN(0, 0); - - /* - * PCI-DMA to local mapping: [2G,2G+256M] -> [0M,256M] - */ - BONITO_PCIBASE0 = 0x80000000ul; /* base: 2G -> mmap: 0M */ - /* size: 256M, burst transmission, pre-fetch enable, 64bit */ - LOONGSON_PCI_HIT0_SEL_L = 0xc000000cul; - LOONGSON_PCI_HIT0_SEL_H = 0xfffffffful; - LOONGSON_PCI_HIT1_SEL_L = 0x00000006ul; /* set this BAR as invalid */ - LOONGSON_PCI_HIT1_SEL_H = 0x00000000ul; - LOONGSON_PCI_HIT2_SEL_L = 0x00000006ul; /* set this BAR as invalid */ - LOONGSON_PCI_HIT2_SEL_H = 0x00000000ul; - - /* avoid deadlock of PCI reading/writing lock operation */ - LOONGSON_PCI_ISR4C = 
0xd2000001ul; - - /* can not change gnt to break pci transfer when device's gnt not - deassert for some broken device */ - LOONGSON_PXARB_CFG = 0x00fe0105ul; -} - -static int __init pcibios_init(void) -{ - setup_pcimap(); - - loongson2e_pci_controller.io_map_base = mips_io_port_base; - - register_pci_controller(&loongson2e_pci_controller); - - return 0; -} - -arch_initcall(pcibios_init); diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c deleted file mode 100644 index 66390215973..00000000000 --- a/arch/mips/lemote/lm2e/setup.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include - -#include - -#include - -#ifdef CONFIG_VT -#include -#include -#endif - -void (*__wbflush)(void); -EXPORT_SYMBOL(__wbflush); - -static void wbflush_loongson2e(void) -{ - asm(".set\tpush\n\t" - ".set\tnoreorder\n\t" - ".set mips3\n\t" - "sync\n\t" - "nop\n\t" - ".set\tpop\n\t" - ".set mips0\n\t"); -} - -void __init plat_mem_setup(void) -{ - __wbflush = wbflush_loongson2e; - -#ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; - - screen_info = (struct screen_info) { - 0, 25, /* orig-x, orig-y */ - 0, /* unused */ - 0, /* orig-video-page */ - 0, /* orig-video-mode */ - 80, /* orig-video-cols */ - 0, 0, 0, /* ega_ax, ega_bx, ega_cx */ - 25, /* orig-video-lines */ - VIDEO_TYPE_VGAC, /* orig-video-isVGA */ - 16 /* orig-video-points */ - }; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif -#endif -} diff --git a/arch/mips/lemote/lm2e/time.c b/arch/mips/lemote/lm2e/time.c deleted file mode 100644 index b13d1717465..00000000000 --- a/arch/mips/lemote/lm2e/time.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include -#include - -#include - -void __init plat_time_init(void) -{ - /* setup mips r4k timer */ - mips_hpt_frequency = cpu_clock_freq / 2; -} - -unsigned long read_persistent_clock(void) -{ - return mc146818_get_cmos_time(); -} diff --git a/arch/mips/loongson/fuloong-2e/Makefile b/arch/mips/loongson/fuloong-2e/Makefile new file mode 100644 index 00000000000..feb1d6bba49 --- /dev/null +++ b/arch/mips/loongson/fuloong-2e/Makefile @@ -0,0 +1,13 @@ +# +# Makefile for Lemote Fuloong2e mini-PC board. +# + +obj-y += setup.o init.o reset.o irq.o pci.o bonito-irq.o mem.o \ + env.o cmdline.o time.o machtype.o + +# +# Early printk support +# +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o + +EXTRA_CFLAGS += -Werror diff --git a/arch/mips/loongson/fuloong-2e/bonito-irq.c b/arch/mips/loongson/fuloong-2e/bonito-irq.c new file mode 100644 index 00000000000..3e31e7ad713 --- /dev/null +++ b/arch/mips/loongson/fuloong-2e/bonito-irq.c @@ -0,0 +1,51 @@ +/* + * Copyright 2001 MontaVista Software Inc. 
+ * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net + * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org) + * + * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include + +#include + +static inline void bonito_irq_enable(unsigned int irq) +{ + BONITO_INTENSET = (1 << (irq - BONITO_IRQ_BASE)); + mmiowb(); +} + +static inline void bonito_irq_disable(unsigned int irq) +{ + BONITO_INTENCLR = (1 << (irq - BONITO_IRQ_BASE)); + mmiowb(); +} + +static struct irq_chip bonito_irq_type = { + .name = "bonito_irq", + .ack = bonito_irq_disable, + .mask = bonito_irq_disable, + .mask_ack = bonito_irq_disable, + .unmask = bonito_irq_enable, +}; + +static struct irqaction dma_timeout_irqaction = { + .handler = no_action, + .name = "dma_timeout", +}; + +void bonito_irq_init(void) +{ + u32 i; + + for (i = BONITO_IRQ_BASE; i < BONITO_IRQ_BASE + 32; i++) + set_irq_chip_and_handler(i, &bonito_irq_type, handle_level_irq); + + setup_irq(BONITO_IRQ_BASE + 10, &dma_timeout_irqaction); +} diff --git a/arch/mips/loongson/fuloong-2e/cmdline.c b/arch/mips/loongson/fuloong-2e/cmdline.c new file mode 100644 index 00000000000..75f1b243ee4 --- /dev/null +++ b/arch/mips/loongson/fuloong-2e/cmdline.c @@ -0,0 +1,52 @@ +/* + * Based on Ocelot Linux port, which is + * Copyright 2001 MontaVista Software Inc. + * Author: jsun@mvista.com or jsun@junsun.net + * + * Copyright 2003 ICT CAS + * Author: Michael Guo + * + * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include + +#include + +int prom_argc; +/* pmon passes arguments in 32bit pointers */ +int *_prom_argv; + +void __init prom_init_cmdline(void) +{ + int i; + long l; + + /* firmware arguments are initialized in head.S */ + prom_argc = fw_arg0; + _prom_argv = (int *)fw_arg1; + + /* arg[0] is "g", the rest is boot parameters */ + arcs_cmdline[0] = '\0'; + for (i = 1; i < prom_argc; i++) { + l = (long)_prom_argv[i]; + if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) + >= sizeof(arcs_cmdline)) + break; + strcat(arcs_cmdline, ((char *)l)); + strcat(arcs_cmdline, " "); + } + + if ((strstr(arcs_cmdline, "console=")) == NULL) + strcat(arcs_cmdline, " console=ttyS0,115200"); + if ((strstr(arcs_cmdline, "root=")) == NULL) + strcat(arcs_cmdline, " root=/dev/hda1"); +} diff --git a/arch/mips/loongson/fuloong-2e/early_printk.c b/arch/mips/loongson/fuloong-2e/early_printk.c new file mode 100644 index 00000000000..3e0a6eaa404 --- /dev/null +++ b/arch/mips/loongson/fuloong-2e/early_printk.c @@ -0,0 +1,39 @@ +/* early printk support + * + * Copyright (c) 2009 Philippe Vachon + * Copyright (C) 2009 Lemote Inc. 
& Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include + +#include + +#define UART_BASE (BONITO_PCIIO_BASE + 0x3f8) + +#define PORT(base, offset) (u8 *)(base + offset) + +static inline unsigned int serial_in(phys_addr_t base, int offset) +{ + return readb(PORT(base, offset)); +} + +static inline void serial_out(phys_addr_t base, int offset, int value) +{ + writeb(value, PORT(base, offset)); +} + +void prom_putchar(char c) +{ + phys_addr_t uart_base = + (phys_addr_t) ioremap_nocache(UART_BASE, 8); + + while ((serial_in(uart_base, UART_LSR) & UART_LSR_THRE) == 0) + ; + + serial_out(uart_base, UART_TX, c); +} diff --git a/arch/mips/loongson/fuloong-2e/env.c b/arch/mips/loongson/fuloong-2e/env.c new file mode 100644 index 00000000000..b9ef5038554 --- /dev/null +++ b/arch/mips/loongson/fuloong-2e/env.c @@ -0,0 +1,58 @@ +/* + * Based on Ocelot Linux port, which is + * Copyright 2001 MontaVista Software Inc. + * Author: jsun@mvista.com or jsun@junsun.net + * + * Copyright 2003 ICT CAS + * Author: Michael Guo + * + * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include + +#include + +unsigned long bus_clock, cpu_clock_freq; +unsigned long memsize, highmemsize; + +/* pmon passes arguments in 32bit pointers */ +int *_prom_envp; + +#define parse_even_earlier(res, option, p) \ +do { \ + if (strncmp(option, (char *)p, strlen(option)) == 0) \ + strict_strtol((char *)p + strlen(option"="), \ + 10, &res); \ +} while (0) + +void __init prom_init_env(void) +{ + long l; + + /* firmware arguments are initialized in head.S */ + _prom_envp = (int *)fw_arg2; + + l = (long)*_prom_envp; + while (l != 0) { + parse_even_earlier(bus_clock, "busclock", l); + parse_even_earlier(cpu_clock_freq, "cpuclock", l); + parse_even_earlier(memsize, "memsize", l); + parse_even_earlier(highmemsize, "highmemsize", l); + _prom_envp++; + l = (long)*_prom_envp; + } + if (memsize == 0) + memsize = 256; + + pr_info("busclock=%ld, cpuclock=%ld, memsize=%ld, highmemsize=%ld\n", + bus_clock, cpu_clock_freq, memsize, highmemsize); +} diff --git a/arch/mips/loongson/fuloong-2e/init.c b/arch/mips/loongson/fuloong-2e/init.c new file mode 100644 index 00000000000..3abe927422a --- /dev/null +++ b/arch/mips/loongson/fuloong-2e/init.c @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */
+
+#include
+
+#include
+
+#include
+
+void __init prom_init(void)
+{
+	/* init base address of io space */
+	set_io_port_base((unsigned long)
+		ioremap(BONITO_PCIIO_BASE, BONITO_PCIIO_SIZE));
+
+	prom_init_cmdline();
+	prom_init_env();
+	prom_init_memory();
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
diff --git a/arch/mips/loongson/fuloong-2e/irq.c b/arch/mips/loongson/fuloong-2e/irq.c
new file mode 100644
index 00000000000..9585f5aa7cc
--- /dev/null
+++ b/arch/mips/loongson/fuloong-2e/irq.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#include
+#include
+
+#include
+#include
+
+#include
+/*
+ * the first-level int-handler will jump here if it is a bonito irq
+ */
+static void bonito_irqdispatch(void)
+{
+	u32 int_status;
+	int i;
+
+	/* work around the IO DMA problem: let the cpu loop to allow DMA to finish */
+	int_status = BONITO_INTISR;
+	if (int_status & (1 << 10)) {
+		while (int_status & (1 << 10)) {
+			udelay(1);
+			int_status = BONITO_INTISR;
+		}
+	}
+
+	/* Get pending sources, masked by current enables */
+	int_status = BONITO_INTISR & BONITO_INTEN;
+
+	if (int_status != 0) {
+		i = __ffs(int_status);
+		int_status &= ~(1 << i);
+		do_IRQ(BONITO_IRQ_BASE + i);
+	}
+}
+
+static void i8259_irqdispatch(void)
+{
+	int irq;
+
+	irq = i8259_irq();
+	if (irq >= 0)
+		do_IRQ(irq);
+	else
+		spurious_interrupt();
+}
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM;
+
+	if (pending & CAUSEF_IP7)
+		do_IRQ(MIPS_CPU_IRQ_BASE + 7);
+	else if (pending & CAUSEF_IP6)	/* perf counter overflow */
+		do_IRQ(LOONGSON2_PERFCNT_IRQ);
+	else if (pending & CAUSEF_IP5)
+		i8259_irqdispatch();
+	else if (pending & CAUSEF_IP2)
+		bonito_irqdispatch();
+	else
+		spurious_interrupt();
+}
+
+static struct irqaction cascade_irqaction = {
+	.handler = no_action,
+	.name = "cascade",
+};
+
+void __init arch_init_irq(void)
+{
+	/*
+	 * Clear all of the interrupts while we change the table around a bit.
+	 * int-handler is not on bootstrap
+	 */
+	clear_c0_status(ST0_IM | ST0_BEV);
+	local_irq_disable();
+
+	/* most bonito irqs should be level triggered */
+	BONITO_INTEDGE = BONITO_ICU_SYSTEMERR | BONITO_ICU_MASTERERR |
+		BONITO_ICU_RETRYERR | BONITO_ICU_MBOXES;
+	BONITO_INTSTEER = 0;
+
+	/*
+	 * Mask out all interrupts by writing "1" to all bit positions in
+	 * the interrupt reset reg.
+	 */
+	BONITO_INTENCLR = ~0;
+
+	/* init all controllers
+	 *   0-15  ------> i8259 interrupt
+	 *   16-23 ------> mips cpu interrupt
+	 *   32-63 ------> bonito irq
+	 */
+
+	/* Sets the first-level interrupt dispatcher. */
+	mips_cpu_irq_init();
+	init_i8259_irqs();
+	bonito_irq_init();
+
+	/* bonito irq at IP2 */
+	setup_irq(MIPS_CPU_IRQ_BASE + 2, &cascade_irqaction);
+	/* 8259 irq at IP5 */
+	setup_irq(MIPS_CPU_IRQ_BASE + 5, &cascade_irqaction);
+}
diff --git a/arch/mips/loongson/fuloong-2e/machtype.c b/arch/mips/loongson/fuloong-2e/machtype.c
new file mode 100644
index 00000000000..e03aa0de617
--- /dev/null
+++ b/arch/mips/loongson/fuloong-2e/machtype.c
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2009 Lemote Inc. & Institute of Computing Technology
+ * Author: Wu Zhangjin, wuzj@lemote.com
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+const char *get_system_type(void)
+{
+	return "lemote-fuloong-2e-box";
+}
+
diff --git a/arch/mips/loongson/fuloong-2e/mem.c b/arch/mips/loongson/fuloong-2e/mem.c
new file mode 100644
index 00000000000..6a7feb178fa
--- /dev/null
+++ b/arch/mips/loongson/fuloong-2e/mem.c
@@ -0,0 +1,36 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#include
+#include
+#include
+
+#include
+
+#include
+
+void __init prom_init_memory(void)
+{
+	add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
+#ifdef CONFIG_64BIT
+	if (highmemsize > 0)
+		add_memory_region(0x20000000, highmemsize << 20, BOOT_MEM_RAM);
+#endif /* CONFIG_64BIT */
+}
+
+/* override of arch/mips/mm/cache.c: __uncached_access */
+int __uncached_access(struct file *file, unsigned long addr)
+{
+	if (file->f_flags & O_SYNC)
+		return 1;
+
+	/*
+	 * On the Lemote Loongson 2e system, the peripheral registers
+	 * reside between 0x1000:0000 and 0x2000:0000.
+	 */
+	return addr >= __pa(high_memory) ||
+		((addr >= 0x10000000) && (addr < 0x20000000));
+}
diff --git a/arch/mips/loongson/fuloong-2e/pci.c b/arch/mips/loongson/fuloong-2e/pci.c
new file mode 100644
index 00000000000..9812c30cc6e
--- /dev/null
+++ b/arch/mips/loongson/fuloong-2e/pci.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */ +#include + +#include +#include + +static struct resource loongson2e_pci_mem_resource = { + .name = "LOONGSON2E PCI MEM", + .start = LOONGSON2E_PCI_MEM_START, + .end = LOONGSON2E_PCI_MEM_END, + .flags = IORESOURCE_MEM, +}; + +static struct resource loongson2e_pci_io_resource = { + .name = "LOONGSON2E PCI IO MEM", + .start = LOONGSON2E_PCI_IO_START, + .end = IO_SPACE_LIMIT, + .flags = IORESOURCE_IO, +}; + +static struct pci_controller loongson2e_pci_controller = { + .pci_ops = &bonito64_pci_ops, + .io_resource = &loongson2e_pci_io_resource, + .mem_resource = &loongson2e_pci_mem_resource, + .mem_offset = 0x00000000UL, + .io_offset = 0x00000000UL, +}; + +static void __init setup_pcimap(void) +{ + /* + * local to PCI mapping for CPU accessing PCI space + * CPU address space [256M,448M] is window for accessing pci space + * we set pcimap_lo[0,1,2] to map it to pci space[0M,64M], [320M,448M] + * + * pcimap: PCI_MAP2 PCI_Mem_Lo2 PCI_Mem_Lo1 PCI_Mem_Lo0 + * [<2G] [384M,448M] [320M,384M] [0M,64M] + */ + BONITO_PCIMAP = BONITO_PCIMAP_PCIMAP_2 | + BONITO_PCIMAP_WIN(2, BONITO_PCILO2_BASE) | + BONITO_PCIMAP_WIN(1, BONITO_PCILO1_BASE) | + BONITO_PCIMAP_WIN(0, 0); + + /* + * PCI-DMA to local mapping: [2G,2G+256M] -> [0M,256M] + */ + BONITO_PCIBASE0 = 0x80000000ul; /* base: 2G -> mmap: 0M */ + /* size: 256M, burst transmission, pre-fetch enable, 64bit */ + LOONGSON_PCI_HIT0_SEL_L = 0xc000000cul; + LOONGSON_PCI_HIT0_SEL_H = 0xfffffffful; + LOONGSON_PCI_HIT1_SEL_L = 0x00000006ul; /* set this BAR as invalid */ + LOONGSON_PCI_HIT1_SEL_H = 0x00000000ul; + LOONGSON_PCI_HIT2_SEL_L = 0x00000006ul; /* set this BAR as invalid */ + LOONGSON_PCI_HIT2_SEL_H = 0x00000000ul; + + /* avoid deadlock of PCI reading/writing lock operation */ + LOONGSON_PCI_ISR4C = 0xd2000001ul; + + /* can not change gnt to break pci transfer when device's gnt not + deassert for some broken device */ + LOONGSON_PXARB_CFG = 0x00fe0105ul; +} + +static int __init pcibios_init(void) +{ + setup_pcimap(); + + loongson2e_pci_controller.io_map_base = mips_io_port_base; + + register_pci_controller(&loongson2e_pci_controller); + + return 0; +} + +arch_initcall(pcibios_init); diff --git a/arch/mips/loongson/fuloong-2e/reset.c b/arch/mips/loongson/fuloong-2e/reset.c new file mode 100644 index 00000000000..c21299af7f6 --- /dev/null +++ b/arch/mips/loongson/fuloong-2e/reset.c @@ -0,0 +1,44 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * Copyright (C) 2009 Lemote, Inc. 
& Institute of Computing Technology + * Author: Zhangjin Wu, wuzj@lemote.com + */ +#include +#include + +#include + +#include + +static void loongson2e_restart(char *command) +{ + /* do preparation for reboot */ + BONITO_BONGENCFG &= ~(1 << 2); + BONITO_BONGENCFG |= (1 << 2); + + /* reboot via jumping to boot base address */ + ((void (*)(void))ioremap_nocache(BONITO_BOOT_BASE, 4)) (); +} + +static void loongson2e_halt(void) +{ + while (1) + ; +} + +static int __init mips_reboot_setup(void) +{ + _machine_restart = loongson2e_restart; + _machine_halt = loongson2e_halt; + pm_power_off = loongson2e_halt; + + return 0; +} + +arch_initcall(mips_reboot_setup); diff --git a/arch/mips/loongson/fuloong-2e/time.c b/arch/mips/loongson/fuloong-2e/time.c new file mode 100644 index 00000000000..b13d1717465 --- /dev/null +++ b/arch/mips/loongson/fuloong-2e/time.c @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include + +#include + +void __init plat_time_init(void) +{ + /* setup mips r4k timer */ + mips_hpt_frequency = cpu_clock_freq / 2; +} + +unsigned long read_persistent_clock(void) +{ + return mc146818_get_cmos_time(); +} diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index 63d8a297c58..0d4d5ea6fac 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -26,7 +26,7 @@ obj-$(CONFIG_MIPS_COBALT) += fixup-cobalt.o obj-$(CONFIG_SOC_AU1500) += fixup-au1000.o ops-au1000.o obj-$(CONFIG_SOC_AU1550) += fixup-au1000.o ops-au1000.o obj-$(CONFIG_SOC_PNX8550) += fixup-pnx8550.o ops-pnx8550.o -obj-$(CONFIG_LEMOTE_FULONG) += fixup-lm2e.o ops-bonito64.o +obj-$(CONFIG_LEMOTE_FULOONG2E) += fixup-fuloong2e.o ops-bonito64.o obj-$(CONFIG_MIPS_MALTA) += fixup-malta.o obj-$(CONFIG_PMC_MSP7120_GW) += fixup-pmcmsp.o ops-pmcmsp.o obj-$(CONFIG_PMC_MSP7120_EVAL) += fixup-pmcmsp.o ops-pmcmsp.o diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c new file mode 100644 index 00000000000..0c4c7a81213 --- /dev/null +++ b/arch/mips/pci/fixup-fuloong2e.c @@ -0,0 +1,224 @@ +/* + * Copyright (C) 2004 ICT CAS + * Author: Li xiaoyu, ICT CAS + * lixy@ict.ac.cn + * + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */
+#include
+#include
+#include
+
+/* South bridge slot number is set by the pci probe process */
+static u8 sb_slot = 5;
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	int irq = 0;
+
+	if (slot == sb_slot) {
+		switch (PCI_FUNC(dev->devfn)) {
+		case 2:
+			irq = 10;
+			break;
+		case 3:
+			irq = 11;
+			break;
+		case 5:
+			irq = 9;
+			break;
+		}
+	} else {
+		irq = BONITO_IRQ_BASE + 25 + pin;
+	}
+	return irq;
+
+}
+
+/* Do platform specific device initialization at pci_enable_device() time */
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+	return 0;
+}
+
+static void __init loongson2e_nec_fixup(struct pci_dev *pdev)
+{
+	unsigned int val;
+
+	/* Configures ports 1, 2, 3, 4 to be valid */
+	pci_read_config_dword(pdev, 0xe0, &val);
+	pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x4);
+
+	/* System clock is 48-MHz Oscillator. */
+	pci_write_config_dword(pdev, 0xe4, 1 << 5);
+}
+
+static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev)
+{
+	unsigned char c;
+
+	sb_slot = PCI_SLOT(pdev->devfn);
+
+	printk(KERN_INFO "via686b fix: ISA bridge\n");
+
+	/* Enable I/O Recovery time */
+	pci_write_config_byte(pdev, 0x40, 0x08);
+
+	/* Enable ISA refresh */
+	pci_write_config_byte(pdev, 0x41, 0x01);
+
+	/* disable ISA line buffer */
+	pci_write_config_byte(pdev, 0x45, 0x00);
+
+	/* Gate INTR, and flush line buffer */
+	pci_write_config_byte(pdev, 0x46, 0xe0);
+
+	/* Disable PCI Delay Transaction, Enable EISA ports 4D0/4D1. */
+	/* pci_write_config_byte(pdev, 0x47, 0x20); */
+
+	/*
+	 * enable PCI Delay Transaction, Enable EISA ports 4D0/4D1.
+	 * enable time-out timer
+	 */
+	pci_write_config_byte(pdev, 0x47, 0xe6);
+
+	/*
+	 * enable level trigger on pci irqs: 9,10,11,13
+	 * important! without this PCI interrupts won't work
+	 */
+	outb(0x2e, 0x4d1);
+
+	/* 512 K PCI Decode */
+	pci_write_config_byte(pdev, 0x48, 0x01);
+
+	/* Wait for PGNT before grant to ISA Master/DMA */
+	pci_write_config_byte(pdev, 0x4a, 0x84);
+
+	/*
+	 * Plug'n'Play
+	 *
+	 * Parallel DRQ 3, Floppy DRQ 2 (default)
+	 */
+	pci_write_config_byte(pdev, 0x50, 0x0e);
+
+	/*
+	 * IRQ Routing for Floppy and Parallel port
+	 *
+	 * IRQ 6 for floppy, IRQ 7 for parallel port
+	 */
+	pci_write_config_byte(pdev, 0x51, 0x76);
+
+	/* IRQ Routing for serial ports (take IRQ 3 and 4) */
+	pci_write_config_byte(pdev, 0x52, 0x34);
+
+	/* All IRQs level triggered. */
+	pci_write_config_byte(pdev, 0x54, 0x00);
+
+	/* route PIRQA-D irq */
+	pci_write_config_byte(pdev, 0x55, 0x90);	/* bit 7-4, PIRQA */
+	pci_write_config_byte(pdev, 0x56, 0xba);	/* bit 7-4, PIRQC; */
+							/* 3-0, PIRQB */
+	pci_write_config_byte(pdev, 0x57, 0xd0);	/* bit 7-4, PIRQD */
+
+	/* enable function 5/6, audio/modem */
+	pci_read_config_byte(pdev, 0x85, &c);
+	c &= ~(0x3 << 2);
+	pci_write_config_byte(pdev, 0x85, c);
+
+	printk(KERN_INFO"via686b fix: ISA bridge done\n");
+}
+
+static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev)
+{
+	printk(KERN_INFO"via686b fix: IDE\n");
+
+	/* Modify IDE controller setup */
+	pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 48);
+	pci_write_config_byte(pdev, PCI_COMMAND,
+			      PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+			      PCI_COMMAND_MASTER);
+	pci_write_config_byte(pdev, 0x40, 0x0b);
+	/* legacy mode */
+	pci_write_config_byte(pdev, 0x42, 0x09);
+
+#if 1	/* play safe, otherwise we may see notebook's usb keyboard lockup */
+	/* disable read prefetch/write post buffers */
+	pci_write_config_byte(pdev, 0x41, 0x02);
+
+	/* use 3/4 as fifo threshold */
+	pci_write_config_byte(pdev, 0x43, 0x0a);
+	pci_write_config_byte(pdev, 0x44, 0x00);
+
+	pci_write_config_byte(pdev, 0x45, 0x00);
+#else
+	pci_write_config_byte(pdev, 0x41, 0xc2);
+	pci_write_config_byte(pdev, 0x43, 0x35);
+	pci_write_config_byte(pdev, 0x44, 0x1c);
+
+	pci_write_config_byte(pdev, 0x45, 0x10);
+#endif
+
+	printk(KERN_INFO"via686b fix: IDE done\n");
+}
+
+static void __init loongson2e_686b_func2_fixup(struct pci_dev *pdev)
+{
+	/* irq routing */
+	pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 10);
+}
+
+static void __init loongson2e_686b_func3_fixup(struct pci_dev *pdev)
+{
+	/* irq routing */
+	pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 11);
+}
+
+static void __init loongson2e_686b_func5_fixup(struct pci_dev *pdev)
+{
+	unsigned int val;
+	unsigned char c;
+
+	/* enable IO */
+	pci_write_config_byte(pdev, PCI_COMMAND,
+			      PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+			      PCI_COMMAND_MASTER);
+	pci_read_config_dword(pdev, 0x4, &val);
+	pci_write_config_dword(pdev, 0x4, val | 1);
+
+	/* route ac97 IRQ */
+	pci_write_config_byte(pdev, 0x3c, 9);
+
+	pci_read_config_byte(pdev, 0x8, &c);
+
+	/* link control: enable link & SGD PCM output */
+	pci_write_config_byte(pdev, 0x41, 0xcc);
+
+	/* disable game port, FM, midi, sb, enable write to reg2c-2f */
+	pci_write_config_byte(pdev, 0x42, 0x20);
+
+	/* we are using Avance logic codec */
+	pci_write_config_word(pdev, 0x2c, 0x1005);
+	pci_write_config_word(pdev, 0x2e, 0x4710);
+	pci_read_config_dword(pdev, 0x2c, &val);
+
+	pci_write_config_byte(pdev, 0x42, 0x0);
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686,
+			 loongson2e_686b_func0_fixup);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1,
+			 loongson2e_686b_func1_fixup);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_2,
+			 loongson2e_686b_func2_fixup);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3,
+			 loongson2e_686b_func3_fixup);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5,
+			 loongson2e_686b_func5_fixup);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
+			 loongson2e_nec_fixup);
diff --git a/arch/mips/pci/fixup-lm2e.c b/arch/mips/pci/fixup-lm2e.c
deleted file mode 100644
index 0c4c7a81213..00000000000
--- a/arch/mips/pci/fixup-lm2e.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (C) 2004 ICT CAS
- * Author: Li xiaoyu, ICT CAS
- * lixy@ict.ac.cn
- *
- *
Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include -#include -#include - -/* South bridge slot number is set by the pci probe process */ -static u8 sb_slot = 5; - -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - int irq = 0; - - if (slot == sb_slot) { - switch (PCI_FUNC(dev->devfn)) { - case 2: - irq = 10; - break; - case 3: - irq = 11; - break; - case 5: - irq = 9; - break; - } - } else { - irq = BONITO_IRQ_BASE + 25 + pin; - } - return irq; - -} - -/* Do platform specific device initialization at pci_enable_device() time */ -int pcibios_plat_dev_init(struct pci_dev *dev) -{ - return 0; -} - -static void __init loongson2e_nec_fixup(struct pci_dev *pdev) -{ - unsigned int val; - - /* Configues port 1, 2, 3, 4 to be validate*/ - pci_read_config_dword(pdev, 0xe0, &val); - pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x4); - - /* System clock is 48-MHz Oscillator. */ - pci_write_config_dword(pdev, 0xe4, 1 << 5); -} - -static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev) -{ - unsigned char c; - - sb_slot = PCI_SLOT(pdev->devfn); - - printk(KERN_INFO "via686b fix: ISA bridge\n"); - - /* Enable I/O Recovery time */ - pci_write_config_byte(pdev, 0x40, 0x08); - - /* Enable ISA refresh */ - pci_write_config_byte(pdev, 0x41, 0x01); - - /* disable ISA line buffer */ - pci_write_config_byte(pdev, 0x45, 0x00); - - /* Gate INTR, and flush line buffer */ - pci_write_config_byte(pdev, 0x46, 0xe0); - - /* Disable PCI Delay Transaction, Enable EISA ports 4D0/4D1. */ - /* pci_write_config_byte(pdev, 0x47, 0x20); */ - - /* - * enable PCI Delay Transaction, Enable EISA ports 4D0/4D1. - * enable time-out timer - */ - pci_write_config_byte(pdev, 0x47, 0xe6); - - /* - * enable level trigger on pci irqs: 9,10,11,13 - * important! without this PCI interrupts won't work - */ - outb(0x2e, 0x4d1); - - /* 512 K PCI Decode */ - pci_write_config_byte(pdev, 0x48, 0x01); - - /* Wait for PGNT before grant to ISA Master/DMA */ - pci_write_config_byte(pdev, 0x4a, 0x84); - - /* - * Plug'n'Play - * - * Parallel DRQ 3, Floppy DRQ 2 (default) - */ - pci_write_config_byte(pdev, 0x50, 0x0e); - - /* - * IRQ Routing for Floppy and Parallel port - * - * IRQ 6 for floppy, IRQ 7 for parallel port - */ - pci_write_config_byte(pdev, 0x51, 0x76); - - /* IRQ Routing for serial ports (take IRQ 3 and 4) */ - pci_write_config_byte(pdev, 0x52, 0x34); - - /* All IRQ's level triggered. 
*/ - pci_write_config_byte(pdev, 0x54, 0x00); - - /* route PIRQA-D irq */ - pci_write_config_byte(pdev, 0x55, 0x90); /* bit 7-4, PIRQA */ - pci_write_config_byte(pdev, 0x56, 0xba); /* bit 7-4, PIRQC; */ - /* 3-0, PIRQB */ - pci_write_config_byte(pdev, 0x57, 0xd0); /* bit 7-4, PIRQD */ - - /* enable function 5/6, audio/modem */ - pci_read_config_byte(pdev, 0x85, &c); - c &= ~(0x3 << 2); - pci_write_config_byte(pdev, 0x85, c); - - printk(KERN_INFO"via686b fix: ISA bridge done\n"); -} - -static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev) -{ - printk(KERN_INFO"via686b fix: IDE\n"); - - /* Modify IDE controller setup */ - pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 48); - pci_write_config_byte(pdev, PCI_COMMAND, - PCI_COMMAND_IO | PCI_COMMAND_MEMORY | - PCI_COMMAND_MASTER); - pci_write_config_byte(pdev, 0x40, 0x0b); - /* legacy mode */ - pci_write_config_byte(pdev, 0x42, 0x09); - -#if 1/* play safe, otherwise we may see notebook's usb keyboard lockup */ - /* disable read prefetch/write post buffers */ - pci_write_config_byte(pdev, 0x41, 0x02); - - /* use 3/4 as fifo thresh hold */ - pci_write_config_byte(pdev, 0x43, 0x0a); - pci_write_config_byte(pdev, 0x44, 0x00); - - pci_write_config_byte(pdev, 0x45, 0x00); -#else - pci_write_config_byte(pdev, 0x41, 0xc2); - pci_write_config_byte(pdev, 0x43, 0x35); - pci_write_config_byte(pdev, 0x44, 0x1c); - - pci_write_config_byte(pdev, 0x45, 0x10); -#endif - - printk(KERN_INFO"via686b fix: IDE done\n"); -} - -static void __init loongson2e_686b_func2_fixup(struct pci_dev *pdev) -{ - /* irq routing */ - pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 10); -} - -static void __init loongson2e_686b_func3_fixup(struct pci_dev *pdev) -{ - /* irq routing */ - pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 11); -} - -static void __init loongson2e_686b_func5_fixup(struct pci_dev *pdev) -{ - unsigned int val; - unsigned char c; - - /* enable IO */ - pci_write_config_byte(pdev, PCI_COMMAND, - PCI_COMMAND_IO | PCI_COMMAND_MEMORY | - PCI_COMMAND_MASTER); - pci_read_config_dword(pdev, 0x4, &val); - pci_write_config_dword(pdev, 0x4, val | 1); - - /* route ac97 IRQ */ - pci_write_config_byte(pdev, 0x3c, 9); - - pci_read_config_byte(pdev, 0x8, &c); - - /* link control: enable link & SGD PCM output */ - pci_write_config_byte(pdev, 0x41, 0xcc); - - /* disable game port, FM, midi, sb, enable write to reg2c-2f */ - pci_write_config_byte(pdev, 0x42, 0x20); - - /* we are using Avance logic codec */ - pci_write_config_word(pdev, 0x2c, 0x1005); - pci_write_config_word(pdev, 0x2e, 0x4710); - pci_read_config_dword(pdev, 0x2c, &val); - - pci_write_config_byte(pdev, 0x42, 0x0); -} - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, - loongson2e_686b_func0_fixup); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1, - loongson2e_686b_func1_fixup); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_2, - loongson2e_686b_func2_fixup); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, - loongson2e_686b_func3_fixup); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, - loongson2e_686b_func5_fixup); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, - loongson2e_nec_fixup); diff --git a/arch/mips/pci/ops-bonito64.c b/arch/mips/pci/ops-bonito64.c index f742c51acf0..54e55e7a243 100644 --- a/arch/mips/pci/ops-bonito64.c +++ b/arch/mips/pci/ops-bonito64.c @@ -29,7 +29,7 @@ #define PCI_ACCESS_READ 0 #define PCI_ACCESS_WRITE 1 -#ifdef CONFIG_LEMOTE_FULONG 
+#ifdef CONFIG_LEMOTE_FULOONG2E
 #define CFG_SPACE_REG(offset) (void *)CKSEG1ADDR(BONITO_PCICFG_BASE | (offset))
 #define ID_SEL_BEGIN 11
 #else
@@ -77,7 +77,7 @@ static int bonito64_pcibios_config_access(unsigned char access_type,
 	addrp = CFG_SPACE_REG(addr & 0xffff);
 	if (access_type == PCI_ACCESS_WRITE) {
 		writel(cpu_to_le32(*data), addrp);
-#ifndef CONFIG_LEMOTE_FULONG
+#ifndef CONFIG_LEMOTE_FULOONG2E
 		/* Wait till done */
 		while (BONITO_PCIMSTAT & 0xF);
 #endif
-- cgit v1.2.3-70-g09d2

From 85749d24bcf90440b10394312e5b1c96d1a62cdb Mon Sep 17 00:00:00 2001
From: Wu Zhangjin
Date: Thu, 2 Jul 2009 23:26:45 +0800
Subject: MIPS: Loongson: Split common loongson source code out

To share common loongson source code between all of the loongson-based
machines, it needs to be split out of the fuloong-2e/ directory; at the
same time, some other tuning is needed accordingly. The machine-specific
parts are defined as macros in the relevant header files: pci.h, mem.h
and machine.h.

Signed-off-by: Wu Zhangjin
Signed-off-by: Ralf Baechle
---
 arch/mips/Kconfig                              | 32 +++-------
 arch/mips/Makefile                             | 7 ++-
 arch/mips/include/asm/mach-loongson/loongson.h | 11 ++++
 arch/mips/include/asm/mach-loongson/machine.h  | 22 +++++++
 arch/mips/include/asm/mach-loongson/mem.h      | 30 ++++++++++
 arch/mips/include/asm/mach-loongson/pci.h      | 12 +++-
 arch/mips/loongson/Kconfig                     | 31 ++++++++++
 arch/mips/loongson/Makefile                    | 11 ++++
 arch/mips/loongson/common/Makefile             | 11 ++++
 arch/mips/loongson/common/bonito-irq.c         | 51 ++++++++++++++++
 arch/mips/loongson/common/cmdline.c            | 52 ++++++++++++++++
 arch/mips/loongson/common/early_printk.c       | 38 ++++++++++++
 arch/mips/loongson/common/env.c                | 58 ++++++++++++++++++
 arch/mips/loongson/common/init.c               | 30 ++++++++++
 arch/mips/loongson/common/irq.c                | 74 +++++++++++++++++++++++
 arch/mips/loongson/common/machtype.c           | 17 ++++++
 arch/mips/loongson/common/mem.c                | 35 +++++++++++
 arch/mips/loongson/common/pci.c                | 83 ++++++++++++++++++++++++++
 arch/mips/loongson/common/reset.c              | 44 ++++++++++++++
 arch/mips/loongson/common/setup.c              | 58 ++++++++++++++++++
 arch/mips/loongson/common/time.c               | 27 +++++++++
 arch/mips/loongson/fuloong-2e/Makefile         | 8 +--
 arch/mips/loongson/fuloong-2e/bonito-irq.c     | 51 ----------------
 arch/mips/loongson/fuloong-2e/cmdline.c        | 52 ----------------
 arch/mips/loongson/fuloong-2e/early_printk.c   | 39 ------------
 arch/mips/loongson/fuloong-2e/env.c            | 58 ------------------
 arch/mips/loongson/fuloong-2e/init.c           | 30 ----------
 arch/mips/loongson/fuloong-2e/irq.c            | 52 ++--------------
 arch/mips/loongson/fuloong-2e/machtype.c       | 15 -----
 arch/mips/loongson/fuloong-2e/mem.c            | 36 -----------
 arch/mips/loongson/fuloong-2e/pci.c            | 83 --------------------------
 arch/mips/loongson/fuloong-2e/reset.c          | 37 +++---------
 arch/mips/loongson/fuloong-2e/time.c           | 27 ---------
 33 files changed, 720 insertions(+), 502 deletions(-)
 create mode 100644 arch/mips/include/asm/mach-loongson/machine.h
 create mode 100644 arch/mips/include/asm/mach-loongson/mem.h
 create mode 100644 arch/mips/loongson/Kconfig
 create mode 100644 arch/mips/loongson/Makefile
 create mode 100644 arch/mips/loongson/common/Makefile
 create mode 100644 arch/mips/loongson/common/bonito-irq.c
 create mode 100644 arch/mips/loongson/common/cmdline.c
 create mode 100644 arch/mips/loongson/common/early_printk.c
 create mode 100644 arch/mips/loongson/common/env.c
 create mode 100644 arch/mips/loongson/common/init.c
 create mode 100644 arch/mips/loongson/common/irq.c
 create mode 100644 arch/mips/loongson/common/machtype.c
 create mode 100644 arch/mips/loongson/common/mem.c
 create mode 100644 arch/mips/loongson/common/pci.c
 create mode 100644 arch/mips/loongson/common/reset.c
 create mode 100644 arch/mips/loongson/common/setup.c
 create mode 100644 arch/mips/loongson/common/time.c
 delete mode 100644 arch/mips/loongson/fuloong-2e/bonito-irq.c
 delete mode 100644 arch/mips/loongson/fuloong-2e/cmdline.c
 delete mode 100644 arch/mips/loongson/fuloong-2e/early_printk.c
 delete mode 100644 arch/mips/loongson/fuloong-2e/env.c
 delete mode 100644 arch/mips/loongson/fuloong-2e/init.c
 delete mode 100644 arch/mips/loongson/fuloong-2e/machtype.c
 delete mode 100644 arch/mips/loongson/fuloong-2e/mem.c
 delete mode 100644 arch/mips/loongson/fuloong-2e/pci.c
 delete mode 100644 arch/mips/loongson/fuloong-2e/time.c
(limited to 'arch/mips/include/asm')
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 3414e230182..482dcc3b91e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -174,30 +174,15 @@ config LASAT
 	select SYS_SUPPORTS_64BIT_KERNEL if BROKEN
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 
-config LEMOTE_FULOONG2E
-	bool "Lemote Fuloong2e mini-PC"
-	select ARCH_SPARSEMEM_ENABLE
-	select CEVT_R4K
-	select CSRC_R4K
-	select SYS_HAS_CPU_LOONGSON2
-	select DMA_NONCOHERENT
-	select BOOT_ELF32
-	select BOARD_SCACHE
-	select HAVE_STD_PC_SERIAL_PORT
-	select HW_HAS_PCI
-	select I8259
-	select ISA
-	select IRQ_CPU
-	select SYS_SUPPORTS_32BIT_KERNEL
-	select SYS_SUPPORTS_64BIT_KERNEL
-	select SYS_SUPPORTS_LITTLE_ENDIAN
-	select SYS_SUPPORTS_HIGHMEM
-	select SYS_HAS_EARLY_PRINTK
-	select GENERIC_ISA_DMA_SUPPORT_BROKEN
-	select CPU_HAS_WB
+config MACH_LOONGSON
+	bool "Loongson family of machines"
 	help
-	  Lemote Fuloong2e mini-PC board based on the Chinese Loongson-2E CPU and
-	  an FPGA northbridge
+	  This enables support for the Loongson family of machines.
+
+	  Loongson is a family of general-purpose MIPS-compatible CPUs
+	  developed at Institute of Computing Technology (ICT),
+	  Chinese Academy of Sciences (CAS) in the People's Republic
+	  of China. The chief architect is Professor Weiwu Hu.
config MIPS_MALTA bool "MIPS Malta board" @@ -668,6 +653,7 @@ source "arch/mips/sibyte/Kconfig" source "arch/mips/txx9/Kconfig" source "arch/mips/vr41xx/Kconfig" source "arch/mips/cavium-octeon/Kconfig" +source "arch/mips/loongson/Kconfig" endmenu diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 7754cbbbf4e..94d6f581386 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -314,11 +314,12 @@ cflags-$(CONFIG_WR_PPMC) += -I$(srctree)/arch/mips/include/asm/mach-wrppmc load-$(CONFIG_WR_PPMC) += 0xffffffff80100000 # -# lemote fuloong2e mini-PC board +# Loongson family # -core-$(CONFIG_LEMOTE_FULOONG2E) +=arch/mips/loongson/fuloong-2e/ +core-$(CONFIG_MACH_LOONGSON) +=arch/mips/loongson/ +cflags-$(CONFIG_MACH_LOONGSON) += -I$(srctree)/arch/mips/include/asm/mach-loongson \ + -mno-branch-likely load-$(CONFIG_LEMOTE_FULOONG2E) +=0xffffffff80100000 -cflags-$(CONFIG_LEMOTE_FULOONG2E) += -I$(srctree)/arch/mips/include/asm/mach-loongson/ # # MIPS Malta board diff --git a/arch/mips/include/asm/mach-loongson/loongson.h b/arch/mips/include/asm/mach-loongson/loongson.h index e9f74dee24e..da70bcf2304 100644 --- a/arch/mips/include/asm/mach-loongson/loongson.h +++ b/arch/mips/include/asm/mach-loongson/loongson.h @@ -21,6 +21,10 @@ /* loongson internal northbridge initialization */ extern void bonito_irq_init(void); +/* machine-specific reboot/halt operation */ +extern void mach_prepare_reboot(void); +extern void mach_prepare_shutdown(void); + /* environment arguments from bootloader */ extern unsigned long bus_clock, cpu_clock_freq; extern unsigned long memsize, highmemsize; @@ -30,6 +34,13 @@ extern void __init prom_init_memory(void); extern void __init prom_init_cmdline(void); extern void __init prom_init_env(void); +/* irq operation functions */ +extern void bonito_irqdispatch(void); +extern void __init bonito_irq_init(void); +extern void __init set_irq_trigger_mode(void); +extern void __init mach_init_irq(void); +extern void mach_irq_dispatch(unsigned int pending); + /* PCI Configuration Registers */ #define LOONGSON_PCI_ISR4C BONITO_PCI_REG(0x4c) diff --git a/arch/mips/include/asm/mach-loongson/machine.h b/arch/mips/include/asm/mach-loongson/machine.h new file mode 100644 index 00000000000..8e60d363594 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson/machine.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2009 Lemote, Inc. & Institute of Computing Technology + * Author: Wu Zhangjin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __ASM_MACH_LOONGSON_MACHINE_H +#define __ASM_MACH_LOONGSON_MACHINE_H + +#ifdef CONFIG_LEMOTE_FULOONG2E + +#define LOONGSON_UART_BASE (BONITO_PCIIO_BASE + 0x3f8) + +#define LOONGSON_MACHNAME "lemote-fuloong-2e-box" + +#endif + +#endif /* __ASM_MACH_LOONGSON_MACHINE_H */ diff --git a/arch/mips/include/asm/mach-loongson/mem.h b/arch/mips/include/asm/mach-loongson/mem.h new file mode 100644 index 00000000000..bd7b3cba7e3 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson/mem.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2009 Lemote, Inc. 
& Institute of Computing Technology
+ * Author: Wu Zhangjin
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __ASM_MACH_LOONGSON_MEM_H
+#define __ASM_MACH_LOONGSON_MEM_H
+
+/*
+ * On Lemote Loongson 2e
+ *
+ * the high memory space starts from 512M.
+ * the peripheral registers reside between 0x1000:0000 and 0x2000:0000.
+ */
+
+#ifdef CONFIG_LEMOTE_FULOONG2E
+
+#define LOONGSON_HIGHMEM_START	0x20000000
+
+#define LOONGSON_MMIO_MEM_START	0x10000000
+#define LOONGSON_MMIO_MEM_END	0x20000000
+
+#endif
+
+#endif /* __ASM_MACH_LOONGSON_MEM_H */
diff --git a/arch/mips/include/asm/mach-loongson/pci.h b/arch/mips/include/asm/mach-loongson/pci.h
index e229b2904cc..f1663ca81da 100644
--- a/arch/mips/include/asm/mach-loongson/pci.h
+++ b/arch/mips/include/asm/mach-loongson/pci.h
@@ -24,8 +24,14 @@
 extern struct pci_ops bonito64_pci_ops;
 
-#define LOONGSON2E_PCI_MEM_START	BONITO_PCILO1_BASE
-#define LOONGSON2E_PCI_MEM_END		(BONITO_PCILO1_BASE + 0x04000000 * 2)
-#define LOONGSON2E_PCI_IO_START		0x00004000UL
+#ifdef CONFIG_LEMOTE_FULOONG2E
+
+/* this pci memory space is mapped by pcimap in pci.c */
+#define LOONGSON_PCI_MEM_START	BONITO_PCILO1_BASE
+#define LOONGSON_PCI_MEM_END	(BONITO_PCILO1_BASE + 0x04000000 * 2)
+/* this is an offset from mips_io_port_base */
+#define LOONGSON_PCI_IO_START	0x00004000UL
+
+#endif
 #endif /* !__ASM_MACH_LOONGSON_PCI_H_ */
diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson/Kconfig
new file mode 100644
index 00000000000..376712a0e2f
--- /dev/null
+++ b/arch/mips/loongson/Kconfig
@@ -0,0 +1,31 @@
+choice
+	prompt "Machine Type"
+	depends on MACH_LOONGSON
+
+config LEMOTE_FULOONG2E
+	bool "Lemote Fuloong(2e) mini-PC"
+	select ARCH_SPARSEMEM_ENABLE
+	select CEVT_R4K
+	select CSRC_R4K
+	select SYS_HAS_CPU_LOONGSON2
+	select DMA_NONCOHERENT
+	select BOOT_ELF32
+	select BOARD_SCACHE
+	select HW_HAS_PCI
+	select I8259
+	select ISA
+	select IRQ_CPU
+	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_64BIT_KERNEL
+	select SYS_SUPPORTS_LITTLE_ENDIAN
+	select SYS_SUPPORTS_HIGHMEM
+	select SYS_HAS_EARLY_PRINTK
+	select GENERIC_HARDIRQS_NO__DO_IRQ
+	select GENERIC_ISA_DMA_SUPPORT_BROKEN
+	select CPU_HAS_WB
+	help
+	  Lemote Fuloong(2e) mini-PC board based on the Chinese Loongson-2E
+	  CPU and an FPGA northbridge.
+
+	  The Lemote Fuloong(2e) mini-PC has a VIA686B south bridge.
+endchoice
diff --git a/arch/mips/loongson/Makefile b/arch/mips/loongson/Makefile
new file mode 100644
index 00000000000..39048c455d7
--- /dev/null
+++ b/arch/mips/loongson/Makefile
@@ -0,0 +1,11 @@
+#
+# Common code for all Loongson based systems
+#
+
+obj-$(CONFIG_MACH_LOONGSON) += common/
+
+#
+# Lemote Fuloong mini-PC (Loongson 2E-based)
+#
+
+obj-$(CONFIG_LEMOTE_FULOONG2E) += fuloong-2e/
diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson/common/Makefile
new file mode 100644
index 00000000000..4e3889dec39
--- /dev/null
+++ b/arch/mips/loongson/common/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for Loongson-based machines.
+# + +obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \ + pci.o bonito-irq.o mem.o + +# +# Early printk support +# +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o diff --git a/arch/mips/loongson/common/bonito-irq.c b/arch/mips/loongson/common/bonito-irq.c new file mode 100644 index 00000000000..3e31e7ad713 --- /dev/null +++ b/arch/mips/loongson/common/bonito-irq.c @@ -0,0 +1,51 @@ +/* + * Copyright 2001 MontaVista Software Inc. + * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net + * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org) + * + * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include + +#include + +static inline void bonito_irq_enable(unsigned int irq) +{ + BONITO_INTENSET = (1 << (irq - BONITO_IRQ_BASE)); + mmiowb(); +} + +static inline void bonito_irq_disable(unsigned int irq) +{ + BONITO_INTENCLR = (1 << (irq - BONITO_IRQ_BASE)); + mmiowb(); +} + +static struct irq_chip bonito_irq_type = { + .name = "bonito_irq", + .ack = bonito_irq_disable, + .mask = bonito_irq_disable, + .mask_ack = bonito_irq_disable, + .unmask = bonito_irq_enable, +}; + +static struct irqaction dma_timeout_irqaction = { + .handler = no_action, + .name = "dma_timeout", +}; + +void bonito_irq_init(void) +{ + u32 i; + + for (i = BONITO_IRQ_BASE; i < BONITO_IRQ_BASE + 32; i++) + set_irq_chip_and_handler(i, &bonito_irq_type, handle_level_irq); + + setup_irq(BONITO_IRQ_BASE + 10, &dma_timeout_irqaction); +} diff --git a/arch/mips/loongson/common/cmdline.c b/arch/mips/loongson/common/cmdline.c new file mode 100644 index 00000000000..75f1b243ee4 --- /dev/null +++ b/arch/mips/loongson/common/cmdline.c @@ -0,0 +1,52 @@ +/* + * Based on Ocelot Linux port, which is + * Copyright 2001 MontaVista Software Inc. + * Author: jsun@mvista.com or jsun@junsun.net + * + * Copyright 2003 ICT CAS + * Author: Michael Guo + * + * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ +#include + +#include + +int prom_argc; +/* pmon passes arguments in 32bit pointers */ +int *_prom_argv; + +void __init prom_init_cmdline(void) +{ + int i; + long l; + + /* firmware arguments are initialized in head.S */ + prom_argc = fw_arg0; + _prom_argv = (int *)fw_arg1; + + /* arg[0] is "g", the rest is boot parameters */ + arcs_cmdline[0] = '\0'; + for (i = 1; i < prom_argc; i++) { + l = (long)_prom_argv[i]; + if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) + >= sizeof(arcs_cmdline)) + break; + strcat(arcs_cmdline, ((char *)l)); + strcat(arcs_cmdline, " "); + } + + if ((strstr(arcs_cmdline, "console=")) == NULL) + strcat(arcs_cmdline, " console=ttyS0,115200"); + if ((strstr(arcs_cmdline, "root=")) == NULL) + strcat(arcs_cmdline, " root=/dev/hda1"); +} diff --git a/arch/mips/loongson/common/early_printk.c b/arch/mips/loongson/common/early_printk.c new file mode 100644 index 00000000000..bc73edc0cfd --- /dev/null +++ b/arch/mips/loongson/common/early_printk.c @@ -0,0 +1,38 @@ +/* early printk support + * + * Copyright (c) 2009 Philippe Vachon + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include + +#include +#include + +#define PORT(base, offset) (u8 *)(base + offset) + +static inline unsigned int serial_in(phys_addr_t base, int offset) +{ + return readb(PORT(base, offset)); +} + +static inline void serial_out(phys_addr_t base, int offset, int value) +{ + writeb(value, PORT(base, offset)); +} + +void prom_putchar(char c) +{ + phys_addr_t uart_base = + (phys_addr_t) ioremap_nocache(LOONGSON_UART_BASE, 8); + + while ((serial_in(uart_base, UART_LSR) & UART_LSR_THRE) == 0) + ; + + serial_out(uart_base, UART_TX, c); +} diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c new file mode 100644 index 00000000000..b9ef5038554 --- /dev/null +++ b/arch/mips/loongson/common/env.c @@ -0,0 +1,58 @@ +/* + * Based on Ocelot Linux port, which is + * Copyright 2001 MontaVista Software Inc. + * Author: jsun@mvista.com or jsun@junsun.net + * + * Copyright 2003 ICT CAS + * Author: Michael Guo + * + * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ +#include + +#include + +unsigned long bus_clock, cpu_clock_freq; +unsigned long memsize, highmemsize; + +/* pmon passes arguments in 32bit pointers */ +int *_prom_envp; + +#define parse_even_earlier(res, option, p) \ +do { \ + if (strncmp(option, (char *)p, strlen(option)) == 0) \ + strict_strtol((char *)p + strlen(option"="), \ + 10, &res); \ +} while (0) + +void __init prom_init_env(void) +{ + long l; + + /* firmware arguments are initialized in head.S */ + _prom_envp = (int *)fw_arg2; + + l = (long)*_prom_envp; + while (l != 0) { + parse_even_earlier(bus_clock, "busclock", l); + parse_even_earlier(cpu_clock_freq, "cpuclock", l); + parse_even_earlier(memsize, "memsize", l); + parse_even_earlier(highmemsize, "highmemsize", l); + _prom_envp++; + l = (long)*_prom_envp; + } + if (memsize == 0) + memsize = 256; + + pr_info("busclock=%ld, cpuclock=%ld, memsize=%ld, highmemsize=%ld\n", + bus_clock, cpu_clock_freq, memsize, highmemsize); +} diff --git a/arch/mips/loongson/common/init.c b/arch/mips/loongson/common/init.c new file mode 100644 index 00000000000..3abe927422a --- /dev/null +++ b/arch/mips/loongson/common/init.c @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include + +#include + +#include + +void __init prom_init(void) +{ + /* init base address of io space */ + set_io_port_base((unsigned long) + ioremap(BONITO_PCIIO_BASE, BONITO_PCIIO_SIZE)); + + prom_init_cmdline(); + prom_init_env(); + prom_init_memory(); +} + +void __init prom_free_prom_memory(void) +{ +} diff --git a/arch/mips/loongson/common/irq.c b/arch/mips/loongson/common/irq.c new file mode 100644 index 00000000000..f368c735cbd --- /dev/null +++ b/arch/mips/loongson/common/irq.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include + +#include +/* + * the first level int-handler will jump here if it is a bonito irq + */ +void bonito_irqdispatch(void) +{ + u32 int_status; + int i; + + /* workaround the IO dma problem: let cpu looping to allow DMA finish */ + int_status = BONITO_INTISR; + if (int_status & (1 << 10)) { + while (int_status & (1 << 10)) { + udelay(1); + int_status = BONITO_INTISR; + } + } + + /* Get pending sources, masked by current enables */ + int_status = BONITO_INTISR & BONITO_INTEN; + + if (int_status != 0) { + i = __ffs(int_status); + int_status &= ~(1 << i); + do_IRQ(BONITO_IRQ_BASE + i); + } +} + +asmlinkage void plat_irq_dispatch(void) +{ + unsigned int pending; + + pending = read_c0_cause() & read_c0_status() & ST0_IM; + + /* machine-specific plat_irq_dispatch */ + mach_irq_dispatch(pending); +} + +void __init arch_init_irq(void) +{ + /* + * Clear all of the interrupts while we change the able around a bit. 
+ * int-handler is not on bootstrap + */ + clear_c0_status(ST0_IM | ST0_BEV); + local_irq_disable(); + + /* setting irq trigger mode */ + set_irq_trigger_mode(); + + /* no steer */ + BONITO_INTSTEER = 0; + + /* + * Mask out all interrupt by writing "1" to all bit position in + * the interrupt reset reg. + */ + BONITO_INTENCLR = ~0; + + /* machine specific irq init */ + mach_init_irq(); +} diff --git a/arch/mips/loongson/common/machtype.c b/arch/mips/loongson/common/machtype.c new file mode 100644 index 00000000000..845b3fb47e0 --- /dev/null +++ b/arch/mips/loongson/common/machtype.c @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include + +const char *get_system_type(void) +{ + return LOONGSON_MACHNAME; +} + diff --git a/arch/mips/loongson/common/mem.c b/arch/mips/loongson/common/mem.c new file mode 100644 index 00000000000..7c92f79b648 --- /dev/null +++ b/arch/mips/loongson/common/mem.c @@ -0,0 +1,35 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include +#include + +#include + +#include +#include + +void __init prom_init_memory(void) +{ + add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); +#ifdef CONFIG_64BIT + if (highmemsize > 0) + add_memory_region(LOONGSON_HIGHMEM_START, + highmemsize << 20, BOOT_MEM_RAM); +#endif /* CONFIG_64BIT */ +} + +/* override of arch/mips/mm/cache.c: __uncached_access */ +int __uncached_access(struct file *file, unsigned long addr) +{ + if (file->f_flags & O_SYNC) + return 1; + + return addr >= __pa(high_memory) || + ((addr >= LOONGSON_MMIO_MEM_START) && + (addr < LOONGSON_MMIO_MEM_END)); +} diff --git a/arch/mips/loongson/common/pci.c b/arch/mips/loongson/common/pci.c new file mode 100644 index 00000000000..a3a4abfb6c9 --- /dev/null +++ b/arch/mips/loongson/common/pci.c @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ +#include + +#include +#include + +static struct resource loongson_pci_mem_resource = { + .name = "pci memory space", + .start = LOONGSON_PCI_MEM_START, + .end = LOONGSON_PCI_MEM_END, + .flags = IORESOURCE_MEM, +}; + +static struct resource loongson_pci_io_resource = { + .name = "pci io space", + .start = LOONGSON_PCI_IO_START, + .end = IO_SPACE_LIMIT, + .flags = IORESOURCE_IO, +}; + +static struct pci_controller loongson_pci_controller = { + .pci_ops = &bonito64_pci_ops, + .io_resource = &loongson_pci_io_resource, + .mem_resource = &loongson_pci_mem_resource, + .mem_offset = 0x00000000UL, + .io_offset = 0x00000000UL, +}; + +static void __init setup_pcimap(void) +{ + /* + * local to PCI mapping for CPU accessing PCI space + * CPU address space [256M,448M] is window for accessing pci space + * we set pcimap_lo[0,1,2] to map it to pci space[0M,64M], [320M,448M] + * + * pcimap: PCI_MAP2 PCI_Mem_Lo2 PCI_Mem_Lo1 PCI_Mem_Lo0 + * [<2G] [384M,448M] [320M,384M] [0M,64M] + */ + BONITO_PCIMAP = BONITO_PCIMAP_PCIMAP_2 | + BONITO_PCIMAP_WIN(2, BONITO_PCILO2_BASE) | + BONITO_PCIMAP_WIN(1, BONITO_PCILO1_BASE) | + BONITO_PCIMAP_WIN(0, 0); + + /* + * PCI-DMA to local mapping: [2G,2G+256M] -> [0M,256M] + */ + BONITO_PCIBASE0 = 0x80000000ul; /* base: 2G -> mmap: 0M */ + /* size: 256M, burst transmission, pre-fetch enable, 64bit */ + LOONGSON_PCI_HIT0_SEL_L = 0xc000000cul; + LOONGSON_PCI_HIT0_SEL_H = 0xfffffffful; + LOONGSON_PCI_HIT1_SEL_L = 0x00000006ul; /* set this BAR as invalid */ + LOONGSON_PCI_HIT1_SEL_H = 0x00000000ul; + LOONGSON_PCI_HIT2_SEL_L = 0x00000006ul; /* set this BAR as invalid */ + LOONGSON_PCI_HIT2_SEL_H = 0x00000000ul; + + /* avoid deadlock of PCI reading/writing lock operation */ + LOONGSON_PCI_ISR4C = 0xd2000001ul; + + /* can not change gnt to break pci transfer when device's gnt not + deassert for some broken device */ + LOONGSON_PXARB_CFG = 0x00fe0105ul; +} + +static int __init pcibios_init(void) +{ + setup_pcimap(); + + loongson_pci_controller.io_map_base = mips_io_port_base; + + register_pci_controller(&loongson_pci_controller); + + return 0; +} + +arch_initcall(pcibios_init); diff --git a/arch/mips/loongson/common/reset.c b/arch/mips/loongson/common/reset.c new file mode 100644 index 00000000000..97e918251ed --- /dev/null +++ b/arch/mips/loongson/common/reset.c @@ -0,0 +1,44 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * Copyright (C) 2009 Lemote, Inc. 
& Institute of Computing Technology + * Author: Zhangjin Wu, wuzj@lemote.com + */ +#include +#include + +#include + +#include + +static void loongson_restart(char *command) +{ + /* do preparation for reboot */ + mach_prepare_reboot(); + + /* reboot via jumping to boot base address */ + ((void (*)(void))ioremap_nocache(BONITO_BOOT_BASE, 4)) (); +} + +static void loongson_halt(void) +{ + mach_prepare_shutdown(); + while (1) + ; +} + +static int __init mips_reboot_setup(void) +{ + _machine_restart = loongson_restart; + _machine_halt = loongson_halt; + pm_power_off = loongson_halt; + + return 0; +} + +arch_initcall(mips_reboot_setup); diff --git a/arch/mips/loongson/common/setup.c b/arch/mips/loongson/common/setup.c new file mode 100644 index 00000000000..4cd2aa9a342 --- /dev/null +++ b/arch/mips/loongson/common/setup.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include + +#include + +#include + +#ifdef CONFIG_VT +#include +#include +#endif + +void (*__wbflush)(void); +EXPORT_SYMBOL(__wbflush); + +static void wbflush_loongson(void) +{ + asm(".set\tpush\n\t" + ".set\tnoreorder\n\t" + ".set mips3\n\t" + "sync\n\t" + "nop\n\t" + ".set\tpop\n\t" + ".set mips0\n\t"); +} + +void __init plat_mem_setup(void) +{ + __wbflush = wbflush_loongson; + +#ifdef CONFIG_VT +#if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; + + screen_info = (struct screen_info) { + 0, 25, /* orig-x, orig-y */ + 0, /* unused */ + 0, /* orig-video-page */ + 0, /* orig-video-mode */ + 80, /* orig-video-cols */ + 0, 0, 0, /* ega_ax, ega_bx, ega_cx */ + 25, /* orig-video-lines */ + VIDEO_TYPE_VGAC, /* orig-video-isVGA */ + 16 /* orig-video-points */ + }; +#elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +#endif +#endif +} diff --git a/arch/mips/loongson/common/time.c b/arch/mips/loongson/common/time.c new file mode 100644 index 00000000000..b13d1717465 --- /dev/null +++ b/arch/mips/loongson/common/time.c @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include + +#include + +void __init plat_time_init(void) +{ + /* setup mips r4k timer */ + mips_hpt_frequency = cpu_clock_freq / 2; +} + +unsigned long read_persistent_clock(void) +{ + return mc146818_get_cmos_time(); +} diff --git a/arch/mips/loongson/fuloong-2e/Makefile b/arch/mips/loongson/fuloong-2e/Makefile index feb1d6bba49..96e45c13c85 100644 --- a/arch/mips/loongson/fuloong-2e/Makefile +++ b/arch/mips/loongson/fuloong-2e/Makefile @@ -2,12 +2,6 @@ # Makefile for Lemote Fuloong2e mini-PC board. 
# -obj-y += setup.o init.o reset.o irq.o pci.o bonito-irq.o mem.o \ - env.o cmdline.o time.o machtype.o - -# -# Early printk support -# -obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-y += irq.o reset.o machtype.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/loongson/fuloong-2e/bonito-irq.c b/arch/mips/loongson/fuloong-2e/bonito-irq.c deleted file mode 100644 index 3e31e7ad713..00000000000 --- a/arch/mips/loongson/fuloong-2e/bonito-irq.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2001 MontaVista Software Inc. - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org) - * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include - -#include - -static inline void bonito_irq_enable(unsigned int irq) -{ - BONITO_INTENSET = (1 << (irq - BONITO_IRQ_BASE)); - mmiowb(); -} - -static inline void bonito_irq_disable(unsigned int irq) -{ - BONITO_INTENCLR = (1 << (irq - BONITO_IRQ_BASE)); - mmiowb(); -} - -static struct irq_chip bonito_irq_type = { - .name = "bonito_irq", - .ack = bonito_irq_disable, - .mask = bonito_irq_disable, - .mask_ack = bonito_irq_disable, - .unmask = bonito_irq_enable, -}; - -static struct irqaction dma_timeout_irqaction = { - .handler = no_action, - .name = "dma_timeout", -}; - -void bonito_irq_init(void) -{ - u32 i; - - for (i = BONITO_IRQ_BASE; i < BONITO_IRQ_BASE + 32; i++) - set_irq_chip_and_handler(i, &bonito_irq_type, handle_level_irq); - - setup_irq(BONITO_IRQ_BASE + 10, &dma_timeout_irqaction); -} diff --git a/arch/mips/loongson/fuloong-2e/cmdline.c b/arch/mips/loongson/fuloong-2e/cmdline.c deleted file mode 100644 index 75f1b243ee4..00000000000 --- a/arch/mips/loongson/fuloong-2e/cmdline.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Based on Ocelot Linux port, which is - * Copyright 2001 MontaVista Software Inc. - * Author: jsun@mvista.com or jsun@junsun.net - * - * Copyright 2003 ICT CAS - * Author: Michael Guo - * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- */ -#include - -#include - -int prom_argc; -/* pmon passes arguments in 32bit pointers */ -int *_prom_argv; - -void __init prom_init_cmdline(void) -{ - int i; - long l; - - /* firmware arguments are initialized in head.S */ - prom_argc = fw_arg0; - _prom_argv = (int *)fw_arg1; - - /* arg[0] is "g", the rest is boot parameters */ - arcs_cmdline[0] = '\0'; - for (i = 1; i < prom_argc; i++) { - l = (long)_prom_argv[i]; - if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) - >= sizeof(arcs_cmdline)) - break; - strcat(arcs_cmdline, ((char *)l)); - strcat(arcs_cmdline, " "); - } - - if ((strstr(arcs_cmdline, "console=")) == NULL) - strcat(arcs_cmdline, " console=ttyS0,115200"); - if ((strstr(arcs_cmdline, "root=")) == NULL) - strcat(arcs_cmdline, " root=/dev/hda1"); -} diff --git a/arch/mips/loongson/fuloong-2e/early_printk.c b/arch/mips/loongson/fuloong-2e/early_printk.c deleted file mode 100644 index 3e0a6eaa404..00000000000 --- a/arch/mips/loongson/fuloong-2e/early_printk.c +++ /dev/null @@ -1,39 +0,0 @@ -/* early printk support - * - * Copyright (c) 2009 Philippe Vachon - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include - -#include - -#define UART_BASE (BONITO_PCIIO_BASE + 0x3f8) - -#define PORT(base, offset) (u8 *)(base + offset) - -static inline unsigned int serial_in(phys_addr_t base, int offset) -{ - return readb(PORT(base, offset)); -} - -static inline void serial_out(phys_addr_t base, int offset, int value) -{ - writeb(value, PORT(base, offset)); -} - -void prom_putchar(char c) -{ - phys_addr_t uart_base = - (phys_addr_t) ioremap_nocache(UART_BASE, 8); - - while ((serial_in(uart_base, UART_LSR) & UART_LSR_THRE) == 0) - ; - - serial_out(uart_base, UART_TX, c); -} diff --git a/arch/mips/loongson/fuloong-2e/env.c b/arch/mips/loongson/fuloong-2e/env.c deleted file mode 100644 index b9ef5038554..00000000000 --- a/arch/mips/loongson/fuloong-2e/env.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Based on Ocelot Linux port, which is - * Copyright 2001 MontaVista Software Inc. - * Author: jsun@mvista.com or jsun@junsun.net - * - * Copyright 2003 ICT CAS - * Author: Michael Guo - * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- */ -#include - -#include - -unsigned long bus_clock, cpu_clock_freq; -unsigned long memsize, highmemsize; - -/* pmon passes arguments in 32bit pointers */ -int *_prom_envp; - -#define parse_even_earlier(res, option, p) \ -do { \ - if (strncmp(option, (char *)p, strlen(option)) == 0) \ - strict_strtol((char *)p + strlen(option"="), \ - 10, &res); \ -} while (0) - -void __init prom_init_env(void) -{ - long l; - - /* firmware arguments are initialized in head.S */ - _prom_envp = (int *)fw_arg2; - - l = (long)*_prom_envp; - while (l != 0) { - parse_even_earlier(bus_clock, "busclock", l); - parse_even_earlier(cpu_clock_freq, "cpuclock", l); - parse_even_earlier(memsize, "memsize", l); - parse_even_earlier(highmemsize, "highmemsize", l); - _prom_envp++; - l = (long)*_prom_envp; - } - if (memsize == 0) - memsize = 256; - - pr_info("busclock=%ld, cpuclock=%ld, memsize=%ld, highmemsize=%ld\n", - bus_clock, cpu_clock_freq, memsize, highmemsize); -} diff --git a/arch/mips/loongson/fuloong-2e/init.c b/arch/mips/loongson/fuloong-2e/init.c deleted file mode 100644 index 3abe927422a..00000000000 --- a/arch/mips/loongson/fuloong-2e/init.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include - -#include - -#include - -void __init prom_init(void) -{ - /* init base address of io space */ - set_io_port_base((unsigned long) - ioremap(BONITO_PCIIO_BASE, BONITO_PCIIO_SIZE)); - - prom_init_cmdline(); - prom_init_env(); - prom_init_memory(); -} - -void __init prom_free_prom_memory(void) -{ -} diff --git a/arch/mips/loongson/fuloong-2e/irq.c b/arch/mips/loongson/fuloong-2e/irq.c index 9585f5aa7cc..7888cf69424 100644 --- a/arch/mips/loongson/fuloong-2e/irq.c +++ b/arch/mips/loongson/fuloong-2e/irq.c @@ -7,39 +7,12 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ -#include #include #include #include #include -/* - * the first level int-handler will jump here if it is a bonito irq - */ -static void bonito_irqdispatch(void) -{ - u32 int_status; - int i; - - /* workaround the IO dma problem: let cpu looping to allow DMA finish */ - int_status = BONITO_INTISR; - if (int_status & (1 << 10)) { - while (int_status & (1 << 10)) { - udelay(1); - int_status = BONITO_INTISR; - } - } - - /* Get pending sources, masked by current enables */ - int_status = BONITO_INTISR & BONITO_INTEN; - - if (int_status != 0) { - i = __ffs(int_status); - int_status &= ~(1 << i); - do_IRQ(BONITO_IRQ_BASE + i); - } -} static void i8259_irqdispatch(void) { @@ -52,10 +25,8 @@ static void i8259_irqdispatch(void) spurious_interrupt(); } -asmlinkage void plat_irq_dispatch(void) +asmlinkage void mach_irq_dispatch(unsigned int pending) { - unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; - if (pending & CAUSEF_IP7) do_IRQ(MIPS_CPU_IRQ_BASE + 7); else if (pending & CAUSEF_IP6) /* perf counter loverflow */ @@ -73,26 +44,15 @@ static struct irqaction cascade_irqaction = { .name = "cascade", }; -void __init arch_init_irq(void) +void __init set_irq_trigger_mode(void) { - /* - * Clear all of the interrupts while we change the able around a bit. 
- * int-handler is not on bootstrap - */ - clear_c0_status(ST0_IM | ST0_BEV); - local_irq_disable(); - /* most bonito irq should be level triggered */ BONITO_INTEDGE = BONITO_ICU_SYSTEMERR | BONITO_ICU_MASTERERR | - BONITO_ICU_RETRYERR | BONITO_ICU_MBOXES; - BONITO_INTSTEER = 0; - - /* - * Mask out all interrupt by writing "1" to all bit position in - * the interrupt reset reg. - */ - BONITO_INTENCLR = ~0; + BONITO_ICU_RETRYERR | BONITO_ICU_MBOXES; +} +void __init mach_init_irq(void) +{ /* init all controller * 0-15 ------> i8259 interrupt * 16-23 ------> mips cpu interrupt diff --git a/arch/mips/loongson/fuloong-2e/machtype.c b/arch/mips/loongson/fuloong-2e/machtype.c deleted file mode 100644 index e03aa0de617..00000000000 --- a/arch/mips/loongson/fuloong-2e/machtype.c +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -const char *get_system_type(void) -{ - return "lemote-fuloong-2e-box"; -} - diff --git a/arch/mips/loongson/fuloong-2e/mem.c b/arch/mips/loongson/fuloong-2e/mem.c deleted file mode 100644 index 6a7feb178fa..00000000000 --- a/arch/mips/loongson/fuloong-2e/mem.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include -#include -#include - -#include - -#include - -void __init prom_init_memory(void) -{ - add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); -#ifdef CONFIG_64BIT - if (highmemsize > 0) - add_memory_region(0x20000000, highmemsize << 20, BOOT_MEM_RAM); -#endif /* CONFIG_64BIT */ -} - -/* override of arch/mips/mm/cache.c: __uncached_access */ -int __uncached_access(struct file *file, unsigned long addr) -{ - if (file->f_flags & O_SYNC) - return 1; - - /* - * On the Lemote Loongson 2e system, the peripheral registers - * reside between 0x1000:0000 and 0x2000:0000. - */ - return addr >= __pa(high_memory) || - ((addr >= 0x10000000) && (addr < 0x20000000)); -} diff --git a/arch/mips/loongson/fuloong-2e/pci.c b/arch/mips/loongson/fuloong-2e/pci.c deleted file mode 100644 index 9812c30cc6e..00000000000 --- a/arch/mips/loongson/fuloong-2e/pci.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- */ -#include - -#include -#include - -static struct resource loongson2e_pci_mem_resource = { - .name = "LOONGSON2E PCI MEM", - .start = LOONGSON2E_PCI_MEM_START, - .end = LOONGSON2E_PCI_MEM_END, - .flags = IORESOURCE_MEM, -}; - -static struct resource loongson2e_pci_io_resource = { - .name = "LOONGSON2E PCI IO MEM", - .start = LOONGSON2E_PCI_IO_START, - .end = IO_SPACE_LIMIT, - .flags = IORESOURCE_IO, -}; - -static struct pci_controller loongson2e_pci_controller = { - .pci_ops = &bonito64_pci_ops, - .io_resource = &loongson2e_pci_io_resource, - .mem_resource = &loongson2e_pci_mem_resource, - .mem_offset = 0x00000000UL, - .io_offset = 0x00000000UL, -}; - -static void __init setup_pcimap(void) -{ - /* - * local to PCI mapping for CPU accessing PCI space - * CPU address space [256M,448M] is window for accessing pci space - * we set pcimap_lo[0,1,2] to map it to pci space[0M,64M], [320M,448M] - * - * pcimap: PCI_MAP2 PCI_Mem_Lo2 PCI_Mem_Lo1 PCI_Mem_Lo0 - * [<2G] [384M,448M] [320M,384M] [0M,64M] - */ - BONITO_PCIMAP = BONITO_PCIMAP_PCIMAP_2 | - BONITO_PCIMAP_WIN(2, BONITO_PCILO2_BASE) | - BONITO_PCIMAP_WIN(1, BONITO_PCILO1_BASE) | - BONITO_PCIMAP_WIN(0, 0); - - /* - * PCI-DMA to local mapping: [2G,2G+256M] -> [0M,256M] - */ - BONITO_PCIBASE0 = 0x80000000ul; /* base: 2G -> mmap: 0M */ - /* size: 256M, burst transmission, pre-fetch enable, 64bit */ - LOONGSON_PCI_HIT0_SEL_L = 0xc000000cul; - LOONGSON_PCI_HIT0_SEL_H = 0xfffffffful; - LOONGSON_PCI_HIT1_SEL_L = 0x00000006ul; /* set this BAR as invalid */ - LOONGSON_PCI_HIT1_SEL_H = 0x00000000ul; - LOONGSON_PCI_HIT2_SEL_L = 0x00000006ul; /* set this BAR as invalid */ - LOONGSON_PCI_HIT2_SEL_H = 0x00000000ul; - - /* avoid deadlock of PCI reading/writing lock operation */ - LOONGSON_PCI_ISR4C = 0xd2000001ul; - - /* can not change gnt to break pci transfer when device's gnt not - deassert for some broken device */ - LOONGSON_PXARB_CFG = 0x00fe0105ul; -} - -static int __init pcibios_init(void) -{ - setup_pcimap(); - - loongson2e_pci_controller.io_map_base = mips_io_port_base; - - register_pci_controller(&loongson2e_pci_controller); - - return 0; -} - -arch_initcall(pcibios_init); diff --git a/arch/mips/loongson/fuloong-2e/reset.c b/arch/mips/loongson/fuloong-2e/reset.c index c21299af7f6..677fe186db9 100644 --- a/arch/mips/loongson/fuloong-2e/reset.c +++ b/arch/mips/loongson/fuloong-2e/reset.c @@ -1,44 +1,23 @@ -/* +/* Board-specific reboot/shutdown routines + * Copyright (c) 2009 Philippe Vachon + * + * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology + * Author: Wu Zhangjin, wuzj@lemote.com + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. - * - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * Copyright (C) 2009 Lemote, Inc. 
& Institute of Computing Technology - * Author: Zhangjin Wu, wuzj@lemote.com */ -#include -#include - -#include #include -static void loongson2e_restart(char *command) +void mach_prepare_reboot(void) { - /* do preparation for reboot */ BONITO_BONGENCFG &= ~(1 << 2); BONITO_BONGENCFG |= (1 << 2); - - /* reboot via jumping to boot base address */ - ((void (*)(void))ioremap_nocache(BONITO_BOOT_BASE, 4)) (); } -static void loongson2e_halt(void) +void mach_prepare_shutdown(void) { - while (1) - ; } - -static int __init mips_reboot_setup(void) -{ - _machine_restart = loongson2e_restart; - _machine_halt = loongson2e_halt; - pm_power_off = loongson2e_halt; - - return 0; -} - -arch_initcall(mips_reboot_setup); diff --git a/arch/mips/loongson/fuloong-2e/time.c b/arch/mips/loongson/fuloong-2e/time.c deleted file mode 100644 index b13d1717465..00000000000 --- a/arch/mips/loongson/fuloong-2e/time.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology - * Author: Wu Zhangjin, wuzj@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include -#include - -#include - -void __init plat_time_init(void) -{ - /* setup mips r4k timer */ - mips_hpt_frequency = cpu_clock_freq / 2; -} - -unsigned long read_persistent_clock(void) -{ - return mc146818_get_cmos_time(); -} -- cgit v1.2.3-70-g09d2 From 3209e70e5ed1821be8d9b87fe9e8bd6cffa4b4c7 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Thu, 2 Jul 2009 23:27:12 +0800 Subject: MIPS: Loongson: Add a machtype kernel command line argument The differences between some Loongson-based machines are very small, so if there is no need to add new kernel config options to cope with them, it is better to share the same kernel image file between those machines; that way Linux distribution developers only have to compile the kernel once. This machtype kernel command line argument will be used later to share the same kernel image file between two different machines (menglong & yeeloong) made by Lemote. Thanks very much to Zhang Le for cleaning up the machtype implementation. Signed-off-by: Wu Zhangjin Signed-off-by: Ralf Baechle --- Documentation/kernel-parameters.txt | 4 +++ arch/mips/include/asm/bootinfo.h | 12 +++++++++ arch/mips/include/asm/mach-loongson/machine.h | 2 +- arch/mips/loongson/common/Makefile | 2 +- arch/mips/loongson/common/machtype.c | 35 ++++++++++++++++++++++++++- arch/mips/loongson/fuloong-2e/Makefile | 2 +- 6 files changed, 53 insertions(+), 4 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4c12a290bee..f45d0d8e71d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1286,6 +1286,10 @@ and is between 256 and 4096 characters. It is defined in the file (machvec) in a generic kernel. Example: machvec=hpzx1_swiotlb + machtype= [Loongson] Share the same kernel image file between different + yeeloong laptop. + Example: machtype=lemote-yeeloong-2f-7inch + max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater than or equal to this physical address is ignored.
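A note on how the matching works: machtype_setup() in the machtype.c hunk below scans the system_types[] table and picks the first entry that contains the given string as a substring, so an unambiguous fragment of a machine name is enough. The following stand-alone sketch is illustrative only and not part of the patch; resolve_machtype() and the main() harness are made-up names, while the table strings are copied from the hunk that follows:

#include <stdio.h>
#include <string.h>

/* Copy of the table added in machtype.c; index 0 is the "unknown"
 * fallback and the list is NULL-terminated. */
static const char *system_types[] = {
	"unknown loongson machine",
	"lemote-fuloong-2e-box",
	"lemote-fuloong-2f-box",
	"lemote-mengloong-2f-7inches",
	"lemote-yeeloong-2f-8.9inches",
	"dexxon-gidum-2f-10inches",
	NULL,
};

/* Resolve the way machtype_setup() does: start at the first real
 * machine and take the first entry containing the given substring. */
static int resolve_machtype(const char *str)
{
	int machtype;

	for (machtype = 1; system_types[machtype]; machtype++)
		if (strstr(system_types[machtype], str))
			return machtype;
	return 0;	/* treated as MACH_LOONGSON_UNKNOWN */
}

int main(void)
{
	/* "yeeloong" alone is enough to select the yeeloong entry. */
	printf("machtype -> %s\n",
	       system_types[resolve_machtype("yeeloong")]);
	return 0;
}

So booting with machtype=lemote-yeeloong-2f-8.9inches, or any unambiguous substring of it, selects MACH_LEMOTE_YL2F89 before the board code runs.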
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h index 610fe3af7a0..f5dfaf6a160 100644 --- a/arch/mips/include/asm/bootinfo.h +++ b/arch/mips/include/asm/bootinfo.h @@ -7,6 +7,7 @@ * Copyright (C) 1995, 1996 Andreas Busse * Copyright (C) 1995, 1996 Stoned Elipot * Copyright (C) 1995, 1996 Paul M. Antoine. + * Copyright (C) 2009 Zhang Le */ #ifndef _ASM_BOOTINFO_H #define _ASM_BOOTINFO_H @@ -57,6 +58,17 @@ #define MACH_MIKROTIK_RB532 0 /* Mikrotik RouterBoard 532 */ #define MACH_MIKROTIK_RB532A 1 /* Mikrotik RouterBoard 532A */ +/* + * Valid machtype for Loongson family + */ +#define MACH_LOONGSON_UNKNOWN 0 +#define MACH_LEMOTE_FL2E 1 +#define MACH_LEMOTE_FL2F 2 +#define MACH_LEMOTE_ML2F7 3 +#define MACH_LEMOTE_YL2F89 4 +#define MACH_DEXXON_GDIUM2F10 5 +#define MACH_LOONGSON_END 6 + #define CL_SIZE COMMAND_LINE_SIZE extern char *system_type; diff --git a/arch/mips/include/asm/mach-loongson/machine.h b/arch/mips/include/asm/mach-loongson/machine.h index 8e60d363594..206ea206791 100644 --- a/arch/mips/include/asm/mach-loongson/machine.h +++ b/arch/mips/include/asm/mach-loongson/machine.h @@ -15,7 +15,7 @@ #define LOONGSON_UART_BASE (BONITO_PCIIO_BASE + 0x3f8) -#define LOONGSON_MACHNAME "lemote-fuloong-2e-box" +#define LOONGSON_MACHTYPE MACH_LEMOTE_FL2E #endif diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson/common/Makefile index 4e3889dec39..656b3cc0a2a 100644 --- a/arch/mips/loongson/common/Makefile +++ b/arch/mips/loongson/common/Makefile @@ -3,7 +3,7 @@ # obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \ - pci.o bonito-irq.o mem.o + pci.o bonito-irq.o mem.o machtype.o # # Early printk support diff --git a/arch/mips/loongson/common/machtype.c b/arch/mips/loongson/common/machtype.c index 845b3fb47e0..7b348248de7 100644 --- a/arch/mips/loongson/common/machtype.c +++ b/arch/mips/loongson/common/machtype.c @@ -2,16 +2,49 @@ * Copyright (C) 2009 Lemote Inc. & Insititute of Computing Technology * Author: Wu Zhangjin, wuzj@lemote.com * + * Copyright (c) 2009 Zhang Le + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ +#include +#include +#include #include +static const char *system_types[] = { + [MACH_LOONGSON_UNKNOWN] "unknown loongson machine", + [MACH_LEMOTE_FL2E] "lemote-fuloong-2e-box", + [MACH_LEMOTE_FL2F] "lemote-fuloong-2f-box", + [MACH_LEMOTE_ML2F7] "lemote-mengloong-2f-7inches", + [MACH_LEMOTE_YL2F89] "lemote-yeeloong-2f-8.9inches", + [MACH_DEXXON_GDIUM2F10] "dexxon-gidum-2f-10inches", + [MACH_LOONGSON_END] NULL, +}; + const char *get_system_type(void) { - return LOONGSON_MACHNAME; + if (mips_machtype == MACH_UNKNOWN) + mips_machtype = LOONGSON_MACHTYPE; + + return system_types[mips_machtype]; } +static __init int machtype_setup(char *str) +{ + int machtype = MACH_LEMOTE_FL2E; + + if (!str) + return -EINVAL; + + for (; system_types[machtype]; machtype++) + if (strstr(system_types[machtype], str)) { + mips_machtype = machtype; + break; + } + return 0; +} +__setup("machtype=", machtype_setup); diff --git a/arch/mips/loongson/fuloong-2e/Makefile b/arch/mips/loongson/fuloong-2e/Makefile index 96e45c13c85..3aba5fcc09d 100644 --- a/arch/mips/loongson/fuloong-2e/Makefile +++ b/arch/mips/loongson/fuloong-2e/Makefile @@ -2,6 +2,6 @@ # Makefile for Lemote Fuloong2e mini-PC board. 
# -obj-y += irq.o reset.o machtype.o +obj-y += irq.o reset.o EXTRA_CFLAGS += -Werror -- cgit v1.2.3-70-g09d2 From 24ffce18a4b6b5e9769200582c09df7ff044259f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Aug 2009 16:54:04 +0200 Subject: MIPS: Convert to asm-generic/hardirq.h Signed-off-by: Christoph Hellwig Signed-off-by: Ralf Baechle --- arch/mips/include/asm/hardirq.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/hardirq.h b/arch/mips/include/asm/hardirq.h index 90bf399e6dd..c977a86c2c6 100644 --- a/arch/mips/include/asm/hardirq.h +++ b/arch/mips/include/asm/hardirq.h @@ -10,15 +10,9 @@ #ifndef _ASM_HARDIRQ_H #define _ASM_HARDIRQ_H -#include -#include - -typedef struct { - unsigned int __softirq_pending; -} ____cacheline_aligned irq_cpustat_t; - -#include /* Standard mappings for irq_cpustat_t above */ - extern void ack_bad_irq(unsigned int irq); +#define ack_bad_irq ack_bad_irq + +#include #endif /* _ASM_HARDIRQ_H */ -- cgit v1.2.3-70-g09d2 From f4c6b6bc5a4fc8d607f2d89369008c85a3a12a8b Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 17 Sep 2009 02:25:05 +0200 Subject: MIPS: Consolidate all CONFIG_CPU_HAS_LLSC use in a single C file. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/system.h | 12 ++++++++++++ arch/mips/kernel/octeon_switch.S | 3 --- arch/mips/kernel/r2300_switch.S | 3 --- arch/mips/kernel/r4k_switch.S | 3 --- 4 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/system.h b/arch/mips/include/asm/system.h index cd30f83235b..a2e9239b45a 100644 --- a/arch/mips/include/asm/system.h +++ b/arch/mips/include/asm/system.h @@ -63,11 +63,23 @@ do { \ #define __mips_mt_fpaff_switch_to(prev) do { (void) (prev); } while (0) #endif +#ifdef CONFIG_CPU_HAS_LLSC +#define __clear_software_ll_bit() do { } while (0) +#else +extern unsigned long ll_bit; + +#define __clear_software_ll_bit() \ +do { \ + ll_bit = 0; \ +} while (0) +#endif + #define switch_to(prev, next, last) \ do { \ __mips_mt_fpaff_switch_to(prev); \ if (cpu_has_dsp) \ __save_dsp(prev); \ + __clear_software_ll_bit(); \ (last) = resume(prev, next, task_thread_info(next)); \ } while (0) diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S index d52389672b0..3952b8323ef 100644 --- a/arch/mips/kernel/octeon_switch.S +++ b/arch/mips/kernel/octeon_switch.S @@ -36,9 +36,6 @@ .align 7 LEAF(resume) .set arch=octeon -#ifndef CONFIG_CPU_HAS_LLSC - sw zero, ll_bit -#endif mfc0 t1, CP0_STATUS LONG_S t1, THREAD_STATUS(a0) cpu_save_nonscratch a0 diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S index 656bde2e11b..698414b7a25 100644 --- a/arch/mips/kernel/r2300_switch.S +++ b/arch/mips/kernel/r2300_switch.S @@ -46,9 +46,6 @@ * struct thread_info *next_ti) ) */ LEAF(resume) -#ifndef CONFIG_CPU_HAS_LLSC - sw zero, ll_bit -#endif mfc0 t1, CP0_STATUS sw t1, THREAD_STATUS(a0) cpu_save_nonscratch a0 diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S index d9bfae53c43..8893ee1a236 100644 --- a/arch/mips/kernel/r4k_switch.S +++ b/arch/mips/kernel/r4k_switch.S @@ -45,9 +45,6 @@ */ .align 5 LEAF(resume) -#ifndef CONFIG_CPU_HAS_LLSC - sw zero, ll_bit -#endif mfc0 t1, CP0_STATUS LONG_S t1, THREAD_STATUS(a0) cpu_save_nonscratch a0 -- cgit v1.2.3-70-g09d2 From f1e39a4a616cd9981a9decfd5332fd07a01abb8b Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 17 Sep 2009 02:25:05 +0200 Subject: MIPS: 
Rewrite sysmips(MIPS_ATOMIC_SET, ...) in C with inline assembler This way it doesn't have to use CONFIG_CPU_HAS_LLSC anymore. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/system.h | 3 ++ arch/mips/kernel/scall32-o32.S | 72 -------------------------- arch/mips/kernel/scall64-64.S | 72 -------------------------- arch/mips/kernel/syscall.c | 112 +++++++++++++++++++++++++++++++++++++++-- arch/mips/kernel/traps.c | 5 +- 5 files changed, 114 insertions(+), 150 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/system.h b/arch/mips/include/asm/system.h index a2e9239b45a..23f68b40d4b 100644 --- a/arch/mips/include/asm/system.h +++ b/arch/mips/include/asm/system.h @@ -32,6 +32,9 @@ extern asmlinkage void *resume(void *last, void *next, void *next_ti); struct task_struct; +extern unsigned int ll_bit; +extern struct task_struct *ll_task; + #ifdef CONFIG_MIPS_MT_FPAFF /* diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index b5708212353..7c2de4f091c 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -187,78 +187,6 @@ illegal_syscall: j o32_syscall_exit END(handle_sys) - LEAF(mips_atomic_set) - andi v0, a1, 3 # must be word aligned - bnez v0, bad_alignment - - lw v1, TI_ADDR_LIMIT($28) # in legal address range? - addiu a0, a1, 4 - or a0, a0, a1 - and a0, a0, v1 - bltz a0, bad_address - -#ifdef CONFIG_CPU_HAS_LLSC - /* Ok, this is the ll/sc case. World is sane :-) */ -1: ll v0, (a1) - move a0, a2 -2: sc a0, (a1) -#if R10000_LLSC_WAR - beqzl a0, 1b -#else - beqz a0, 1b -#endif - - .section __ex_table,"a" - PTR 1b, bad_stack - PTR 2b, bad_stack - .previous -#else - sw a1, 16(sp) - sw a2, 20(sp) - - move a0, sp - move a2, a1 - li a1, 1 - jal do_page_fault - - lw a1, 16(sp) - lw a2, 20(sp) - - /* - * At this point the page should be readable and writable unless - * there was no more memory available. - */ -1: lw v0, (a1) -2: sw a2, (a1) - - .section __ex_table,"a" - PTR 1b, no_mem - PTR 2b, no_mem - .previous -#endif - - sw zero, PT_R7(sp) # success - sw v0, PT_R2(sp) # result - - j o32_syscall_exit # continue like a normal syscall - -no_mem: li v0, -ENOMEM - jr ra - -bad_address: - li v0, -EFAULT - jr ra - -bad_alignment: - li v0, -EINVAL - jr ra - END(mips_atomic_set) - - LEAF(sys_sysmips) - beq a0, MIPS_ATOMIC_SET, mips_atomic_set - j _sys_sysmips - END(sys_sysmips) - LEAF(sys_syscall) subu t0, a0, __NR_O32_Linux # check syscall number sltiu v0, t0, __NR_O32_Linux_syscalls + 1 diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 3d866f24e06..b97b993846d 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -124,78 +124,6 @@ illegal_syscall: j n64_syscall_exit END(handle_sys64) - LEAF(mips_atomic_set) - andi v0, a1, 3 # must be word aligned - bnez v0, bad_alignment - - LONG_L v1, TI_ADDR_LIMIT($28) # in legal address range? - LONG_ADDIU a0, a1, 4 - or a0, a0, a1 - and a0, a0, v1 - bltz a0, bad_address - -#ifdef CONFIG_CPU_HAS_LLSC - /* Ok, this is the ll/sc case. World is sane :-) */ -1: ll v0, (a1) - move a0, a2 -2: sc a0, (a1) -#if R10000_LLSC_WAR - beqzl a0, 1b -#else - beqz a0, 1b -#endif - - .section __ex_table,"a" - PTR 1b, bad_stack - PTR 2b, bad_stack - .previous -#else - sw a1, 16(sp) - sw a2, 20(sp) - - move a0, sp - move a2, a1 - li a1, 1 - jal do_page_fault - - lw a1, 16(sp) - lw a2, 20(sp) - - /* - * At this point the page should be readable and writable unless - * there was no more memory available. 
- */ -1: lw v0, (a1) -2: sw a2, (a1) - - .section __ex_table,"a" - PTR 1b, no_mem - PTR 2b, no_mem - .previous -#endif - - sd zero, PT_R7(sp) # success - sd v0, PT_R2(sp) # result - - j n64_syscall_exit # continue like a normal syscall - -no_mem: li v0, -ENOMEM - jr ra - -bad_address: - li v0, -EFAULT - jr ra - -bad_alignment: - li v0, -EINVAL - jr ra - END(mips_atomic_set) - - LEAF(sys_sysmips) - beq a0, MIPS_ATOMIC_SET, mips_atomic_set - j _sys_sysmips - END(sys_sysmips) - .align 3 sys_call_table: PTR sys_read /* 5000 */ diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 8cf38464404..3fe1fcfa2e7 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -28,7 +28,9 @@ #include #include #include +#include +#include #include #include #include @@ -290,12 +292,116 @@ SYSCALL_DEFINE1(set_thread_area, unsigned long, addr) return 0; } -asmlinkage int _sys_sysmips(long cmd, long arg1, long arg2, long arg3) +static inline int mips_atomic_set(struct pt_regs *regs, + unsigned long addr, unsigned long new) { + unsigned long old, tmp; + unsigned int err; + + if (unlikely(addr & 3)) + return -EINVAL; + + if (unlikely(!access_ok(VERIFY_WRITE, addr, 4))) + return -EINVAL; + + if (cpu_has_llsc && R10000_LLSC_WAR) { + __asm__ __volatile__ ( + " li %[err], 0 \n" + "1: ll %[old], (%[addr]) \n" + " move %[tmp], %[new] \n" + "2: sc %[tmp], (%[addr]) \n" + " beqzl %[tmp], 1b \n" + "3: \n" + " .section .fixup,\"ax\" \n" + "4: li %[err], %[efault] \n" + " j 3b \n" + " .previous \n" + " .section __ex_table,\"a\" \n" + " "STR(PTR)" 1b, 4b \n" + " "STR(PTR)" 2b, 4b \n" + " .previous \n" + : [old] "=&r" (old), + [err] "=&r" (err), + [tmp] "=&r" (tmp) + : [addr] "r" (addr), + [new] "r" (new), + [efault] "i" (-EFAULT) + : "memory"); + } else if (cpu_has_llsc) { + __asm__ __volatile__ ( + " li %[err], 0 \n" + "1: ll %[old], (%[addr]) \n" + " move %[tmp], %[new] \n" + "2: sc %[tmp], (%[addr]) \n" + " bnez %[tmp], 4f \n" + "3: \n" + " .subsection 2 \n" + "4: b 1b \n" + " .previous \n" + " \n" + " .section .fixup,\"ax\" \n" + "5: li %[err], %[efault] \n" + " j 3b \n" + " .previous \n" + " .section __ex_table,\"a\" \n" + " "STR(PTR)" 1b, 5b \n" + " "STR(PTR)" 2b, 5b \n" + " .previous \n" + : [old] "=&r" (old), + [err] "=&r" (err), + [tmp] "=&r" (tmp) + : [addr] "r" (addr), + [new] "r" (new), + [efault] "i" (-EFAULT) + : "memory"); + } else { + do { + preempt_disable(); + ll_bit = 1; + ll_task = current; + preempt_enable(); + + err = __get_user(old, (unsigned int *) addr); + err |= __put_user(new, (unsigned int *) addr); + if (err) + break; + rmb(); + } while (!ll_bit); + } + + if (unlikely(err)) + return err; + + regs->regs[2] = old; + regs->regs[7] = 0; /* No error */ + + /* + * Don't let your children do this ... + */ + __asm__ __volatile__( + " move $29, %0 \n" + " j syscall_exit \n" + : /* no outputs */ + : "r" (regs)); + + /* unreached. Honestly. 
*/ + while (1); +} + +save_static_function(sys_sysmips); +static int __used noinline +_sys_sysmips(nabi_no_regargs struct pt_regs regs) +{ + long cmd, arg1, arg2, arg3; + + cmd = regs.regs[4]; + arg1 = regs.regs[5]; + arg2 = regs.regs[6]; + arg3 = regs.regs[7]; + switch (cmd) { case MIPS_ATOMIC_SET: - printk(KERN_CRIT "How did I get here?\n"); - return -EINVAL; + return mips_atomic_set(®s, arg1, arg2); case MIPS_FIXADE: if (arg1 & ~3) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 08f1edf355e..0a18b4c62af 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -466,9 +466,8 @@ asmlinkage void do_be(struct pt_regs *regs) * The ll_bit is cleared by r*_switch.S */ -unsigned long ll_bit; - -static struct task_struct *ll_task = NULL; +unsigned int ll_bit; +struct task_struct *ll_task; static inline int simulate_ll(struct pt_regs *regs, unsigned int opcode) { -- cgit v1.2.3-70-g09d2 From 43e6ae6d9f08304682294c14c6b7f2b2441668e7 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 17 Sep 2009 02:25:05 +0200 Subject: MIPS: Rewrite clearing of ll_bit on context switch in C This also means there is now only one implementation not 3 left. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/system.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/system.h b/arch/mips/include/asm/system.h index 23f68b40d4b..cc7262ff076 100644 --- a/arch/mips/include/asm/system.h +++ b/arch/mips/include/asm/system.h @@ -66,16 +66,11 @@ do { \ #define __mips_mt_fpaff_switch_to(prev) do { (void) (prev); } while (0) #endif -#ifdef CONFIG_CPU_HAS_LLSC -#define __clear_software_ll_bit() do { } while (0) -#else -extern unsigned long ll_bit; - #define __clear_software_ll_bit() \ do { \ - ll_bit = 0; \ + if (!__builtin_constant_p(cpu_has_llsc) || !cpu_has_llsc) \ + ll_bit = 0; \ } while (0) -#endif #define switch_to(prev, next, last) \ do { \ -- cgit v1.2.3-70-g09d2 From b8d6f78cd058e34ec706f7cb353fdb2eb743c050 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 17 Sep 2009 02:25:06 +0200 Subject: MIPS: Malta: Remove pointless use use of CONFIG_CPU_HAS_LLSC All CPUs for Malta support LL/SC. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-malta/cpu-feature-overrides.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/mach-malta/cpu-feature-overrides.h b/arch/mips/include/asm/mach-malta/cpu-feature-overrides.h index 7f3e3f9bd23..2848cea42bc 100644 --- a/arch/mips/include/asm/mach-malta/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-malta/cpu-feature-overrides.h @@ -28,11 +28,7 @@ /* #define cpu_has_prefetch ? */ #define cpu_has_mcheck 1 /* #define cpu_has_ejtag ? */ -#ifdef CONFIG_CPU_HAS_LLSC #define cpu_has_llsc 1 -#else -#define cpu_has_llsc 0 -#endif /* #define cpu_has_vtag_icache ? */ /* #define cpu_has_dc_aliases ? */ /* #define cpu_has_ic_fills_f_dc ? */ -- cgit v1.2.3-70-g09d2 From b791d1193af9772040e592d5aa161790f800b762 Mon Sep 17 00:00:00 2001 From: David Daney Date: Mon, 13 Jul 2009 11:15:19 -0700 Subject: MIPS: Allow kernel use of LL/SC to be separate from the presence of LL/SC. On some CPUs, it is more efficient to disable and enable interrupts in the kernel rather than use ll/sc for atomic operations. But if we were to set cpu_has_llsc to false, we would break the userspace futex interface (in asm/futex.h). 
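The uniprocessor fast path motivating this is easy to sketch. The fragment below is illustrative only (the helper name atomic_add_noll is invented for this note), though local_irq_save()/local_irq_restore() and atomic_t are the real kernel primitives; on a non-SMP kernel, masking interrupts makes a plain read-modify-write atomic with respect to anything that could preempt it, with no ll/sc loop at all:

#include <linux/irqflags.h>
#include <asm/atomic.h>

/* Illustrative only: UP-safe atomic add without ll/sc. */
static inline void atomic_add_noll(int i, atomic_t *v)
{
	unsigned long flags;

	local_irq_save(flags);		/* nothing can preempt us now */
	v->counter += i;		/* plain RMW is atomic on UP */
	local_irq_restore(flags);
}

The Octeon override later in this series relies on exactly this trade-off for non-SMP builds.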
We separate the two concepts, with a new predicate kernel_uses_llsc, that lets us disable the kernel's use of ll/sc while still allowing the futex code to use it. Also there were a couple of cases in bitops.h where we were using ll/sc unconditionally even if cpu_has_llsc were false. Signed-off-by: David Daney Signed-off-by: Ralf Baechle --- arch/mips/include/asm/atomic.h | 40 ++++++++++++++++++------------------ arch/mips/include/asm/bitops.h | 34 +++++++++++++++--------------- arch/mips/include/asm/cmpxchg.h | 4 ++-- arch/mips/include/asm/cpu-features.h | 3 +++ arch/mips/include/asm/local.h | 8 ++++---- arch/mips/include/asm/system.h | 8 ++++---- 6 files changed, 50 insertions(+), 47 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index eb7f01cfd1a..dd75d673447 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -49,7 +49,7 @@ */ static __inline__ void atomic_add(int i, atomic_t * v) { - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { int temp; __asm__ __volatile__( @@ -61,7 +61,7 @@ static __inline__ void atomic_add(int i, atomic_t * v) " .set mips0 \n" : "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter)); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { int temp; __asm__ __volatile__( @@ -94,7 +94,7 @@ static __inline__ void atomic_add(int i, atomic_t * v) */ static __inline__ void atomic_sub(int i, atomic_t * v) { - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { int temp; __asm__ __volatile__( @@ -106,7 +106,7 @@ static __inline__ void atomic_sub(int i, atomic_t * v) " .set mips0 \n" : "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter)); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { int temp; __asm__ __volatile__( @@ -139,7 +139,7 @@ static __inline__ int atomic_add_return(int i, atomic_t * v) smp_llsc_mb(); - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { int temp; __asm__ __volatile__( @@ -153,7 +153,7 @@ static __inline__ int atomic_add_return(int i, atomic_t * v) : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { int temp; __asm__ __volatile__( @@ -191,7 +191,7 @@ static __inline__ int atomic_sub_return(int i, atomic_t * v) smp_llsc_mb(); - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { int temp; __asm__ __volatile__( @@ -205,7 +205,7 @@ static __inline__ int atomic_sub_return(int i, atomic_t * v) : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { int temp; __asm__ __volatile__( @@ -251,7 +251,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) smp_llsc_mb(); - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { int temp; __asm__ __volatile__( @@ -269,7 +269,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { int temp; __asm__ __volatile__( @@ -428,7 +428,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) */ static __inline__ void atomic64_add(long i, atomic64_t * v) { - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && 
R10000_LLSC_WAR) { long temp; __asm__ __volatile__( @@ -440,7 +440,7 @@ static __inline__ void atomic64_add(long i, atomic64_t * v) " .set mips0 \n" : "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter)); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { long temp; __asm__ __volatile__( @@ -473,7 +473,7 @@ static __inline__ void atomic64_add(long i, atomic64_t * v) */ static __inline__ void atomic64_sub(long i, atomic64_t * v) { - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { long temp; __asm__ __volatile__( @@ -485,7 +485,7 @@ static __inline__ void atomic64_sub(long i, atomic64_t * v) " .set mips0 \n" : "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter)); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { long temp; __asm__ __volatile__( @@ -518,7 +518,7 @@ static __inline__ long atomic64_add_return(long i, atomic64_t * v) smp_llsc_mb(); - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { long temp; __asm__ __volatile__( @@ -532,7 +532,7 @@ static __inline__ long atomic64_add_return(long i, atomic64_t * v) : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { long temp; __asm__ __volatile__( @@ -570,7 +570,7 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v) smp_llsc_mb(); - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { long temp; __asm__ __volatile__( @@ -584,7 +584,7 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v) : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { long temp; __asm__ __volatile__( @@ -630,7 +630,7 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) smp_llsc_mb(); - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { long temp; __asm__ __volatile__( @@ -648,7 +648,7 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { long temp; __asm__ __volatile__( diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index b1e9e97a9c7..84a383806b2 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -61,7 +61,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) unsigned short bit = nr & SZLONG_MASK; unsigned long temp; - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { __asm__ __volatile__( " .set mips3 \n" "1: " __LL "%0, %1 # set_bit \n" @@ -72,7 +72,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) : "=&r" (temp), "=m" (*m) : "ir" (1UL << bit), "m" (*m)); #ifdef CONFIG_CPU_MIPSR2 - } else if (__builtin_constant_p(bit)) { + } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { __asm__ __volatile__( "1: " __LL "%0, %1 # set_bit \n" " " __INS "%0, %4, %2, 1 \n" @@ -84,7 +84,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) : "=&r" (temp), "=m" (*m) : "ir" (bit), "m" (*m), "r" (~0)); #endif /* CONFIG_CPU_MIPSR2 */ - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { __asm__ __volatile__( " .set mips3 \n" "1: " __LL "%0, %1 # set_bit \n" @@ -126,7 +126,7 @@ static inline void 
clear_bit(unsigned long nr, volatile unsigned long *addr) unsigned short bit = nr & SZLONG_MASK; unsigned long temp; - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { __asm__ __volatile__( " .set mips3 \n" "1: " __LL "%0, %1 # clear_bit \n" @@ -137,7 +137,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) : "=&r" (temp), "=m" (*m) : "ir" (~(1UL << bit)), "m" (*m)); #ifdef CONFIG_CPU_MIPSR2 - } else if (__builtin_constant_p(bit)) { + } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { __asm__ __volatile__( "1: " __LL "%0, %1 # clear_bit \n" " " __INS "%0, $0, %2, 1 \n" @@ -149,7 +149,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) : "=&r" (temp), "=m" (*m) : "ir" (bit), "m" (*m)); #endif /* CONFIG_CPU_MIPSR2 */ - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { __asm__ __volatile__( " .set mips3 \n" "1: " __LL "%0, %1 # clear_bit \n" @@ -202,7 +202,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned short bit = nr & SZLONG_MASK; - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; @@ -215,7 +215,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) " .set mips0 \n" : "=&r" (temp), "=m" (*m) : "ir" (1UL << bit), "m" (*m)); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; @@ -260,7 +260,7 @@ static inline int test_and_set_bit(unsigned long nr, smp_llsc_mb(); - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; @@ -275,7 +275,7 @@ static inline int test_and_set_bit(unsigned long nr, : "=&r" (temp), "=m" (*m), "=&r" (res) : "r" (1UL << bit), "m" (*m) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; @@ -328,7 +328,7 @@ static inline int test_and_set_bit_lock(unsigned long nr, unsigned short bit = nr & SZLONG_MASK; unsigned long res; - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; @@ -343,7 +343,7 @@ static inline int test_and_set_bit_lock(unsigned long nr, : "=&r" (temp), "=m" (*m), "=&r" (res) : "r" (1UL << bit), "m" (*m) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; @@ -397,7 +397,7 @@ static inline int test_and_clear_bit(unsigned long nr, smp_llsc_mb(); - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; @@ -414,7 +414,7 @@ static inline int test_and_clear_bit(unsigned long nr, : "r" (1UL << bit), "m" (*m) : "memory"); #ifdef CONFIG_CPU_MIPSR2 - } else if (__builtin_constant_p(nr)) { + } else if (kernel_uses_llsc && __builtin_constant_p(nr)) { unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; @@ -431,7 +431,7 @@ static inline int test_and_clear_bit(unsigned long nr, : "ir" (bit), "m" (*m) : "memory"); #endif - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { unsigned 
long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; @@ -487,7 +487,7 @@ static inline int test_and_change_bit(unsigned long nr, smp_llsc_mb(); - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; @@ -502,7 +502,7 @@ static inline int test_and_change_bit(unsigned long nr, : "=&r" (temp), "=m" (*m), "=&r" (res) : "r" (1UL << bit), "m" (*m) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 4a812c3ceb9..815a438a268 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -16,7 +16,7 @@ ({ \ __typeof(*(m)) __ret; \ \ - if (cpu_has_llsc && R10000_LLSC_WAR) { \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ @@ -33,7 +33,7 @@ : "=&r" (__ret), "=R" (*m) \ : "R" (*m), "Jr" (old), "Jr" (new) \ : "memory"); \ - } else if (cpu_has_llsc) { \ + } else if (kernel_uses_llsc) { \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 8ab1d12ba7f..1f4df647c38 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -80,6 +80,9 @@ #ifndef cpu_has_llsc #define cpu_has_llsc (cpu_data[0].options & MIPS_CPU_LLSC) #endif +#ifndef kernel_uses_llsc +#define kernel_uses_llsc cpu_has_llsc +#endif #ifndef cpu_has_mips16 #define cpu_has_mips16 (cpu_data[0].ases & MIPS_ASE_MIPS16) #endif diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h index f96fd59e084..361f4f16c30 100644 --- a/arch/mips/include/asm/local.h +++ b/arch/mips/include/asm/local.h @@ -29,7 +29,7 @@ static __inline__ long local_add_return(long i, local_t * l) { unsigned long result; - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { unsigned long temp; __asm__ __volatile__( @@ -43,7 +43,7 @@ static __inline__ long local_add_return(long i, local_t * l) : "=&r" (result), "=&r" (temp), "=m" (l->a.counter) : "Ir" (i), "m" (l->a.counter) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { unsigned long temp; __asm__ __volatile__( @@ -74,7 +74,7 @@ static __inline__ long local_sub_return(long i, local_t * l) { unsigned long result; - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { unsigned long temp; __asm__ __volatile__( @@ -88,7 +88,7 @@ static __inline__ long local_sub_return(long i, local_t * l) : "=&r" (result), "=&r" (temp), "=m" (l->a.counter) : "Ir" (i), "m" (l->a.counter) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { unsigned long temp; __asm__ __volatile__( diff --git a/arch/mips/include/asm/system.h b/arch/mips/include/asm/system.h index cc7262ff076..fcf5f98d90c 100644 --- a/arch/mips/include/asm/system.h +++ b/arch/mips/include/asm/system.h @@ -94,7 +94,7 @@ static inline unsigned long __xchg_u32(volatile int * m, unsigned int val) { __u32 retval; - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { unsigned long dummy; __asm__ __volatile__( @@ -109,7 +109,7 @@ static inline unsigned long __xchg_u32(volatile int * m, unsigned int val) : "=&r" (retval), "=m" (*m), "=&r" (dummy) : "R" (*m), "Jr" (val) : "memory"); - } 
else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { unsigned long dummy; __asm__ __volatile__( @@ -146,7 +146,7 @@ static inline __u64 __xchg_u64(volatile __u64 * m, __u64 val) { __u64 retval; - if (cpu_has_llsc && R10000_LLSC_WAR) { + if (kernel_uses_llsc && R10000_LLSC_WAR) { unsigned long dummy; __asm__ __volatile__( @@ -159,7 +159,7 @@ static inline __u64 __xchg_u64(volatile __u64 * m, __u64 val) : "=&r" (retval), "=m" (*m), "=&r" (dummy) : "R" (*m), "Jr" (val) : "memory"); - } else if (cpu_has_llsc) { + } else if (kernel_uses_llsc) { unsigned long dummy; __asm__ __volatile__( -- cgit v1.2.3-70-g09d2 From 9d24bafb0d1ecf636f71a56f9d6f071f5c7a882d Mon Sep 17 00:00:00 2001 From: David Daney Date: Mon, 13 Jul 2009 11:15:20 -0700 Subject: MIPS: Octeon: Set kernel_uses_llsc to false on non-SMP builds. Signed-off-by: David Daney Signed-off-by: Ralf Baechle --- .../include/asm/mach-cavium-octeon/cpu-feature-overrides.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h index 3d830756b13..425e708d4fb 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h @@ -31,12 +31,16 @@ #define cpu_has_cache_cdex_s 0 #define cpu_has_prefetch 1 +#define cpu_has_llsc 1 /* - * We should disable LL/SC on non SMP systems as it is faster to - * disable interrupts for atomic access than a LL/SC. Unfortunatly we - * cannot as this breaks asm/futex.h + * We Disable LL/SC on non SMP systems as it is faster to disable + * interrupts for atomic access than a LL/SC. */ -#define cpu_has_llsc 1 +#ifdef CONFIG_SMP +# define kernel_uses_llsc 1 +#else +# define kernel_uses_llsc 0 +#endif #define cpu_has_vtag_icache 1 #define cpu_has_dc_aliases 0 #define cpu_has_ic_fills_f_dc 0 -- cgit v1.2.3-70-g09d2 From e26449153c386904d2801d6348d66d00e5ba2211 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 20 Aug 2009 14:10:22 -0700 Subject: MIPS: Octeon: Add hardware RNG platform device. Add a platform device for the Octeon Random Number Generator (RNG). Signed-off-by: David Daney Acked-by: Herbert Xu Signed-off-by: Ralf Baechle --- arch/mips/cavium-octeon/setup.c | 43 ++++++++++++++ arch/mips/include/asm/octeon/cvmx-rnm-defs.h | 88 ++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 arch/mips/include/asm/octeon/cvmx-rnm-defs.h (limited to 'arch/mips/include/asm') diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index da559249cc2..468a1209833 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -33,6 +33,7 @@ #include #include +#include #ifdef CONFIG_CAVIUM_DECODE_RSL extern void cvmx_interrupt_rsl_decode(void); @@ -926,3 +927,45 @@ out: return ret; } device_initcall(octeon_cf_device_init); + +/* Octeon Random Number Generator. 
*/ +static int __init octeon_rng_device_init(void) +{ + struct platform_device *pd; + int ret = 0; + + struct resource rng_resources[] = { + { + .flags = IORESOURCE_MEM, + .start = XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS), + .end = XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS) + 0xf + }, { + .flags = IORESOURCE_MEM, + .start = cvmx_build_io_address(8, 0), + .end = cvmx_build_io_address(8, 0) + 0x7 + } + }; + + pd = platform_device_alloc("octeon_rng", -1); + if (!pd) { + ret = -ENOMEM; + goto out; + } + + ret = platform_device_add_resources(pd, rng_resources, + ARRAY_SIZE(rng_resources)); + if (ret) + goto fail; + + ret = platform_device_add(pd); + if (ret) + goto fail; + + return ret; +fail: + platform_device_put(pd); + +out: + return ret; +} +device_initcall(octeon_rng_device_init); diff --git a/arch/mips/include/asm/octeon/cvmx-rnm-defs.h b/arch/mips/include/asm/octeon/cvmx-rnm-defs.h new file mode 100644 index 00000000000..4586958c97b --- /dev/null +++ b/arch/mips/include/asm/octeon/cvmx-rnm-defs.h @@ -0,0 +1,88 @@ +/***********************license start*************** + * Author: Cavium Networks + * + * Contact: support@caviumnetworks.com + * This file is part of the OCTEON SDK + * + * Copyright (c) 2003-2008 Cavium Networks + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, Version 2, as + * published by the Free Software Foundation. + * + * This file is distributed in the hope that it will be useful, but + * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or + * NONINFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this file; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * or visit http://www.gnu.org/licenses/. + * + * This file may also be available under a different license from Cavium. 
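A hypothetical sketch, not part of the patch, of how a matching "octeon_rng" driver could pick up the two memory resources registered by octeon_rng_device_init() above. Only platform_get_resource() (from linux/platform_device.h) is standard kernel API here; the function name and the trailing steps are illustrative.

static int octeon_rng_probe_sketch(struct platform_device *pdev)
{
    struct resource *regs, *data;

    regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); /* RNM_CTL_STATUS */
    data = platform_get_resource(pdev, IORESOURCE_MEM, 1); /* random data port */
    if (!regs || !data)
        return -ENODEV;

    /* ioremap() of both ranges and hwrng registration would follow */
    return 0;
}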
+ * Contact Cavium Networks for more information + ***********************license end**************************************/ + +#ifndef __CVMX_RNM_DEFS_H__ +#define __CVMX_RNM_DEFS_H__ + +#include + +#define CVMX_RNM_BIST_STATUS \ + CVMX_ADD_IO_SEG(0x0001180040000008ull) +#define CVMX_RNM_CTL_STATUS \ + CVMX_ADD_IO_SEG(0x0001180040000000ull) + +union cvmx_rnm_bist_status { + uint64_t u64; + struct cvmx_rnm_bist_status_s { + uint64_t reserved_2_63:62; + uint64_t rrc:1; + uint64_t mem:1; + } s; + struct cvmx_rnm_bist_status_s cn30xx; + struct cvmx_rnm_bist_status_s cn31xx; + struct cvmx_rnm_bist_status_s cn38xx; + struct cvmx_rnm_bist_status_s cn38xxp2; + struct cvmx_rnm_bist_status_s cn50xx; + struct cvmx_rnm_bist_status_s cn52xx; + struct cvmx_rnm_bist_status_s cn52xxp1; + struct cvmx_rnm_bist_status_s cn56xx; + struct cvmx_rnm_bist_status_s cn56xxp1; + struct cvmx_rnm_bist_status_s cn58xx; + struct cvmx_rnm_bist_status_s cn58xxp1; +}; + +union cvmx_rnm_ctl_status { + uint64_t u64; + struct cvmx_rnm_ctl_status_s { + uint64_t reserved_9_63:55; + uint64_t ent_sel:4; + uint64_t exp_ent:1; + uint64_t rng_rst:1; + uint64_t rnm_rst:1; + uint64_t rng_en:1; + uint64_t ent_en:1; + } s; + struct cvmx_rnm_ctl_status_cn30xx { + uint64_t reserved_4_63:60; + uint64_t rng_rst:1; + uint64_t rnm_rst:1; + uint64_t rng_en:1; + uint64_t ent_en:1; + } cn30xx; + struct cvmx_rnm_ctl_status_cn30xx cn31xx; + struct cvmx_rnm_ctl_status_cn30xx cn38xx; + struct cvmx_rnm_ctl_status_cn30xx cn38xxp2; + struct cvmx_rnm_ctl_status_s cn50xx; + struct cvmx_rnm_ctl_status_s cn52xx; + struct cvmx_rnm_ctl_status_s cn52xxp1; + struct cvmx_rnm_ctl_status_s cn56xx; + struct cvmx_rnm_ctl_status_s cn56xxp1; + struct cvmx_rnm_ctl_status_s cn58xx; + struct cvmx_rnm_ctl_status_s cn58xxp1; +}; + +#endif -- cgit v1.2.3-70-g09d2 From e0cc87f59490d7d62a8ab2a76498dc8a2b64927a Mon Sep 17 00:00:00 2001 From: Wu Fei Date: Thu, 3 Sep 2009 22:29:53 +0800 Subject: MIPS: Shrink the size of tlb handler By combining swapper_pg_dir and module_pg_dir, several if conditions can be eliminated from the tlb exception handler. The two can be combined because vmalloc() returns effective virtual addresses at the bottom of the mapped range, while module_alloc() returns them at the top. This also fixes a bug in vmalloc() that appears when its return address is not covered by the first pgd. Signed-off-by: Wu Fei Signed-off-by: Ralf Baechle --- arch/mips/include/asm/pgtable-64.h | 11 +++------ arch/mips/mm/init.c | 3 --- arch/mips/mm/pgtable-64.c | 3 --- arch/mips/mm/tlbex.c | 49 -------------------------------------- 4 files changed, 3 insertions(+), 63 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 4ed9d1bba2b..9cd50899395 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -109,13 +109,13 @@ #define VMALLOC_START MAP_BASE #define VMALLOC_END \ - (VMALLOC_START + PTRS_PER_PGD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE) + (VMALLOC_START + \ + PTRS_PER_PGD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE - (1UL << 32)) #if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \ VMALLOC_START != CKSSEG /* Load modules into 32bit-compatible segment.
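As a worked example of the new VMALLOC_END formula, a small userspace program; the 4 KiB page size, the 512 entries per table level, and a 64-bit long are assumed values for illustration, not taken from the patch:

#include <stdio.h>

int main(void)
{
    unsigned long entries = 512;   /* assumed PTRS_PER_PGD/PMD/PTE */
    unsigned long page    = 4096;  /* assumed PAGE_SIZE */
    unsigned long span    = entries * entries * entries * page;

    /* one pgd tree covers 2^39 bytes; the top 1UL << 32 of it is
     * left out of vmalloc so the module segment fits under the
     * same swapper_pg_dir */
    printf("pgd span : %lu GiB\n", span >> 30);                 /* 512 */
    printf("vmalloc  : %lu GiB\n", (span - (1UL << 32)) >> 30); /* 508 */
    return 0;
}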
*/ #define MODULE_START CKSSEG #define MODULE_END (FIXADDR_START-2*PAGE_SIZE) -extern pgd_t module_pg_dir[PTRS_PER_PGD]; #endif #define pte_ERROR(e) \ @@ -188,12 +188,7 @@ static inline void pud_clear(pud_t *pudp) #define __pmd_offset(address) pmd_index(address) /* to find an entry in a kernel page-table-directory */ -#ifdef MODULE_START -#define pgd_offset_k(address) \ - ((address) >= MODULE_START ? module_pg_dir : pgd_offset(&init_mm, 0UL)) -#else -#define pgd_offset_k(address) pgd_offset(&init_mm, 0UL) -#endif +#define pgd_offset_k(address) pgd_offset(&init_mm, address) #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 0e820508ff2..38c79c55b06 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -475,9 +475,6 @@ unsigned long pgd_current[NR_CPUS]; */ pgd_t swapper_pg_dir[_PTRS_PER_PGD] __page_aligned(_PGD_ORDER); #ifdef CONFIG_64BIT -#ifdef MODULE_START -pgd_t module_pg_dir[PTRS_PER_PGD] __page_aligned(PGD_ORDER); -#endif pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned(PMD_ORDER); #endif pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned(PTE_ORDER); diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c index e4b565aeb00..1121019fa45 100644 --- a/arch/mips/mm/pgtable-64.c +++ b/arch/mips/mm/pgtable-64.c @@ -59,9 +59,6 @@ void __init pagetable_init(void) /* Initialize the entire pgd. */ pgd_init((unsigned long)swapper_pg_dir); -#ifdef MODULE_START - pgd_init((unsigned long)module_pg_dir); -#endif pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table); pgd_base = swapper_pg_dir; diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 9a17bf8395d..bc66f57f325 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -499,11 +499,7 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, * The vmalloc handling is not in the hotpath. */ uasm_i_dmfc0(p, tmp, C0_BADVADDR); -#ifdef MODULE_START - uasm_il_bltz(p, r, tmp, label_module_alloc); -#else uasm_il_bltz(p, r, tmp, label_vmalloc); -#endif /* No uasm_i_nop needed here, since the next insn doesn't touch TMP. 
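The single bltz above is enough because, after this patch, both regions of interest sit in the negative half of the 64-bit address space. A userspace model of that sign-bit test, with illustrative constants for the XKSEG vmalloc space and the CKSSEG module segment (assumes a 64-bit long):

#include <stdio.h>

static int takes_vmalloc_path(long badvaddr) /* models uasm_il_bltz */
{
    return badvaddr < 0;
}

int main(void)
{
    long vmalloc_addr = (long)0xc000000000000000UL; /* vmalloc space, bottom */
    long module_addr  = (long)0xffffffffc0000000UL; /* CKSSEG modules, top */
    long user_addr    = 0x0000000000400000L;

    printf("vmalloc: %d\n", takes_vmalloc_path(vmalloc_addr)); /* 1 */
    printf("module : %d\n", takes_vmalloc_path(module_addr));  /* 1 */
    printf("user   : %d\n", takes_vmalloc_path(user_addr));    /* 0 */
    return 0;
}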
*/ #ifdef CONFIG_SMP @@ -556,52 +552,7 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, { long swpd = (long)swapper_pg_dir; -#ifdef MODULE_START - long modd = (long)module_pg_dir; - - uasm_l_module_alloc(l, *p); - /* - * Assumption: - * VMALLOC_START >= 0xc000000000000000UL - * MODULE_START >= 0xe000000000000000UL - */ - UASM_i_SLL(p, ptr, bvaddr, 2); - uasm_il_bgez(p, r, ptr, label_vmalloc); - - if (uasm_in_compat_space_p(MODULE_START) && - !uasm_rel_lo(MODULE_START)) { - uasm_i_lui(p, ptr, uasm_rel_hi(MODULE_START)); /* delay slot */ - } else { - /* unlikely configuration */ - uasm_i_nop(p); /* delay slot */ - UASM_i_LA(p, ptr, MODULE_START); - } - uasm_i_dsubu(p, bvaddr, bvaddr, ptr); - - if (uasm_in_compat_space_p(modd) && !uasm_rel_lo(modd)) { - uasm_il_b(p, r, label_vmalloc_done); - uasm_i_lui(p, ptr, uasm_rel_hi(modd)); - } else { - UASM_i_LA_mostly(p, ptr, modd); - uasm_il_b(p, r, label_vmalloc_done); - if (uasm_in_compat_space_p(modd)) - uasm_i_addiu(p, ptr, ptr, uasm_rel_lo(modd)); - else - uasm_i_daddiu(p, ptr, ptr, uasm_rel_lo(modd)); - } - uasm_l_vmalloc(l, *p); - if (uasm_in_compat_space_p(MODULE_START) && - !uasm_rel_lo(MODULE_START) && - MODULE_START << 32 == VMALLOC_START) - uasm_i_dsll32(p, ptr, ptr, 0); /* typical case */ - else - UASM_i_LA(p, ptr, VMALLOC_START); -#else - uasm_l_vmalloc(l, *p); - UASM_i_LA(p, ptr, VMALLOC_START); -#endif - uasm_i_dsubu(p, bvaddr, bvaddr, ptr); if (uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd)) { uasm_il_b(p, r, label_vmalloc_done); -- cgit v1.2.3-70-g09d2 From 0de663ef8627f35fda9106a8faaca512f29e493e Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Tue, 18 Aug 2009 13:23:37 +0100 Subject: MIPS: BCM63xx: Add Broadcom 63xx CPU definitions. Todo: Nothing ever detects CPU_BCM6338 but the code tests for it anyway. 
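The decode that the hunks below add to cpu_probe_broadcom() can be illustrated standalone: BCM6358 and BCM6368 share the implementation value PRID_IMP_BCM4350 and are told apart by the revision nibble. The defines mirror the ones added below; the helper and sample PrId values are a sketch, and note the kernel hunk only maps BCM6358, so a BCM6368 falls through to CPU_UNKNOWN.

#include <stdio.h>

#define PRID_IMP_BCM4350 0xA000
#define PRID_REV_BCM6358 0x0010
#define PRID_REV_BCM6368 0x0030

static const char *bcm4350_class_name(unsigned int prid)
{
    if ((prid & 0xff00) != PRID_IMP_BCM4350) /* implementation field */
        return "not BCM4350-class";
    switch (prid & 0xf0) {                   /* revision field */
    case PRID_REV_BCM6358:
        return "Broadcom BCM6358";
    case PRID_REV_BCM6368:
        return "Broadcom BCM6368";
    default:
        return "unknown revision";
    }
}

int main(void)
{
    printf("%s\n", bcm4350_class_name(0xA010)); /* Broadcom BCM6358 */
    printf("%s\n", bcm4350_class_name(0xA030)); /* Broadcom BCM6368 */
    return 0;
}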
Signed-off-by: Maxime Bizon Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cpu.h | 7 +++++++ arch/mips/kernel/cpu-probe.c | 23 +++++++++++++++++++++++ arch/mips/mm/tlbex.c | 4 ++++ 3 files changed, 34 insertions(+) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 3bdc0e3d89c..4b96d1a3605 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -113,6 +113,12 @@ #define PRID_IMP_BCM4710 0x4000 #define PRID_IMP_BCM3302 0x9000 +#define PRID_IMP_BCM6338 0x9000 +#define PRID_IMP_BCM6345 0x8000 +#define PRID_IMP_BCM6348 0x9100 +#define PRID_IMP_BCM4350 0xA000 +#define PRID_REV_BCM6358 0x0010 +#define PRID_REV_BCM6368 0x0030 /* * These are the PRID's for when 23:16 == PRID_COMP_CAVIUM @@ -210,6 +216,7 @@ enum cpu_type_enum { */ CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K, CPU_ALCHEMY, CPU_PR4450, CPU_BCM3302, CPU_BCM4710, + CPU_BCM6338, CPU_BCM6345, CPU_BCM6348, CPU_BCM6358, /* * MIPS64 class processors diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 4579d5c12da..f709657e4dc 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -156,6 +156,9 @@ void __init check_wait(void) case CPU_25KF: case CPU_PR4450: case CPU_BCM3302: + case CPU_BCM6338: + case CPU_BCM6348: + case CPU_BCM6358: case CPU_CAVIUM_OCTEON: cpu_wait = r4k_wait; break; @@ -854,6 +857,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) decode_configs(c); switch (c->processor_id & 0xff00) { case PRID_IMP_BCM3302: + /* same as PRID_IMP_BCM6338 */ c->cputype = CPU_BCM3302; __cpu_name[cpu] = "Broadcom BCM3302"; break; @@ -861,6 +865,25 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) c->cputype = CPU_BCM4710; __cpu_name[cpu] = "Broadcom BCM4710"; break; + case PRID_IMP_BCM6345: + c->cputype = CPU_BCM6345; + __cpu_name[cpu] = "Broadcom BCM6345"; + break; + case PRID_IMP_BCM6348: + c->cputype = CPU_BCM6348; + __cpu_name[cpu] = "Broadcom BCM6348"; + break; + case PRID_IMP_BCM4350: + switch (c->processor_id & 0xf0) { + case PRID_REV_BCM6358: + c->cputype = CPU_BCM6358; + __cpu_name[cpu] = "Broadcom BCM6358"; + break; + default: + c->cputype = CPU_UNKNOWN; + break; + } + break; } } diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index bc66f57f325..bb1719a55d2 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -321,6 +321,10 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, case CPU_BCM3302: case CPU_BCM4710: case CPU_LOONGSON2: + case CPU_BCM6338: + case CPU_BCM6345: + case CPU_BCM6348: + case CPU_BCM6358: case CPU_R5500: if (m4kc_tlbp_war()) uasm_i_nop(p); -- cgit v1.2.3-70-g09d2 From e7300d04bd0809eb7ea10a2ed8c729459f816e36 Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Tue, 18 Aug 2009 13:23:37 +0100 Subject: MIPS: BCM63xx: Add support for the Broadcom BCM63xx family of SOCs. 
Signed-off-by: Maxime Bizon Signed-off-by: Florian Fainelli Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 16 + arch/mips/Makefile | 7 + arch/mips/bcm63xx/Kconfig | 25 + arch/mips/bcm63xx/Makefile | 7 + arch/mips/bcm63xx/boards/Kconfig | 11 + arch/mips/bcm63xx/boards/Makefile | 3 + arch/mips/bcm63xx/boards/board_bcm963xx.c | 837 ++++++++++++++++++ arch/mips/bcm63xx/clk.c | 226 +++++ arch/mips/bcm63xx/cpu.c | 345 ++++++++ arch/mips/bcm63xx/cs.c | 144 +++ arch/mips/bcm63xx/dev-dsp.c | 56 ++ arch/mips/bcm63xx/early_printk.c | 30 + arch/mips/bcm63xx/gpio.c | 134 +++ arch/mips/bcm63xx/irq.c | 253 ++++++ arch/mips/bcm63xx/prom.c | 55 ++ arch/mips/bcm63xx/setup.c | 125 +++ arch/mips/bcm63xx/timer.c | 205 +++++ arch/mips/configs/bcm63xx_defconfig | 972 +++++++++++++++++++++ arch/mips/include/asm/fixmap.h | 4 + arch/mips/include/asm/mach-bcm63xx/bcm63xx_board.h | 12 + arch/mips/include/asm/mach-bcm63xx/bcm63xx_clk.h | 11 + arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h | 538 ++++++++++++ arch/mips/include/asm/mach-bcm63xx/bcm63xx_cs.h | 10 + .../include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h | 13 + .../include/asm/mach-bcm63xx/bcm63xx_dev_enet.h | 45 + .../include/asm/mach-bcm63xx/bcm63xx_dev_pci.h | 6 + arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h | 22 + arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h | 93 ++ arch/mips/include/asm/mach-bcm63xx/bcm63xx_irq.h | 15 + arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h | 773 ++++++++++++++++ arch/mips/include/asm/mach-bcm63xx/bcm63xx_timer.h | 11 + .../mips/include/asm/mach-bcm63xx/board_bcm963xx.h | 60 ++ .../asm/mach-bcm63xx/cpu-feature-overrides.h | 51 ++ arch/mips/include/asm/mach-bcm63xx/gpio.h | 15 + arch/mips/include/asm/mach-bcm63xx/war.h | 25 + arch/mips/pci/Makefile | 2 + arch/mips/pci/fixup-bcm63xx.c | 21 + arch/mips/pci/ops-bcm63xx.c | 467 ++++++++++ arch/mips/pci/pci-bcm63xx.c | 224 +++++ arch/mips/pci/pci-bcm63xx.h | 27 + 40 files changed, 5896 insertions(+) create mode 100644 arch/mips/bcm63xx/Kconfig create mode 100644 arch/mips/bcm63xx/Makefile create mode 100644 arch/mips/bcm63xx/boards/Kconfig create mode 100644 arch/mips/bcm63xx/boards/Makefile create mode 100644 arch/mips/bcm63xx/boards/board_bcm963xx.c create mode 100644 arch/mips/bcm63xx/clk.c create mode 100644 arch/mips/bcm63xx/cpu.c create mode 100644 arch/mips/bcm63xx/cs.c create mode 100644 arch/mips/bcm63xx/dev-dsp.c create mode 100644 arch/mips/bcm63xx/early_printk.c create mode 100644 arch/mips/bcm63xx/gpio.c create mode 100644 arch/mips/bcm63xx/irq.c create mode 100644 arch/mips/bcm63xx/prom.c create mode 100644 arch/mips/bcm63xx/setup.c create mode 100644 arch/mips/bcm63xx/timer.c create mode 100644 arch/mips/configs/bcm63xx_defconfig create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_board.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_clk.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_cs.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_enet.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_pci.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_irq.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_timer.h create mode 
100644 arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/gpio.h create mode 100644 arch/mips/include/asm/mach-bcm63xx/war.h create mode 100644 arch/mips/pci/fixup-bcm63xx.c create mode 100644 arch/mips/pci/ops-bcm63xx.c create mode 100644 arch/mips/pci/pci-bcm63xx.c create mode 100644 arch/mips/pci/pci-bcm63xx.h (limited to 'arch/mips/include/asm') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 224e548f0fc..705a7a9170f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -80,6 +80,21 @@ config BCM47XX help Support for BCM47XX based boards +config BCM63XX + bool "Broadcom BCM63XX based boards" + select CEVT_R4K + select CSRC_R4K + select DMA_NONCOHERENT + select IRQ_CPU + select SYS_HAS_CPU_MIPS32_R1 + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_BIG_ENDIAN + select SYS_HAS_EARLY_PRINTK + select SWAP_IO_SPACE + select ARCH_REQUIRE_GPIOLIB + help + Support for BCM63XX based boards + config MIPS_COBALT bool "Cobalt Server" select CEVT_R4K @@ -645,6 +660,7 @@ endchoice source "arch/mips/alchemy/Kconfig" source "arch/mips/basler/excite/Kconfig" +source "arch/mips/bcm63xx/Kconfig" source "arch/mips/jazz/Kconfig" source "arch/mips/lasat/Kconfig" source "arch/mips/pmc-sierra/Kconfig" diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 1efa9aa6488..c825b14b4ed 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -564,6 +564,13 @@ core-$(CONFIG_BCM47XX) += arch/mips/bcm47xx/ cflags-$(CONFIG_BCM47XX) += -I$(srctree)/arch/mips/include/asm/mach-bcm47xx load-$(CONFIG_BCM47XX) := 0xffffffff80001000 +# +# Broadcom BCM63XX boards +# +core-$(CONFIG_BCM63XX) += arch/mips/bcm63xx/ +cflags-$(CONFIG_BCM63XX) += -I$(srctree)/arch/mips/include/asm/mach-bcm63xx/ +load-$(CONFIG_BCM63XX) := 0xffffffff80010000 + # # SNI RM # diff --git a/arch/mips/bcm63xx/Kconfig b/arch/mips/bcm63xx/Kconfig new file mode 100644 index 00000000000..fb177d6df06 --- /dev/null +++ b/arch/mips/bcm63xx/Kconfig @@ -0,0 +1,25 @@ +menu "CPU support" + depends on BCM63XX + +config BCM63XX_CPU_6338 + bool "support 6338 CPU" + select HW_HAS_PCI + select USB_ARCH_HAS_OHCI + select USB_OHCI_BIG_ENDIAN_DESC + select USB_OHCI_BIG_ENDIAN_MMIO + +config BCM63XX_CPU_6345 + bool "support 6345 CPU" + select USB_OHCI_BIG_ENDIAN_DESC + select USB_OHCI_BIG_ENDIAN_MMIO + +config BCM63XX_CPU_6348 + bool "support 6348 CPU" + select HW_HAS_PCI + +config BCM63XX_CPU_6358 + bool "support 6358 CPU" + select HW_HAS_PCI +endmenu + +source "arch/mips/bcm63xx/boards/Kconfig" diff --git a/arch/mips/bcm63xx/Makefile b/arch/mips/bcm63xx/Makefile new file mode 100644 index 00000000000..99bbc8753a2 --- /dev/null +++ b/arch/mips/bcm63xx/Makefile @@ -0,0 +1,7 @@ +obj-y += clk.o cpu.o cs.o gpio.o irq.o prom.o setup.o timer.o \ + dev-dsp.o +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o + +obj-y += boards/ + +EXTRA_CFLAGS += -Werror diff --git a/arch/mips/bcm63xx/boards/Kconfig b/arch/mips/bcm63xx/boards/Kconfig new file mode 100644 index 00000000000..c6aed33d893 --- /dev/null +++ b/arch/mips/bcm63xx/boards/Kconfig @@ -0,0 +1,11 @@ +choice + prompt "Board support" + depends on BCM63XX + default BOARD_BCM963XX + +config BOARD_BCM963XX + bool "Generic Broadcom 963xx boards" + select SSB + help + +endchoice diff --git a/arch/mips/bcm63xx/boards/Makefile b/arch/mips/bcm63xx/boards/Makefile new file mode 100644 index 00000000000..e5cc86dc1da --- /dev/null +++ b/arch/mips/bcm63xx/boards/Makefile @@ -0,0 +1,3 @@ 
+obj-$(CONFIG_BOARD_BCM963XX) += board_bcm963xx.o + +EXTRA_CFLAGS += -Werror diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c new file mode 100644 index 00000000000..fd77f548207 --- /dev/null +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -0,0 +1,837 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2008 Maxime Bizon + * Copyright (C) 2008 Florian Fainelli + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PFX "board_bcm963xx: " + +static struct bcm963xx_nvram nvram; +static unsigned int mac_addr_used; +static struct board_info board; + +/* + * known 6338 boards + */ +#ifdef CONFIG_BCM63XX_CPU_6338 +static struct board_info __initdata board_96338gw = { + .name = "96338GW", + .expected_cpu_id = 0x6338, + + .has_enet0 = 1, + .enet0 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, + + .has_ohci0 = 1, + + .leds = { + { + .name = "adsl", + .gpio = 3, + .active_low = 1, + }, + { + .name = "ses", + .gpio = 5, + .active_low = 1, + }, + { + .name = "ppp-fail", + .gpio = 4, + .active_low = 1, + }, + { + .name = "power", + .gpio = 0, + .active_low = 1, + .default_trigger = "default-on", + }, + { + .name = "stop", + .gpio = 1, + .active_low = 1, + } + }, +}; + +static struct board_info __initdata board_96338w = { + .name = "96338W", + .expected_cpu_id = 0x6338, + + .has_enet0 = 1, + .enet0 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, + + .leds = { + { + .name = "adsl", + .gpio = 3, + .active_low = 1, + }, + { + .name = "ses", + .gpio = 5, + .active_low = 1, + }, + { + .name = "ppp-fail", + .gpio = 4, + .active_low = 1, + }, + { + .name = "power", + .gpio = 0, + .active_low = 1, + .default_trigger = "default-on", + }, + { + .name = "stop", + .gpio = 1, + .active_low = 1, + }, + }, +}; +#endif + +/* + * known 6345 boards + */ +#ifdef CONFIG_BCM63XX_CPU_6345 +static struct board_info __initdata board_96345gw2 = { + .name = "96345GW2", + .expected_cpu_id = 0x6345, +}; +#endif + +/* + * known 6348 boards + */ +#ifdef CONFIG_BCM63XX_CPU_6348 +static struct board_info __initdata board_96348r = { + .name = "96348R", + .expected_cpu_id = 0x6348, + + .has_enet0 = 1, + .has_pci = 1, + + .enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + + .leds = { + { + .name = "adsl-fail", + .gpio = 2, + .active_low = 1, + }, + { + .name = "ppp", + .gpio = 3, + .active_low = 1, + }, + { + .name = "ppp-fail", + .gpio = 4, + .active_low = 1, + }, + { + .name = "power", + .gpio = 0, + .active_low = 1, + .default_trigger = "default-on", + + }, + { + .name = "stop", + .gpio = 1, + .active_low = 1, + }, + }, +}; + +static struct board_info __initdata board_96348gw_10 = { + .name = "96348GW-10", + .expected_cpu_id = 0x6348, + + .has_enet0 = 1, + .has_enet1 = 1, + .has_pci = 1, + + .enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + .enet1 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, + + .has_ohci0 = 1, + .has_pccard = 1, + .has_ehci0 = 1, + + .has_dsp = 1, + .dsp = { + .gpio_rst = 6, + .gpio_int = 34, + .cs = 2, + .ext_irq = 2, + }, + + .leds = { + { + .name = "adsl-fail", + .gpio = 2, + .active_low = 1, + }, + { + .name = "ppp", + .gpio = 3, + .active_low = 1, + }, + { + .name = "ppp-fail", + .gpio = 4, + .active_low = 1, + }, + { + 
.name = "power", + .gpio = 0, + .active_low = 1, + .default_trigger = "default-on", + }, + { + .name = "stop", + .gpio = 1, + .active_low = 1, + }, + }, +}; + +static struct board_info __initdata board_96348gw_11 = { + .name = "96348GW-11", + .expected_cpu_id = 0x6348, + + .has_enet0 = 1, + .has_enet1 = 1, + .has_pci = 1, + + .enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + + .enet1 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, + + + .has_ohci0 = 1, + .has_pccard = 1, + .has_ehci0 = 1, + + .leds = { + { + .name = "adsl-fail", + .gpio = 2, + .active_low = 1, + }, + { + .name = "ppp", + .gpio = 3, + .active_low = 1, + }, + { + .name = "ppp-fail", + .gpio = 4, + .active_low = 1, + }, + { + .name = "power", + .gpio = 0, + .active_low = 1, + .default_trigger = "default-on", + }, + { + .name = "stop", + .gpio = 1, + .active_low = 1, + }, + }, +}; + +static struct board_info __initdata board_96348gw = { + .name = "96348GW", + .expected_cpu_id = 0x6348, + + .has_enet0 = 1, + .has_enet1 = 1, + .has_pci = 1, + + .enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + .enet1 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, + + .has_ohci0 = 1, + + .has_dsp = 1, + .dsp = { + .gpio_rst = 6, + .gpio_int = 34, + .ext_irq = 2, + .cs = 2, + }, + + .leds = { + { + .name = "adsl-fail", + .gpio = 2, + .active_low = 1, + }, + { + .name = "ppp", + .gpio = 3, + .active_low = 1, + }, + { + .name = "ppp-fail", + .gpio = 4, + .active_low = 1, + }, + { + .name = "power", + .gpio = 0, + .active_low = 1, + .default_trigger = "default-on", + }, + { + .name = "stop", + .gpio = 1, + .active_low = 1, + }, + }, +}; + +static struct board_info __initdata board_FAST2404 = { + .name = "F@ST2404", + .expected_cpu_id = 0x6348, + + .has_enet0 = 1, + .has_enet1 = 1, + .has_pci = 1, + + .enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + + .enet1 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, + + + .has_ohci0 = 1, + .has_pccard = 1, + .has_ehci0 = 1, +}; + +static struct board_info __initdata board_DV201AMR = { + .name = "DV201AMR", + .expected_cpu_id = 0x6348, + + .has_pci = 1, + .has_ohci0 = 1, + + .has_enet0 = 1, + .has_enet1 = 1, + .enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + .enet1 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, +}; + +static struct board_info __initdata board_96348gw_a = { + .name = "96348GW-A", + .expected_cpu_id = 0x6348, + + .has_enet0 = 1, + .has_enet1 = 1, + .has_pci = 1, + + .enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + .enet1 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, + + .has_ohci0 = 1, +}; +#endif + +/* + * known 6358 boards + */ +#ifdef CONFIG_BCM63XX_CPU_6358 +static struct board_info __initdata board_96358vw = { + .name = "96358VW", + .expected_cpu_id = 0x6358, + + .has_enet0 = 1, + .has_enet1 = 1, + .has_pci = 1, + + .enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + + .enet1 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, + + + .has_ohci0 = 1, + .has_pccard = 1, + .has_ehci0 = 1, + + .leds = { + { + .name = "adsl-fail", + .gpio = 15, + .active_low = 1, + }, + { + .name = "ppp", + .gpio = 22, + .active_low = 1, + }, + { + .name = "ppp-fail", + .gpio = 23, + .active_low = 1, + }, + { + .name = "power", + .gpio = 4, + .default_trigger = "default-on", + }, + { + .name = "stop", + .gpio = 5, + }, + }, +}; + +static struct board_info __initdata board_96358vw2 = { + .name = "96358VW2", + .expected_cpu_id = 0x6358, + + .has_enet0 = 1, + .has_enet1 = 1, + .has_pci = 1, + + 
.enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + + .enet1 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, + + + .has_ohci0 = 1, + .has_pccard = 1, + .has_ehci0 = 1, + + .leds = { + { + .name = "adsl", + .gpio = 22, + .active_low = 1, + }, + { + .name = "ppp-fail", + .gpio = 23, + }, + { + .name = "power", + .gpio = 5, + .active_low = 1, + .default_trigger = "default-on", + }, + { + .name = "stop", + .gpio = 4, + .active_low = 1, + }, + }, +}; + +static struct board_info __initdata board_AGPFS0 = { + .name = "AGPF-S0", + .expected_cpu_id = 0x6358, + + .has_enet0 = 1, + .has_enet1 = 1, + .has_pci = 1, + + .enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + + .enet1 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, + + .has_ohci0 = 1, + .has_ehci0 = 1, +}; +#endif + +/* + * all boards + */ +static const struct board_info __initdata *bcm963xx_boards[] = { +#ifdef CONFIG_BCM63XX_CPU_6338 + &board_96338gw, + &board_96338w, +#endif +#ifdef CONFIG_BCM63XX_CPU_6345 + &board_96345gw2, +#endif +#ifdef CONFIG_BCM63XX_CPU_6348 + &board_96348r, + &board_96348gw, + &board_96348gw_10, + &board_96348gw_11, + &board_FAST2404, + &board_DV201AMR, + &board_96348gw_a, +#endif + +#ifdef CONFIG_BCM63XX_CPU_6358 + &board_96358vw, + &board_96358vw2, + &board_AGPFS0, +#endif +}; + +/* + * early init callback, read nvram data from flash and checksum it + */ +void __init board_prom_init(void) +{ + unsigned int check_len, i; + u8 *boot_addr, *cfe, *p; + char cfe_version[32]; + u32 val; + + /* read base address of boot chip select (0) + * 6345 does not have MPI but boots from standard + * MIPS Flash address */ + if (BCMCPU_IS_6345()) + val = 0x1fc00000; + else { + val = bcm_mpi_readl(MPI_CSBASE_REG(0)); + val &= MPI_CSBASE_BASE_MASK; + } + boot_addr = (u8 *)KSEG1ADDR(val); + + /* dump cfe version */ + cfe = boot_addr + BCM963XX_CFE_VERSION_OFFSET; + if (!memcmp(cfe, "cfe-v", 5)) + snprintf(cfe_version, sizeof(cfe_version), "%u.%u.%u-%u.%u", + cfe[5], cfe[6], cfe[7], cfe[8], cfe[9]); + else + strcpy(cfe_version, "unknown"); + printk(KERN_INFO PFX "CFE version: %s\n", cfe_version); + + /* extract nvram data */ + memcpy(&nvram, boot_addr + BCM963XX_NVRAM_OFFSET, sizeof(nvram)); + + /* check checksum before using data */ + if (nvram.version <= 4) + check_len = offsetof(struct bcm963xx_nvram, checksum_old); + else + check_len = sizeof(nvram); + val = 0; + p = (u8 *)&nvram; + while (check_len--) + val += *p; + if (val) { + printk(KERN_ERR PFX "invalid nvram checksum\n"); + return; + } + + /* find board by name */ + for (i = 0; i < ARRAY_SIZE(bcm963xx_boards); i++) { + if (strncmp(nvram.name, bcm963xx_boards[i]->name, + sizeof(nvram.name))) + continue; + /* copy, board desc array is marked initdata */ + memcpy(&board, bcm963xx_boards[i], sizeof(board)); + break; + } + + /* bail out if board is not found, will complain later */ + if (!board.name[0]) { + char name[17]; + memcpy(name, nvram.name, 16); + name[16] = 0; + printk(KERN_ERR PFX "unknown bcm963xx board: %s\n", + name); + return; + } + + /* setup pin multiplexing depending on board enabled device, + * this has to be done this early since PCI init is done + * inside arch_initcall */ + val = 0; + +#ifdef CONFIG_PCI + if (board.has_pci) { + bcm63xx_pci_enabled = 1; + if (BCMCPU_IS_6348()) + val |= GPIO_MODE_6348_G2_PCI; + } +#endif + + if (board.has_pccard) { + if (BCMCPU_IS_6348()) + val |= GPIO_MODE_6348_G1_MII_PCCARD; + } + + if (board.has_enet0 && !board.enet0.use_internal_phy) { + if (BCMCPU_IS_6348()) + val |= 
GPIO_MODE_6348_G3_EXT_MII | + GPIO_MODE_6348_G0_EXT_MII; + } + + if (board.has_enet1 && !board.enet1.use_internal_phy) { + if (BCMCPU_IS_6348()) + val |= GPIO_MODE_6348_G3_EXT_MII | + GPIO_MODE_6348_G0_EXT_MII; + } + + bcm_gpio_writel(val, GPIO_MODE_REG); +} + +/* + * second stage init callback, good time to panic if we couldn't + * identify on which board we're running since early printk is working + */ +void __init board_setup(void) +{ + if (!board.name[0]) + panic("unable to detect bcm963xx board"); + printk(KERN_INFO PFX "board name: %s\n", board.name); + + /* make sure we're running on expected cpu */ + if (bcm63xx_get_cpu_id() != board.expected_cpu_id) + panic("unexpected CPU for bcm963xx board"); +} + +/* + * return board name for /proc/cpuinfo + */ +const char *board_get_name(void) +{ + return board.name; +} + +/* + * register & return a new board mac address + */ +static int board_get_mac_address(u8 *mac) +{ + u8 *p; + int count; + + if (mac_addr_used >= nvram.mac_addr_count) { + printk(KERN_ERR PFX "not enough mac address\n"); + return -ENODEV; + } + + memcpy(mac, nvram.mac_addr_base, ETH_ALEN); + p = mac + ETH_ALEN - 1; + count = mac_addr_used; + + while (count--) { + do { + (*p)++; + if (*p != 0) + break; + p--; + } while (p != mac); + } + + if (p == mac) { + printk(KERN_ERR PFX "unable to fetch mac address\n"); + return -ENODEV; + } + + mac_addr_used++; + return 0; +} + +static struct mtd_partition mtd_partitions[] = { + { + .name = "cfe", + .offset = 0x0, + .size = 0x40000, + } +}; + +static struct physmap_flash_data flash_data = { + .width = 2, + .nr_parts = ARRAY_SIZE(mtd_partitions), + .parts = mtd_partitions, +}; + +static struct resource mtd_resources[] = { + { + .start = 0, /* filled at runtime */ + .end = 0, /* filled at runtime */ + .flags = IORESOURCE_MEM, + } +}; + +static struct platform_device mtd_dev = { + .name = "physmap-flash", + .resource = mtd_resources, + .num_resources = ARRAY_SIZE(mtd_resources), + .dev = { + .platform_data = &flash_data, + }, +}; + +/* + * Register a sane SPROMv2 to make the on-board + * bcm4318 WLAN work + */ +#ifdef CONFIG_SSB_PCIHOST +static struct ssb_sprom bcm63xx_sprom = { + .revision = 0x02, + .board_rev = 0x17, + .country_code = 0x0, + .ant_available_bg = 0x3, + .pa0b0 = 0x15ae, + .pa0b1 = 0xfa85, + .pa0b2 = 0xfe8d, + .pa1b0 = 0xffff, + .pa1b1 = 0xffff, + .pa1b2 = 0xffff, + .gpio0 = 0xff, + .gpio1 = 0xff, + .gpio2 = 0xff, + .gpio3 = 0xff, + .maxpwr_bg = 0x004c, + .itssi_bg = 0x00, + .boardflags_lo = 0x2848, + .boardflags_hi = 0x0000, +}; +#endif + +static struct gpio_led_platform_data bcm63xx_led_data; + +static struct platform_device bcm63xx_gpio_leds = { + .name = "leds-gpio", + .id = 0, + .dev.platform_data = &bcm63xx_led_data, +}; + +/* + * third stage init callback, register all board devices. 
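A userspace model of the allocation loop in board_get_mac_address() above, faithful to the original, including the way the byte pointer stays wherever the last carry left it; the base address and count are made up for the demonstration:

#include <stdio.h>

static void advance_mac(unsigned char mac[6], int used)
{
    unsigned char *p = mac + 5;
    int count = used;

    while (count--) {
        do {
            (*p)++;        /* bump lowest octet, carry leftwards on wrap */
            if (*p != 0)
                break;
            p--;
        } while (p != mac);
    }
}

int main(void)
{
    unsigned char mac[6] = { 0x00, 0x07, 0x3a, 0x00, 0x00, 0x00 };

    advance_mac(mac, 2); /* the third device gets base + 2 */
    printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
           mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
    /* prints 00:07:3a:00:00:02 */
    return 0;
}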
+ */ +int __init board_register_devices(void) +{ + u32 val; + + if (board.has_enet0 && + !board_get_mac_address(board.enet0.mac_addr)) + bcm63xx_enet_register(0, &board.enet0); + + if (board.has_enet1 && + !board_get_mac_address(board.enet1.mac_addr)) + bcm63xx_enet_register(1, &board.enet1); + + if (board.has_dsp) + bcm63xx_dsp_register(&board.dsp); + + /* Generate MAC address for WLAN and + * register our SPROM */ +#ifdef CONFIG_SSB_PCIHOST + if (!board_get_mac_address(bcm63xx_sprom.il0mac)) { + memcpy(bcm63xx_sprom.et0mac, bcm63xx_sprom.il0mac, ETH_ALEN); + memcpy(bcm63xx_sprom.et1mac, bcm63xx_sprom.il0mac, ETH_ALEN); + if (ssb_arch_set_fallback_sprom(&bcm63xx_sprom) < 0) + printk(KERN_ERR "failed to register fallback SPROM\n"); + } +#endif + + /* read base address of boot chip select (0) */ + if (BCMCPU_IS_6345()) + val = 0x1fc00000; + else { + val = bcm_mpi_readl(MPI_CSBASE_REG(0)); + val &= MPI_CSBASE_BASE_MASK; + } + mtd_resources[0].start = val; + mtd_resources[0].end = 0x1FFFFFFF; + + platform_device_register(&mtd_dev); + + bcm63xx_led_data.num_leds = ARRAY_SIZE(board.leds); + bcm63xx_led_data.leds = board.leds; + + platform_device_register(&bcm63xx_gpio_leds); + + return 0; +} + diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c new file mode 100644 index 00000000000..2c68ee9ccee --- /dev/null +++ b/arch/mips/bcm63xx/clk.c @@ -0,0 +1,226 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2008 Maxime Bizon + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_MUTEX(clocks_mutex); + + +static void clk_enable_unlocked(struct clk *clk) +{ + if (clk->set && (clk->usage++) == 0) + clk->set(clk, 1); +} + +static void clk_disable_unlocked(struct clk *clk) +{ + if (clk->set && (--clk->usage) == 0) + clk->set(clk, 0); +} + +static void bcm_hwclock_set(u32 mask, int enable) +{ + u32 reg; + + reg = bcm_perf_readl(PERF_CKCTL_REG); + if (enable) + reg |= mask; + else + reg &= ~mask; + bcm_perf_writel(reg, PERF_CKCTL_REG); +} + +/* + * Ethernet MAC "misc" clock: dma clocks and main clock on 6348 + */ +static void enet_misc_set(struct clk *clk, int enable) +{ + u32 mask; + + if (BCMCPU_IS_6338()) + mask = CKCTL_6338_ENET_EN; + else if (BCMCPU_IS_6345()) + mask = CKCTL_6345_ENET_EN; + else if (BCMCPU_IS_6348()) + mask = CKCTL_6348_ENET_EN; + else + /* BCMCPU_IS_6358 */ + mask = CKCTL_6358_EMUSB_EN; + bcm_hwclock_set(mask, enable); +} + +static struct clk clk_enet_misc = { + .set = enet_misc_set, +}; + +/* + * Ethernet MAC clocks: only relevant on 6358, silently enable misc + * clocks + */ +static void enetx_set(struct clk *clk, int enable) +{ + if (enable) + clk_enable_unlocked(&clk_enet_misc); + else + clk_disable_unlocked(&clk_enet_misc); + + if (BCMCPU_IS_6358()) { + u32 mask; + + if (clk->id == 0) + mask = CKCTL_6358_ENET0_EN; + else + mask = CKCTL_6358_ENET1_EN; + bcm_hwclock_set(mask, enable); + } +} + +static struct clk clk_enet0 = { + .id = 0, + .set = enetx_set, +}; + +static struct clk clk_enet1 = { + .id = 1, + .set = enetx_set, +}; + +/* + * Ethernet PHY clock + */ +static void ephy_set(struct clk *clk, int enable) +{ + if (!BCMCPU_IS_6358()) + return; + bcm_hwclock_set(CKCTL_6358_EPHY_EN, enable); +} + + +static struct clk clk_ephy = { + .set = ephy_set, +}; + +/* + * PCM clock + */ +static void pcm_set(struct clk *clk, int enable) +{ + if (!BCMCPU_IS_6358()) + return; +
bcm_hwclock_set(CKCTL_6358_PCM_EN, enable); +} + +static struct clk clk_pcm = { + .set = pcm_set, +}; + +/* + * USB host clock + */ +static void usbh_set(struct clk *clk, int enable) +{ + if (!BCMCPU_IS_6348()) + return; + bcm_hwclock_set(CKCTL_6348_USBH_EN, enable); +} + +static struct clk clk_usbh = { + .set = usbh_set, +}; + +/* + * SPI clock + */ +static void spi_set(struct clk *clk, int enable) +{ + u32 mask; + + if (BCMCPU_IS_6338()) + mask = CKCTL_6338_SPI_EN; + else if (BCMCPU_IS_6348()) + mask = CKCTL_6348_SPI_EN; + else + /* BCMCPU_IS_6358 */ + mask = CKCTL_6358_SPI_EN; + bcm_hwclock_set(mask, enable); +} + +static struct clk clk_spi = { + .set = spi_set, +}; + +/* + * Internal peripheral clock + */ +static struct clk clk_periph = { + .rate = (50 * 1000 * 1000), +}; + + +/* + * Linux clock API implementation + */ +int clk_enable(struct clk *clk) +{ + mutex_lock(&clocks_mutex); + clk_enable_unlocked(clk); + mutex_unlock(&clocks_mutex); + return 0; +} + +EXPORT_SYMBOL(clk_enable); + +void clk_disable(struct clk *clk) +{ + mutex_lock(&clocks_mutex); + clk_disable_unlocked(clk); + mutex_unlock(&clocks_mutex); +} + +EXPORT_SYMBOL(clk_disable); + +unsigned long clk_get_rate(struct clk *clk) +{ + return clk->rate; +} + +EXPORT_SYMBOL(clk_get_rate); + +struct clk *clk_get(struct device *dev, const char *id) +{ + if (!strcmp(id, "enet0")) + return &clk_enet0; + if (!strcmp(id, "enet1")) + return &clk_enet1; + if (!strcmp(id, "ephy")) + return &clk_ephy; + if (!strcmp(id, "usbh")) + return &clk_usbh; + if (!strcmp(id, "spi")) + return &clk_spi; + if (!strcmp(id, "periph")) + return &clk_periph; + if (BCMCPU_IS_6358() && !strcmp(id, "pcm")) + return &clk_pcm; + return ERR_PTR(-ENOENT); +} + +EXPORT_SYMBOL(clk_get); + +void clk_put(struct clk *clk) +{ +} + +EXPORT_SYMBOL(clk_put); diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c new file mode 100644 index 00000000000..6dc43f0483e --- /dev/null +++ b/arch/mips/bcm63xx/cpu.c @@ -0,0 +1,345 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
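Hypothetical driver-side use of the clock API implemented in clk.c above; the clock names are the strings matched by clk_get(), the device pointer is unused by this implementation, and the function itself is a sketch:

static int sketch_enable_enet0_clock(struct device *dev)
{
    struct clk *enet0, *periph;

    enet0 = clk_get(dev, "enet0");
    if (IS_ERR(enet0))
        return PTR_ERR(enet0);
    clk_enable(enet0);  /* silently enables the shared "misc" clock too */

    periph = clk_get(dev, "periph");
    if (!IS_ERR(periph))
        printk(KERN_INFO "bus clock: %lu Hz\n",
               clk_get_rate(periph)); /* 50000000 */
    return 0;
}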
+ * + * Copyright (C) 2008 Maxime Bizon + * Copyright (C) 2009 Florian Fainelli + */ + +#include +#include +#include +#include +#include +#include +#include + +const unsigned long *bcm63xx_regs_base; +EXPORT_SYMBOL(bcm63xx_regs_base); + +const int *bcm63xx_irqs; +EXPORT_SYMBOL(bcm63xx_irqs); + +static u16 bcm63xx_cpu_id; +static u16 bcm63xx_cpu_rev; +static unsigned int bcm63xx_cpu_freq; +static unsigned int bcm63xx_memory_size; + +/* + * 6338 register sets and irqs + */ +static const unsigned long bcm96338_regs_base[] = { + [RSET_DSL_LMEM] = BCM_6338_DSL_LMEM_BASE, + [RSET_PERF] = BCM_6338_PERF_BASE, + [RSET_TIMER] = BCM_6338_TIMER_BASE, + [RSET_WDT] = BCM_6338_WDT_BASE, + [RSET_UART0] = BCM_6338_UART0_BASE, + [RSET_GPIO] = BCM_6338_GPIO_BASE, + [RSET_SPI] = BCM_6338_SPI_BASE, + [RSET_OHCI0] = BCM_6338_OHCI0_BASE, + [RSET_OHCI_PRIV] = BCM_6338_OHCI_PRIV_BASE, + [RSET_USBH_PRIV] = BCM_6338_USBH_PRIV_BASE, + [RSET_UDC0] = BCM_6338_UDC0_BASE, + [RSET_MPI] = BCM_6338_MPI_BASE, + [RSET_PCMCIA] = BCM_6338_PCMCIA_BASE, + [RSET_SDRAM] = BCM_6338_SDRAM_BASE, + [RSET_DSL] = BCM_6338_DSL_BASE, + [RSET_ENET0] = BCM_6338_ENET0_BASE, + [RSET_ENET1] = BCM_6338_ENET1_BASE, + [RSET_ENETDMA] = BCM_6338_ENETDMA_BASE, + [RSET_MEMC] = BCM_6338_MEMC_BASE, + [RSET_DDR] = BCM_6338_DDR_BASE, +}; + +static const int bcm96338_irqs[] = { + [IRQ_TIMER] = BCM_6338_TIMER_IRQ, + [IRQ_UART0] = BCM_6338_UART0_IRQ, + [IRQ_DSL] = BCM_6338_DSL_IRQ, + [IRQ_ENET0] = BCM_6338_ENET0_IRQ, + [IRQ_ENET_PHY] = BCM_6338_ENET_PHY_IRQ, + [IRQ_ENET0_RXDMA] = BCM_6338_ENET0_RXDMA_IRQ, + [IRQ_ENET0_TXDMA] = BCM_6338_ENET0_TXDMA_IRQ, +}; + +/* + * 6345 register sets and irqs + */ +static const unsigned long bcm96345_regs_base[] = { + [RSET_DSL_LMEM] = BCM_6345_DSL_LMEM_BASE, + [RSET_PERF] = BCM_6345_PERF_BASE, + [RSET_TIMER] = BCM_6345_TIMER_BASE, + [RSET_WDT] = BCM_6345_WDT_BASE, + [RSET_UART0] = BCM_6345_UART0_BASE, + [RSET_GPIO] = BCM_6345_GPIO_BASE, + [RSET_SPI] = BCM_6345_SPI_BASE, + [RSET_UDC0] = BCM_6345_UDC0_BASE, + [RSET_OHCI0] = BCM_6345_OHCI0_BASE, + [RSET_OHCI_PRIV] = BCM_6345_OHCI_PRIV_BASE, + [RSET_USBH_PRIV] = BCM_6345_USBH_PRIV_BASE, + [RSET_MPI] = BCM_6345_MPI_BASE, + [RSET_PCMCIA] = BCM_6345_PCMCIA_BASE, + [RSET_DSL] = BCM_6345_DSL_BASE, + [RSET_ENET0] = BCM_6345_ENET0_BASE, + [RSET_ENET1] = BCM_6345_ENET1_BASE, + [RSET_ENETDMA] = BCM_6345_ENETDMA_BASE, + [RSET_EHCI0] = BCM_6345_EHCI0_BASE, + [RSET_SDRAM] = BCM_6345_SDRAM_BASE, + [RSET_MEMC] = BCM_6345_MEMC_BASE, + [RSET_DDR] = BCM_6345_DDR_BASE, +}; + +static const int bcm96345_irqs[] = { + [IRQ_TIMER] = BCM_6345_TIMER_IRQ, + [IRQ_UART0] = BCM_6345_UART0_IRQ, + [IRQ_DSL] = BCM_6345_DSL_IRQ, + [IRQ_ENET0] = BCM_6345_ENET0_IRQ, + [IRQ_ENET_PHY] = BCM_6345_ENET_PHY_IRQ, + [IRQ_ENET0_RXDMA] = BCM_6345_ENET0_RXDMA_IRQ, + [IRQ_ENET0_TXDMA] = BCM_6345_ENET0_TXDMA_IRQ, +}; + +/* + * 6348 register sets and irqs + */ +static const unsigned long bcm96348_regs_base[] = { + [RSET_DSL_LMEM] = BCM_6348_DSL_LMEM_BASE, + [RSET_PERF] = BCM_6348_PERF_BASE, + [RSET_TIMER] = BCM_6348_TIMER_BASE, + [RSET_WDT] = BCM_6348_WDT_BASE, + [RSET_UART0] = BCM_6348_UART0_BASE, + [RSET_GPIO] = BCM_6348_GPIO_BASE, + [RSET_SPI] = BCM_6348_SPI_BASE, + [RSET_OHCI0] = BCM_6348_OHCI0_BASE, + [RSET_OHCI_PRIV] = BCM_6348_OHCI_PRIV_BASE, + [RSET_USBH_PRIV] = BCM_6348_USBH_PRIV_BASE, + [RSET_MPI] = BCM_6348_MPI_BASE, + [RSET_PCMCIA] = BCM_6348_PCMCIA_BASE, + [RSET_SDRAM] = BCM_6348_SDRAM_BASE, + [RSET_DSL] = BCM_6348_DSL_BASE, + [RSET_ENET0] = BCM_6348_ENET0_BASE, + [RSET_ENET1] = BCM_6348_ENET1_BASE, + 
[RSET_ENETDMA] = BCM_6348_ENETDMA_BASE, + [RSET_MEMC] = BCM_6348_MEMC_BASE, + [RSET_DDR] = BCM_6348_DDR_BASE, +}; + +static const int bcm96348_irqs[] = { + [IRQ_TIMER] = BCM_6348_TIMER_IRQ, + [IRQ_UART0] = BCM_6348_UART0_IRQ, + [IRQ_DSL] = BCM_6348_DSL_IRQ, + [IRQ_ENET0] = BCM_6348_ENET0_IRQ, + [IRQ_ENET1] = BCM_6348_ENET1_IRQ, + [IRQ_ENET_PHY] = BCM_6348_ENET_PHY_IRQ, + [IRQ_OHCI0] = BCM_6348_OHCI0_IRQ, + [IRQ_PCMCIA] = BCM_6348_PCMCIA_IRQ, + [IRQ_ENET0_RXDMA] = BCM_6348_ENET0_RXDMA_IRQ, + [IRQ_ENET0_TXDMA] = BCM_6348_ENET0_TXDMA_IRQ, + [IRQ_ENET1_RXDMA] = BCM_6348_ENET1_RXDMA_IRQ, + [IRQ_ENET1_TXDMA] = BCM_6348_ENET1_TXDMA_IRQ, + [IRQ_PCI] = BCM_6348_PCI_IRQ, +}; + +/* + * 6358 register sets and irqs + */ +static const unsigned long bcm96358_regs_base[] = { + [RSET_DSL_LMEM] = BCM_6358_DSL_LMEM_BASE, + [RSET_PERF] = BCM_6358_PERF_BASE, + [RSET_TIMER] = BCM_6358_TIMER_BASE, + [RSET_WDT] = BCM_6358_WDT_BASE, + [RSET_UART0] = BCM_6358_UART0_BASE, + [RSET_GPIO] = BCM_6358_GPIO_BASE, + [RSET_SPI] = BCM_6358_SPI_BASE, + [RSET_OHCI0] = BCM_6358_OHCI0_BASE, + [RSET_EHCI0] = BCM_6358_EHCI0_BASE, + [RSET_OHCI_PRIV] = BCM_6358_OHCI_PRIV_BASE, + [RSET_USBH_PRIV] = BCM_6358_USBH_PRIV_BASE, + [RSET_MPI] = BCM_6358_MPI_BASE, + [RSET_PCMCIA] = BCM_6358_PCMCIA_BASE, + [RSET_SDRAM] = BCM_6358_SDRAM_BASE, + [RSET_DSL] = BCM_6358_DSL_BASE, + [RSET_ENET0] = BCM_6358_ENET0_BASE, + [RSET_ENET1] = BCM_6358_ENET1_BASE, + [RSET_ENETDMA] = BCM_6358_ENETDMA_BASE, + [RSET_MEMC] = BCM_6358_MEMC_BASE, + [RSET_DDR] = BCM_6358_DDR_BASE, +}; + +static const int bcm96358_irqs[] = { + [IRQ_TIMER] = BCM_6358_TIMER_IRQ, + [IRQ_UART0] = BCM_6358_UART0_IRQ, + [IRQ_DSL] = BCM_6358_DSL_IRQ, + [IRQ_ENET0] = BCM_6358_ENET0_IRQ, + [IRQ_ENET1] = BCM_6358_ENET1_IRQ, + [IRQ_ENET_PHY] = BCM_6358_ENET_PHY_IRQ, + [IRQ_OHCI0] = BCM_6358_OHCI0_IRQ, + [IRQ_EHCI0] = BCM_6358_EHCI0_IRQ, + [IRQ_PCMCIA] = BCM_6358_PCMCIA_IRQ, + [IRQ_ENET0_RXDMA] = BCM_6358_ENET0_RXDMA_IRQ, + [IRQ_ENET0_TXDMA] = BCM_6358_ENET0_TXDMA_IRQ, + [IRQ_ENET1_RXDMA] = BCM_6358_ENET1_RXDMA_IRQ, + [IRQ_ENET1_TXDMA] = BCM_6358_ENET1_TXDMA_IRQ, + [IRQ_PCI] = BCM_6358_PCI_IRQ, +}; + +u16 __bcm63xx_get_cpu_id(void) +{ + return bcm63xx_cpu_id; +} + +EXPORT_SYMBOL(__bcm63xx_get_cpu_id); + +u16 bcm63xx_get_cpu_rev(void) +{ + return bcm63xx_cpu_rev; +} + +EXPORT_SYMBOL(bcm63xx_get_cpu_rev); + +unsigned int bcm63xx_get_cpu_freq(void) +{ + return bcm63xx_cpu_freq; +} + +unsigned int bcm63xx_get_memory_size(void) +{ + return bcm63xx_memory_size; +} + +static unsigned int detect_cpu_clock(void) +{ + unsigned int tmp, n1 = 0, n2 = 0, m1 = 0; + + /* BCM6338 has a fixed 240 Mhz frequency */ + if (BCMCPU_IS_6338()) + return 240000000; + + /* BCM6345 has a fixed 140Mhz frequency */ + if (BCMCPU_IS_6345()) + return 140000000; + + /* + * frequency depends on PLL configuration: + */ + if (BCMCPU_IS_6348()) { + /* 16MHz * (N1 + 1) * (N2 + 2) / (M1_CPU + 1) */ + tmp = bcm_perf_readl(PERF_MIPSPLLCTL_REG); + n1 = (tmp & MIPSPLLCTL_N1_MASK) >> MIPSPLLCTL_N1_SHIFT; + n2 = (tmp & MIPSPLLCTL_N2_MASK) >> MIPSPLLCTL_N2_SHIFT; + m1 = (tmp & MIPSPLLCTL_M1CPU_MASK) >> MIPSPLLCTL_M1CPU_SHIFT; + n1 += 1; + n2 += 2; + m1 += 1; + } + + if (BCMCPU_IS_6358()) { + /* 16MHz * N1 * N2 / M1_CPU */ + tmp = bcm_ddr_readl(DDR_DMIPSPLLCFG_REG); + n1 = (tmp & DMIPSPLLCFG_N1_MASK) >> DMIPSPLLCFG_N1_SHIFT; + n2 = (tmp & DMIPSPLLCFG_N2_MASK) >> DMIPSPLLCFG_N2_SHIFT; + m1 = (tmp & DMIPSPLLCFG_M1_MASK) >> DMIPSPLLCFG_M1_SHIFT; + } + + return (16 * 1000000 * n1 * n2) / m1; +} + +/* + * attempt to detect the amount of memory 
installed + */ +static unsigned int detect_memory_size(void) +{ + unsigned int cols = 0, rows = 0, is_32bits = 0, banks = 0; + u32 val; + + if (BCMCPU_IS_6345()) + return (8 * 1024 * 1024); + + if (BCMCPU_IS_6338() || BCMCPU_IS_6348()) { + val = bcm_sdram_readl(SDRAM_CFG_REG); + rows = (val & SDRAM_CFG_ROW_MASK) >> SDRAM_CFG_ROW_SHIFT; + cols = (val & SDRAM_CFG_COL_MASK) >> SDRAM_CFG_COL_SHIFT; + is_32bits = (val & SDRAM_CFG_32B_MASK) ? 1 : 0; + banks = (val & SDRAM_CFG_BANK_MASK) ? 2 : 1; + } + + if (BCMCPU_IS_6358()) { + val = bcm_memc_readl(MEMC_CFG_REG); + rows = (val & MEMC_CFG_ROW_MASK) >> MEMC_CFG_ROW_SHIFT; + cols = (val & MEMC_CFG_COL_MASK) >> MEMC_CFG_COL_SHIFT; + is_32bits = (val & MEMC_CFG_32B_MASK) ? 0 : 1; + banks = 2; + } + + /* 0 => 11 address bits ... 2 => 13 address bits */ + rows += 11; + + /* 0 => 8 address bits ... 2 => 10 address bits */ + cols += 8; + + return 1 << (cols + rows + (is_32bits + 1) + banks); +} + +void __init bcm63xx_cpu_init(void) +{ + unsigned int tmp, expected_cpu_id; + struct cpuinfo_mips *c = &current_cpu_data; + + /* soc registers location depends on cpu type */ + expected_cpu_id = 0; + + switch (c->cputype) { + /* + * BCM6338 has the same PrId as BCM3302, see arch/mips/kernel/cpu-probe.c + */ + case CPU_BCM3302: + expected_cpu_id = BCM6338_CPU_ID; + bcm63xx_regs_base = bcm96338_regs_base; + bcm63xx_irqs = bcm96338_irqs; + break; + case CPU_BCM6345: + expected_cpu_id = BCM6345_CPU_ID; + bcm63xx_regs_base = bcm96345_regs_base; + bcm63xx_irqs = bcm96345_irqs; + break; + case CPU_BCM6348: + expected_cpu_id = BCM6348_CPU_ID; + bcm63xx_regs_base = bcm96348_regs_base; + bcm63xx_irqs = bcm96348_irqs; + break; + case CPU_BCM6358: + expected_cpu_id = BCM6358_CPU_ID; + bcm63xx_regs_base = bcm96358_regs_base; + bcm63xx_irqs = bcm96358_irqs; + break; + } + + /* + * really early to panic, but delaying panic would not help since we + * will never get any working console + */ + if (!expected_cpu_id) + panic("unsupported Broadcom CPU"); + + /* + * bcm63xx_regs_base is set, we can access soc registers + */ + + /* double check CPU type */ + tmp = bcm_perf_readl(PERF_REV_REG); + bcm63xx_cpu_id = (tmp & REV_CHIPID_MASK) >> REV_CHIPID_SHIFT; + bcm63xx_cpu_rev = (tmp & REV_REVID_MASK) >> REV_REVID_SHIFT; + + if (bcm63xx_cpu_id != expected_cpu_id) + panic("bcm63xx CPU id mismatch"); + + bcm63xx_cpu_freq = detect_cpu_clock(); + bcm63xx_memory_size = detect_memory_size(); + + printk(KERN_INFO "Detected Broadcom 0x%04x CPU revision %02x\n", + bcm63xx_cpu_id, bcm63xx_cpu_rev); + printk(KERN_INFO "CPU frequency is %u MHz\n", + bcm63xx_cpu_freq / 1000000); + printk(KERN_INFO "%uMB of RAM installed\n", + bcm63xx_memory_size >> 20); +} diff --git a/arch/mips/bcm63xx/cs.c b/arch/mips/bcm63xx/cs.c new file mode 100644 index 00000000000..50d8190bbf7 --- /dev/null +++ b/arch/mips/bcm63xx/cs.c @@ -0,0 +1,144 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2008 Maxime Bizon + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(bcm63xx_cs_lock); + +/* + * check if given chip select exists + */ +static int is_valid_cs(unsigned int cs) +{ + if (cs > 6) + return 0; + return 1; +} + +/* + * Configure chipselect base address and size (bytes). + * Size must be a power of two between 8k and 256M.
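A worked example of the detect_memory_size() arithmetic above, with made-up register fields (a 6348-style part reporting 2 extra row bits, 1 extra column bit, a 32-bit bus and two banks; none of these values come from the patch):

#include <stdio.h>

int main(void)
{
    unsigned int rows = 2 + 11;  /* row field + 11 */
    unsigned int cols = 1 + 8;   /* column field + 8 */
    unsigned int is_32bits = 1;
    unsigned int banks = 2;

    /* 13 + 9 + 2 + 2 address bits => 2^26 bytes */
    printf("%u MB\n",
           (1u << (cols + rows + (is_32bits + 1) + banks)) >> 20); /* 64 */
    return 0;
}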
+ */ +int bcm63xx_set_cs_base(unsigned int cs, u32 base, unsigned int size) +{ + unsigned long flags; + u32 val; + + if (!is_valid_cs(cs)) + return -EINVAL; + + /* sanity check on size */ + if (size != roundup_pow_of_two(size)) + return -EINVAL; + + if (size < 8 * 1024 || size > 256 * 1024 * 1024) + return -EINVAL; + + val = (base & MPI_CSBASE_BASE_MASK); + /* 8k => 0 - 256M => 15 */ + val |= (ilog2(size) - ilog2(8 * 1024)) << MPI_CSBASE_SIZE_SHIFT; + + spin_lock_irqsave(&bcm63xx_cs_lock, flags); + bcm_mpi_writel(val, MPI_CSBASE_REG(cs)); + spin_unlock_irqrestore(&bcm63xx_cs_lock, flags); + + return 0; +} + +EXPORT_SYMBOL(bcm63xx_set_cs_base); + +/* + * configure chipselect timing (ns) + */ +int bcm63xx_set_cs_timing(unsigned int cs, unsigned int wait, + unsigned int setup, unsigned int hold) +{ + unsigned long flags; + u32 val; + + if (!is_valid_cs(cs)) + return -EINVAL; + + spin_lock_irqsave(&bcm63xx_cs_lock, flags); + val = bcm_mpi_readl(MPI_CSCTL_REG(cs)); + val &= ~(MPI_CSCTL_WAIT_MASK); + val &= ~(MPI_CSCTL_SETUP_MASK); + val &= ~(MPI_CSCTL_HOLD_MASK); + val |= wait << MPI_CSCTL_WAIT_SHIFT; + val |= setup << MPI_CSCTL_SETUP_SHIFT; + val |= hold << MPI_CSCTL_HOLD_SHIFT; + bcm_mpi_writel(val, MPI_CSCTL_REG(cs)); + spin_unlock_irqrestore(&bcm63xx_cs_lock, flags); + + return 0; +} + +EXPORT_SYMBOL(bcm63xx_set_cs_timing); + +/* + * configure other chipselect parameter (data bus size, ...) + */ +int bcm63xx_set_cs_param(unsigned int cs, u32 params) +{ + unsigned long flags; + u32 val; + + if (!is_valid_cs(cs)) + return -EINVAL; + + /* none of this fields apply to pcmcia */ + if (cs == MPI_CS_PCMCIA_COMMON || + cs == MPI_CS_PCMCIA_ATTR || + cs == MPI_CS_PCMCIA_IO) + return -EINVAL; + + spin_lock_irqsave(&bcm63xx_cs_lock, flags); + val = bcm_mpi_readl(MPI_CSCTL_REG(cs)); + val &= ~(MPI_CSCTL_DATA16_MASK); + val &= ~(MPI_CSCTL_SYNCMODE_MASK); + val &= ~(MPI_CSCTL_TSIZE_MASK); + val &= ~(MPI_CSCTL_ENDIANSWAP_MASK); + val |= params; + bcm_mpi_writel(val, MPI_CSCTL_REG(cs)); + spin_unlock_irqrestore(&bcm63xx_cs_lock, flags); + + return 0; +} + +EXPORT_SYMBOL(bcm63xx_set_cs_param); + +/* + * set cs status (enable/disable) + */ +int bcm63xx_set_cs_status(unsigned int cs, int enable) +{ + unsigned long flags; + u32 val; + + if (!is_valid_cs(cs)) + return -EINVAL; + + spin_lock_irqsave(&bcm63xx_cs_lock, flags); + val = bcm_mpi_readl(MPI_CSCTL_REG(cs)); + if (enable) + val |= MPI_CSCTL_ENABLE_MASK; + else + val &= ~MPI_CSCTL_ENABLE_MASK; + bcm_mpi_writel(val, MPI_CSCTL_REG(cs)); + spin_unlock_irqrestore(&bcm63xx_cs_lock, flags); + return 0; +} + +EXPORT_SYMBOL(bcm63xx_set_cs_status); diff --git a/arch/mips/bcm63xx/dev-dsp.c b/arch/mips/bcm63xx/dev-dsp.c new file mode 100644 index 00000000000..da46d1d3c77 --- /dev/null +++ b/arch/mips/bcm63xx/dev-dsp.c @@ -0,0 +1,56 @@ +/* + * Broadcom BCM63xx VoIP DSP registration + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
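The size field written by bcm63xx_set_cs_base() above encodes the power of two: 8 KiB maps to 0 and each doubling adds one, up to 256 MiB at 15. A standalone check of that encoding (the helper replays the ilog2(size) - ilog2(8 * 1024) computation in portable C):

#include <stdio.h>

static int cs_size_field(unsigned long size) /* size: power of two, bytes */
{
    int log = 0;

    while ((1UL << log) < size)
        log++;
    return log - 13; /* ilog2(size) - ilog2(8 * 1024) */
}

int main(void)
{
    printf("8 KiB   -> %d\n", cs_size_field(8UL << 10));   /* 0 */
    printf("16 MiB  -> %d\n", cs_size_field(16UL << 20));  /* 11 */
    printf("256 MiB -> %d\n", cs_size_field(256UL << 20)); /* 15 */
    return 0;
}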
+ * + * Copyright (C) 2009 Florian Fainelli + */ + +#include +#include +#include + +#include +#include +#include +#include + +static struct resource voip_dsp_resources[] = { + { + .start = -1, /* filled at runtime */ + .end = -1, /* filled at runtime */ + .flags = IORESOURCE_MEM, + }, + { + .start = -1, /* filled at runtime */ + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device bcm63xx_voip_dsp_device = { + .name = "bcm63xx-voip-dsp", + .id = 0, + .num_resources = ARRAY_SIZE(voip_dsp_resources), + .resource = voip_dsp_resources, +}; + +int __init bcm63xx_dsp_register(const struct bcm63xx_dsp_platform_data *pd) +{ + struct bcm63xx_dsp_platform_data *dpd; + u32 val; + + /* Get the memory window */ + val = bcm_mpi_readl(MPI_CSBASE_REG(pd->cs - 1)); + val &= MPI_CSBASE_BASE_MASK; + voip_dsp_resources[0].start = val; + voip_dsp_resources[0].end = val + 0xFFFFFFF; + voip_dsp_resources[1].start = pd->ext_irq; + + /* copy given platform data */ + dpd = bcm63xx_voip_dsp_device.dev.platform_data; + memcpy(dpd, pd, sizeof (*pd)); + + return platform_device_register(&bcm63xx_voip_dsp_device); +} diff --git a/arch/mips/bcm63xx/early_printk.c b/arch/mips/bcm63xx/early_printk.c new file mode 100644 index 00000000000..bf353c937df --- /dev/null +++ b/arch/mips/bcm63xx/early_printk.c @@ -0,0 +1,30 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2008 Maxime Bizon + */ + +#include +#include +#include + +static void __init wait_xfered(void) +{ + unsigned int val; + + /* wait for any previous char to be transmitted */ + do { + val = bcm_uart0_readl(UART_IR_REG); + if (val & UART_IR_STAT(UART_IR_TXEMPTY)) + break; + } while (1); +} + +void __init prom_putchar(char c) +{ + wait_xfered(); + bcm_uart0_writel(c, UART_FIFO_REG); + wait_xfered(); +} diff --git a/arch/mips/bcm63xx/gpio.c b/arch/mips/bcm63xx/gpio.c new file mode 100644 index 00000000000..87ca3904633 --- /dev/null +++ b/arch/mips/bcm63xx/gpio.c @@ -0,0 +1,134 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 2008 Maxime Bizon + * Copyright (C) 2008 Florian Fainelli + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_SPINLOCK(bcm63xx_gpio_lock); +static u32 gpio_out_low, gpio_out_high; + +static void bcm63xx_gpio_set(struct gpio_chip *chip, + unsigned gpio, int val) +{ + u32 reg; + u32 mask; + u32 *v; + unsigned long flags; + + if (gpio >= chip->ngpio) + BUG(); + + if (gpio < 32) { + reg = GPIO_DATA_LO_REG; + mask = 1 << gpio; + v = &gpio_out_low; + } else { + reg = GPIO_DATA_HI_REG; + mask = 1 << (gpio - 32); + v = &gpio_out_high; + } + + spin_lock_irqsave(&bcm63xx_gpio_lock, flags); + if (val) + *v |= mask; + else + *v &= ~mask; + bcm_gpio_writel(*v, reg); + spin_unlock_irqrestore(&bcm63xx_gpio_lock, flags); +} + +static int bcm63xx_gpio_get(struct gpio_chip *chip, unsigned gpio) +{ + u32 reg; + u32 mask; + + if (gpio >= chip->ngpio) + BUG(); + + if (gpio < 32) { + reg = GPIO_DATA_LO_REG; + mask = 1 << gpio; + } else { + reg = GPIO_DATA_HI_REG; + mask = 1 << (gpio - 32); + } + + return !!(bcm_gpio_readl(reg) & mask); +} + +static int bcm63xx_gpio_set_direction(struct gpio_chip *chip, + unsigned gpio, int dir) +{ + u32 reg; + u32 mask; + u32 tmp; + unsigned long flags; + + if (gpio >= chip->ngpio) + BUG(); + + if (gpio < 32) { + reg = GPIO_CTL_LO_REG; + mask = 1 << gpio; + } else { + reg = GPIO_CTL_HI_REG; + mask = 1 << (gpio - 32); + } + + spin_lock_irqsave(&bcm63xx_gpio_lock, flags); + tmp = bcm_gpio_readl(reg); + if (dir == GPIO_DIR_IN) + tmp &= ~mask; + else + tmp |= mask; + bcm_gpio_writel(tmp, reg); + spin_unlock_irqrestore(&bcm63xx_gpio_lock, flags); + + return 0; +} + +static int bcm63xx_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) +{ + return bcm63xx_gpio_set_direction(chip, gpio, GPIO_DIR_IN); +} + +static int bcm63xx_gpio_direction_output(struct gpio_chip *chip, + unsigned gpio, int value) +{ + bcm63xx_gpio_set(chip, gpio, value); + return bcm63xx_gpio_set_direction(chip, gpio, GPIO_DIR_OUT); +} + + +static struct gpio_chip bcm63xx_gpio_chip = { + .label = "bcm63xx-gpio", + .direction_input = bcm63xx_gpio_direction_input, + .direction_output = bcm63xx_gpio_direction_output, + .get = bcm63xx_gpio_get, + .set = bcm63xx_gpio_set, + .base = 0, +}; + +int __init bcm63xx_gpio_init(void) +{ + bcm63xx_gpio_chip.ngpio = bcm63xx_gpio_count(); + pr_info("registering %d GPIOs\n", bcm63xx_gpio_chip.ngpio); + + return gpiochip_add(&bcm63xx_gpio_chip); +} + +arch_initcall(bcm63xx_gpio_init); diff --git a/arch/mips/bcm63xx/irq.c b/arch/mips/bcm63xx/irq.c new file mode 100644 index 00000000000..a0c5cd18c19 --- /dev/null +++ b/arch/mips/bcm63xx/irq.c @@ -0,0 +1,253 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2008 Maxime Bizon + * Copyright (C) 2008 Nicolas Schichan + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * dispatch internal devices IRQ (uart, enet, watchdog, ...). do not + * prioritize any interrupt relatively to another. the static counter + * will resume the loop where it ended the last time we left this + * function. 
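A userspace model of that round-robin scan, assuming, as the real dispatcher does, that at least one bit is pending when called; the static index is what keeps one chatty low-numbered source from starving the others:

#include <stdio.h>

static unsigned int next_irq(unsigned int pending)
{
    static int i; /* resumes where the previous call stopped */

    for (;;) {
        int to_call = i;

        i = (i + 1) & 0x1f;
        if (pending & (1u << to_call))
            return to_call;
    }
}

int main(void)
{
    unsigned int pending = (1u << 3) | (1u << 17);

    printf("%u\n", next_irq(pending)); /* 3 */
    printf("%u\n", next_irq(pending)); /* 17 */
    printf("%u\n", next_irq(pending)); /* 3 again: no starvation */
    return 0;
}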
+ */
+static void bcm63xx_irq_dispatch_internal(void)
+{
+	u32 pending;
+	static int i;
+
+	pending = bcm_perf_readl(PERF_IRQMASK_REG) &
+		bcm_perf_readl(PERF_IRQSTAT_REG);
+
+	if (!pending)
+		return;
+
+	while (1) {
+		int to_call = i;
+
+		i = (i + 1) & 0x1f;
+		if (pending & (1 << to_call)) {
+			do_IRQ(to_call + IRQ_INTERNAL_BASE);
+			break;
+		}
+	}
+}
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	u32 cause;
+
+	do {
+		cause = read_c0_cause() & read_c0_status() & ST0_IM;
+
+		if (!cause)
+			break;
+
+		if (cause & CAUSEF_IP7)
+			do_IRQ(7);
+		if (cause & CAUSEF_IP2)
+			bcm63xx_irq_dispatch_internal();
+		if (cause & CAUSEF_IP3)
+			do_IRQ(IRQ_EXT_0);
+		if (cause & CAUSEF_IP4)
+			do_IRQ(IRQ_EXT_1);
+		if (cause & CAUSEF_IP5)
+			do_IRQ(IRQ_EXT_2);
+		if (cause & CAUSEF_IP6)
+			do_IRQ(IRQ_EXT_3);
+	} while (1);
+}
+
+/*
+ * internal IRQ operations: only mask/unmask on PERF irq mask
+ * register.
+ */
+static inline void bcm63xx_internal_irq_mask(unsigned int irq)
+{
+	u32 mask;
+
+	irq -= IRQ_INTERNAL_BASE;
+	mask = bcm_perf_readl(PERF_IRQMASK_REG);
+	mask &= ~(1 << irq);
+	bcm_perf_writel(mask, PERF_IRQMASK_REG);
+}
+
+static void bcm63xx_internal_irq_unmask(unsigned int irq)
+{
+	u32 mask;
+
+	irq -= IRQ_INTERNAL_BASE;
+	mask = bcm_perf_readl(PERF_IRQMASK_REG);
+	mask |= (1 << irq);
+	bcm_perf_writel(mask, PERF_IRQMASK_REG);
+}
+
+static unsigned int bcm63xx_internal_irq_startup(unsigned int irq)
+{
+	bcm63xx_internal_irq_unmask(irq);
+	return 0;
+}
+
+/*
+ * external IRQ operations: mask/unmask and clear on PERF external
+ * irq control register.
+ */
+static void bcm63xx_external_irq_mask(unsigned int irq)
+{
+	u32 reg;
+
+	irq -= IRQ_EXT_BASE;
+	reg = bcm_perf_readl(PERF_EXTIRQ_CFG_REG);
+	reg &= ~EXTIRQ_CFG_MASK(irq);
+	bcm_perf_writel(reg, PERF_EXTIRQ_CFG_REG);
+}
+
+static void bcm63xx_external_irq_unmask(unsigned int irq)
+{
+	u32 reg;
+
+	irq -= IRQ_EXT_BASE;
+	reg = bcm_perf_readl(PERF_EXTIRQ_CFG_REG);
+	reg |= EXTIRQ_CFG_MASK(irq);
+	bcm_perf_writel(reg, PERF_EXTIRQ_CFG_REG);
+}
+
+static void bcm63xx_external_irq_clear(unsigned int irq)
+{
+	u32 reg;
+
+	irq -= IRQ_EXT_BASE;
+	reg = bcm_perf_readl(PERF_EXTIRQ_CFG_REG);
+	reg |= EXTIRQ_CFG_CLEAR(irq);
+	bcm_perf_writel(reg, PERF_EXTIRQ_CFG_REG);
+}
+
+static unsigned int bcm63xx_external_irq_startup(unsigned int irq)
+{
+	set_c0_status(0x100 << (irq - IRQ_MIPS_BASE));
+	irq_enable_hazard();
+	bcm63xx_external_irq_unmask(irq);
+	return 0;
+}
+
+static void bcm63xx_external_irq_shutdown(unsigned int irq)
+{
+	bcm63xx_external_irq_mask(irq);
+	clear_c0_status(0x100 << (irq - IRQ_MIPS_BASE));
+	irq_disable_hazard();
+}
+
+static int bcm63xx_external_irq_set_type(unsigned int irq,
+					 unsigned int flow_type)
+{
+	u32 reg;
+	struct irq_desc *desc = irq_desc + irq;
+
+	irq -= IRQ_EXT_BASE;
+
+	flow_type &= IRQ_TYPE_SENSE_MASK;
+
+	if (flow_type == IRQ_TYPE_NONE)
+		flow_type = IRQ_TYPE_LEVEL_LOW;
+
+	reg = bcm_perf_readl(PERF_EXTIRQ_CFG_REG);
+	switch (flow_type) {
+	case IRQ_TYPE_EDGE_BOTH:
+		reg &= ~EXTIRQ_CFG_LEVELSENSE(irq);
+		reg |= EXTIRQ_CFG_BOTHEDGE(irq);
+		break;
+
+	case IRQ_TYPE_EDGE_RISING:
+		reg &= ~EXTIRQ_CFG_LEVELSENSE(irq);
+		reg |= EXTIRQ_CFG_SENSE(irq);
+		reg &= ~EXTIRQ_CFG_BOTHEDGE(irq);
+		break;
+
+	case IRQ_TYPE_EDGE_FALLING:
+		reg &= ~EXTIRQ_CFG_LEVELSENSE(irq);
+		reg &= ~EXTIRQ_CFG_SENSE(irq);
+		reg &= ~EXTIRQ_CFG_BOTHEDGE(irq);
+		break;
+
+	case IRQ_TYPE_LEVEL_HIGH:
+		reg |= EXTIRQ_CFG_LEVELSENSE(irq);
+		reg |= EXTIRQ_CFG_SENSE(irq);
+		break;
+
+	case IRQ_TYPE_LEVEL_LOW:
+		reg |= EXTIRQ_CFG_LEVELSENSE(irq);
+		reg &= ~EXTIRQ_CFG_SENSE(irq);
+		break;
+
+	default:
+		printk(KERN_ERR "bogus flow type combination given!\n");
+		return -EINVAL;
+	}
+	bcm_perf_writel(reg, PERF_EXTIRQ_CFG_REG);
+
+	if (flow_type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) {
+		desc->status |= IRQ_LEVEL;
+		desc->handle_irq = handle_level_irq;
+	} else {
+		desc->handle_irq = handle_edge_irq;
+	}
+
+	return 0;
+}
+
+static struct irq_chip bcm63xx_internal_irq_chip = {
+	.name = "bcm63xx_ipic",
+	.startup = bcm63xx_internal_irq_startup,
+	.shutdown = bcm63xx_internal_irq_mask,
+
+	.mask = bcm63xx_internal_irq_mask,
+	.mask_ack = bcm63xx_internal_irq_mask,
+	.unmask = bcm63xx_internal_irq_unmask,
+};
+
+static struct irq_chip bcm63xx_external_irq_chip = {
+	.name = "bcm63xx_epic",
+	.startup = bcm63xx_external_irq_startup,
+	.shutdown = bcm63xx_external_irq_shutdown,
+
+	.ack = bcm63xx_external_irq_clear,
+
+	.mask = bcm63xx_external_irq_mask,
+	.unmask = bcm63xx_external_irq_unmask,
+
+	.set_type = bcm63xx_external_irq_set_type,
+};
+
+static struct irqaction cpu_ip2_cascade_action = {
+	.handler = no_action,
+	.name = "cascade_ip2",
+};
+
+void __init arch_init_irq(void)
+{
+	int i;
+
+	mips_cpu_irq_init();
+	for (i = IRQ_INTERNAL_BASE; i < NR_IRQS; ++i)
+		set_irq_chip_and_handler(i, &bcm63xx_internal_irq_chip,
+					 handle_level_irq);
+
+	for (i = IRQ_EXT_BASE; i < IRQ_EXT_BASE + 4; ++i)
+		set_irq_chip_and_handler(i, &bcm63xx_external_irq_chip,
+					 handle_edge_irq);
+
+	setup_irq(IRQ_MIPS_BASE + 2, &cpu_ip2_cascade_action);
+}
diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c
new file mode 100644
index 00000000000..fb284fbc585
--- /dev/null
+++ b/arch/mips/bcm63xx/prom.c
@@ -0,0 +1,55 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2008 Maxime Bizon
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+void __init prom_init(void)
+{
+	u32 reg, mask;
+
+	bcm63xx_cpu_init();
+
+	/* stop any running watchdog */
+	bcm_wdt_writel(WDT_STOP_1, WDT_CTL_REG);
+	bcm_wdt_writel(WDT_STOP_2, WDT_CTL_REG);
+
+	/* disable all hardware block clocks for now */
+	if (BCMCPU_IS_6338())
+		mask = CKCTL_6338_ALL_SAFE_EN;
+	else if (BCMCPU_IS_6345())
+		mask = CKCTL_6345_ALL_SAFE_EN;
+	else if (BCMCPU_IS_6348())
+		mask = CKCTL_6348_ALL_SAFE_EN;
+	else
+		/* BCMCPU_IS_6358() */
+		mask = CKCTL_6358_ALL_SAFE_EN;
+
+	reg = bcm_perf_readl(PERF_CKCTL_REG);
+	reg &= ~mask;
+	bcm_perf_writel(reg, PERF_CKCTL_REG);
+
+	/* assign command line from kernel config */
+	strcpy(arcs_cmdline, CONFIG_CMDLINE);
+
+	/* register gpiochip */
+	bcm63xx_gpio_init();
+
+	/* do low-level board init */
+	board_prom_init();
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c
new file mode 100644
index 00000000000..b18a0ca926f
--- /dev/null
+++ b/arch/mips/bcm63xx/setup.c
@@ -0,0 +1,125 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2008 Maxime Bizon
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+void bcm63xx_machine_halt(void)
+{
+	printk(KERN_INFO "System halted\n");
+	while (1)
+		;
+}
+
+static void bcm6348_a1_reboot(void)
+{
+	u32 reg;
+
+	/* soft reset all blocks */
+	printk(KERN_INFO "soft-resetting all blocks ...\n");
+	reg = bcm_perf_readl(PERF_SOFTRESET_REG);
+	reg &= ~SOFTRESET_6348_ALL;
+	bcm_perf_writel(reg, PERF_SOFTRESET_REG);
+	mdelay(10);
+
+	reg = bcm_perf_readl(PERF_SOFTRESET_REG);
+	reg |= SOFTRESET_6348_ALL;
+	bcm_perf_writel(reg, PERF_SOFTRESET_REG);
+	mdelay(10);
+
+	/* Jump to the power on address. */
+	printk(KERN_INFO "jumping to reset vector.\n");
+	/* set high vectors (base at 0xbfc00000) */
+	set_c0_status(ST0_BEV | ST0_ERL);
+	/* run uncached in kseg0 */
+	change_c0_config(CONF_CM_CMASK, CONF_CM_UNCACHED);
+	__flush_cache_all();
+	/* remove all wired TLB entries */
+	write_c0_wired(0);
+	__asm__ __volatile__(
+		"jr\t%0"
+		:
+		: "r" (0xbfc00000));
+	while (1)
+		;
+}
+
+void bcm63xx_machine_reboot(void)
+{
+	u32 reg;
+
+	/* mask and clear all external irq */
+	reg = bcm_perf_readl(PERF_EXTIRQ_CFG_REG);
+	reg &= ~EXTIRQ_CFG_MASK_ALL;
+	reg |= EXTIRQ_CFG_CLEAR_ALL;
+	bcm_perf_writel(reg, PERF_EXTIRQ_CFG_REG);
+
+	if (BCMCPU_IS_6348() && (bcm63xx_get_cpu_rev() == 0xa1))
+		bcm6348_a1_reboot();
+
+	printk(KERN_INFO "triggering watchdog soft-reset...\n");
+	bcm_perf_writel(SYS_PLL_SOFT_RESET, PERF_SYS_PLL_CTL_REG);
+	while (1)
+		;
+}
+
+static void __bcm63xx_machine_reboot(char *p)
+{
+	bcm63xx_machine_reboot();
+}
+
+/*
+ * return system type in /proc/cpuinfo
+ */
+const char *get_system_type(void)
+{
+	static char buf[128];
+	snprintf(buf, sizeof(buf), "bcm63xx/%s (0x%04x/0x%04X)",
+		 board_get_name(),
+		 bcm63xx_get_cpu_id(), bcm63xx_get_cpu_rev());
+	return buf;
+}
+
+void __init plat_time_init(void)
+{
+	mips_hpt_frequency = bcm63xx_get_cpu_freq() / 2;
+}
+
+void __init plat_mem_setup(void)
+{
+	add_memory_region(0, bcm63xx_get_memory_size(), BOOT_MEM_RAM);
+
+	_machine_halt = bcm63xx_machine_halt;
+	_machine_restart = __bcm63xx_machine_reboot;
+	pm_power_off = bcm63xx_machine_halt;
+
+	set_io_port_base(0);
+	ioport_resource.start = 0;
+	ioport_resource.end = ~0;
+
+	board_setup();
+}
+
+int __init bcm63xx_register_devices(void)
+{
+	return board_register_devices();
+}
+
+arch_initcall(bcm63xx_register_devices);
diff --git a/arch/mips/bcm63xx/timer.c b/arch/mips/bcm63xx/timer.c
new file mode 100644
index 00000000000..ba522bdcde4
--- /dev/null
+++ b/arch/mips/bcm63xx/timer.c
@@ -0,0 +1,205 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ * + * Copyright (C) 2008 Maxime Bizon + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(timer_reg_lock); +static DEFINE_SPINLOCK(timer_data_lock); +static struct clk *periph_clk; + +static struct timer_data { + void (*cb)(void *); + void *data; +} timer_data[BCM63XX_TIMER_COUNT]; + +static irqreturn_t timer_interrupt(int irq, void *dev_id) +{ + u32 stat; + int i; + + spin_lock(&timer_reg_lock); + stat = bcm_timer_readl(TIMER_IRQSTAT_REG); + bcm_timer_writel(stat, TIMER_IRQSTAT_REG); + spin_unlock(&timer_reg_lock); + + for (i = 0; i < BCM63XX_TIMER_COUNT; i++) { + if (!(stat & TIMER_IRQSTAT_TIMER_CAUSE(i))) + continue; + + spin_lock(&timer_data_lock); + if (!timer_data[i].cb) { + spin_unlock(&timer_data_lock); + continue; + } + + timer_data[i].cb(timer_data[i].data); + spin_unlock(&timer_data_lock); + } + + return IRQ_HANDLED; +} + +int bcm63xx_timer_enable(int id) +{ + u32 reg; + unsigned long flags; + + if (id >= BCM63XX_TIMER_COUNT) + return -EINVAL; + + spin_lock_irqsave(&timer_reg_lock, flags); + + reg = bcm_timer_readl(TIMER_CTLx_REG(id)); + reg |= TIMER_CTL_ENABLE_MASK; + bcm_timer_writel(reg, TIMER_CTLx_REG(id)); + + reg = bcm_timer_readl(TIMER_IRQSTAT_REG); + reg |= TIMER_IRQSTAT_TIMER_IR_EN(id); + bcm_timer_writel(reg, TIMER_IRQSTAT_REG); + + spin_unlock_irqrestore(&timer_reg_lock, flags); + return 0; +} + +EXPORT_SYMBOL(bcm63xx_timer_enable); + +int bcm63xx_timer_disable(int id) +{ + u32 reg; + unsigned long flags; + + if (id >= BCM63XX_TIMER_COUNT) + return -EINVAL; + + spin_lock_irqsave(&timer_reg_lock, flags); + + reg = bcm_timer_readl(TIMER_CTLx_REG(id)); + reg &= ~TIMER_CTL_ENABLE_MASK; + bcm_timer_writel(reg, TIMER_CTLx_REG(id)); + + reg = bcm_timer_readl(TIMER_IRQSTAT_REG); + reg &= ~TIMER_IRQSTAT_TIMER_IR_EN(id); + bcm_timer_writel(reg, TIMER_IRQSTAT_REG); + + spin_unlock_irqrestore(&timer_reg_lock, flags); + return 0; +} + +EXPORT_SYMBOL(bcm63xx_timer_disable); + +int bcm63xx_timer_register(int id, void (*callback)(void *data), void *data) +{ + unsigned long flags; + int ret; + + if (id >= BCM63XX_TIMER_COUNT || !callback) + return -EINVAL; + + ret = 0; + spin_lock_irqsave(&timer_data_lock, flags); + if (timer_data[id].cb) { + ret = -EBUSY; + goto out; + } + + timer_data[id].cb = callback; + timer_data[id].data = data; + +out: + spin_unlock_irqrestore(&timer_data_lock, flags); + return ret; +} + +EXPORT_SYMBOL(bcm63xx_timer_register); + +void bcm63xx_timer_unregister(int id) +{ + unsigned long flags; + + if (id >= BCM63XX_TIMER_COUNT) + return; + + spin_lock_irqsave(&timer_data_lock, flags); + timer_data[id].cb = NULL; + spin_unlock_irqrestore(&timer_data_lock, flags); +} + +EXPORT_SYMBOL(bcm63xx_timer_unregister); + +unsigned int bcm63xx_timer_countdown(unsigned int countdown_us) +{ + return (clk_get_rate(periph_clk) / (1000 * 1000)) * countdown_us; +} + +EXPORT_SYMBOL(bcm63xx_timer_countdown); + +int bcm63xx_timer_set(int id, int monotonic, unsigned int countdown_us) +{ + u32 reg, countdown; + unsigned long flags; + + if (id >= BCM63XX_TIMER_COUNT) + return -EINVAL; + + countdown = bcm63xx_timer_countdown(countdown_us); + if (countdown & ~TIMER_CTL_COUNTDOWN_MASK) + return -EINVAL; + + spin_lock_irqsave(&timer_reg_lock, flags); + reg = bcm_timer_readl(TIMER_CTLx_REG(id)); + + if (monotonic) + reg &= ~TIMER_CTL_MONOTONIC_MASK; + else + reg |= TIMER_CTL_MONOTONIC_MASK; + + reg &= ~TIMER_CTL_COUNTDOWN_MASK; + reg |= countdown; + bcm_timer_writel(reg, TIMER_CTLx_REG(id)); + + 
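+	/*
+	 * note: this only (re)arms the countdown; the timer is started
+	 * and stopped separately via bcm63xx_timer_enable()/disable().
+	 */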
spin_unlock_irqrestore(&timer_reg_lock, flags); + return 0; +} + +EXPORT_SYMBOL(bcm63xx_timer_set); + +int bcm63xx_timer_init(void) +{ + int ret, irq; + u32 reg; + + reg = bcm_timer_readl(TIMER_IRQSTAT_REG); + reg &= ~TIMER_IRQSTAT_TIMER0_IR_EN; + reg &= ~TIMER_IRQSTAT_TIMER1_IR_EN; + reg &= ~TIMER_IRQSTAT_TIMER2_IR_EN; + bcm_timer_writel(reg, TIMER_IRQSTAT_REG); + + periph_clk = clk_get(NULL, "periph"); + if (IS_ERR(periph_clk)) + return -ENODEV; + + irq = bcm63xx_get_irq_number(IRQ_TIMER); + ret = request_irq(irq, timer_interrupt, 0, "bcm63xx_timer", NULL); + if (ret) { + printk(KERN_ERR "bcm63xx_timer: failed to register irq\n"); + return ret; + } + + return 0; +} + +arch_initcall(bcm63xx_timer_init); diff --git a/arch/mips/configs/bcm63xx_defconfig b/arch/mips/configs/bcm63xx_defconfig new file mode 100644 index 00000000000..ea00c18d1f7 --- /dev/null +++ b/arch/mips/configs/bcm63xx_defconfig @@ -0,0 +1,972 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.30-rc6 +# Sun May 31 20:17:18 2009 +# +CONFIG_MIPS=y + +# +# Machine selection +# +# CONFIG_MACH_ALCHEMY is not set +# CONFIG_BASLER_EXCITE is not set +# CONFIG_BCM47XX is not set +CONFIG_BCM63XX=y +# CONFIG_MIPS_COBALT is not set +# CONFIG_MACH_DECSTATION is not set +# CONFIG_MACH_JAZZ is not set +# CONFIG_LASAT is not set +# CONFIG_LEMOTE_FULONG is not set +# CONFIG_MIPS_MALTA is not set +# CONFIG_MIPS_SIM is not set +# CONFIG_NEC_MARKEINS is not set +# CONFIG_MACH_VR41XX is not set +# CONFIG_NXP_STB220 is not set +# CONFIG_NXP_STB225 is not set +# CONFIG_PNX8550_JBS is not set +# CONFIG_PNX8550_STB810 is not set +# CONFIG_PMC_MSP is not set +# CONFIG_PMC_YOSEMITE is not set +# CONFIG_SGI_IP22 is not set +# CONFIG_SGI_IP27 is not set +# CONFIG_SGI_IP28 is not set +# CONFIG_SGI_IP32 is not set +# CONFIG_SIBYTE_CRHINE is not set +# CONFIG_SIBYTE_CARMEL is not set +# CONFIG_SIBYTE_CRHONE is not set +# CONFIG_SIBYTE_RHONE is not set +# CONFIG_SIBYTE_SWARM is not set +# CONFIG_SIBYTE_LITTLESUR is not set +# CONFIG_SIBYTE_SENTOSA is not set +# CONFIG_SIBYTE_BIGSUR is not set +# CONFIG_SNI_RM is not set +# CONFIG_MACH_TX39XX is not set +# CONFIG_MACH_TX49XX is not set +# CONFIG_MIKROTIK_RB532 is not set +# CONFIG_WR_PPMC is not set +# CONFIG_CAVIUM_OCTEON_SIMULATOR is not set +# CONFIG_CAVIUM_OCTEON_REFERENCE_BOARD is not set + +# +# CPU support +# +CONFIG_BCM63XX_CPU_6348=y +CONFIG_BCM63XX_CPU_6358=y +CONFIG_BOARD_BCM963XX=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_ARCH_SUPPORTS_OPROFILE=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y +CONFIG_SCHED_OMIT_FRAME_POINTER=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_CEVT_R4K_LIB=y +CONFIG_CEVT_R4K=y +CONFIG_CSRC_R4K_LIB=y +CONFIG_CSRC_R4K=y +CONFIG_DMA_NONCOHERENT=y +CONFIG_DMA_NEED_PCI_MAP_STATE=y +CONFIG_EARLY_PRINTK=y +CONFIG_SYS_HAS_EARLY_PRINTK=y +# CONFIG_HOTPLUG_CPU is not set +# CONFIG_NO_IOPORT is not set +CONFIG_GENERIC_GPIO=y +CONFIG_CPU_BIG_ENDIAN=y +# CONFIG_CPU_LITTLE_ENDIAN is not set +CONFIG_SYS_SUPPORTS_BIG_ENDIAN=y +CONFIG_IRQ_CPU=y +CONFIG_SWAP_IO_SPACE=y +CONFIG_MIPS_L1_CACHE_SHIFT=5 + +# +# CPU selection +# +# CONFIG_CPU_LOONGSON2 is not set +CONFIG_CPU_MIPS32_R1=y +# CONFIG_CPU_MIPS32_R2 is not set +# CONFIG_CPU_MIPS64_R1 is not set +# CONFIG_CPU_MIPS64_R2 is not set +# CONFIG_CPU_R3000 is not set +# CONFIG_CPU_TX39XX is not set 
+# CONFIG_CPU_VR41XX is not set +# CONFIG_CPU_R4300 is not set +# CONFIG_CPU_R4X00 is not set +# CONFIG_CPU_TX49XX is not set +# CONFIG_CPU_R5000 is not set +# CONFIG_CPU_R5432 is not set +# CONFIG_CPU_R5500 is not set +# CONFIG_CPU_R6000 is not set +# CONFIG_CPU_NEVADA is not set +# CONFIG_CPU_R8000 is not set +# CONFIG_CPU_R10000 is not set +# CONFIG_CPU_RM7000 is not set +# CONFIG_CPU_RM9000 is not set +# CONFIG_CPU_SB1 is not set +# CONFIG_CPU_CAVIUM_OCTEON is not set +CONFIG_SYS_HAS_CPU_MIPS32_R1=y +CONFIG_CPU_MIPS32=y +CONFIG_CPU_MIPSR1=y +CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y +CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y +CONFIG_HARDWARE_WATCHPOINTS=y + +# +# Kernel type +# +CONFIG_32BIT=y +# CONFIG_64BIT is not set +CONFIG_PAGE_SIZE_4KB=y +# CONFIG_PAGE_SIZE_8KB is not set +# CONFIG_PAGE_SIZE_16KB is not set +# CONFIG_PAGE_SIZE_32KB is not set +# CONFIG_PAGE_SIZE_64KB is not set +CONFIG_CPU_HAS_PREFETCH=y +CONFIG_MIPS_MT_DISABLED=y +# CONFIG_MIPS_MT_SMP is not set +# CONFIG_MIPS_MT_SMTC is not set +CONFIG_CPU_HAS_LLSC=y +CONFIG_CPU_HAS_SYNC=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_CPU_SUPPORTS_HIGHMEM=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +# CONFIG_HZ_48 is not set +# CONFIG_HZ_100 is not set +# CONFIG_HZ_128 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_256 is not set +# CONFIG_HZ_1000 is not set +# CONFIG_HZ_1024 is not set +CONFIG_SYS_SUPPORTS_ARBIT_HZ=y +CONFIG_HZ=250 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +# CONFIG_KEXEC is not set +# CONFIG_SECCOMP is not set +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +# CONFIG_SYSVIPC is not set +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set + +# +# RCU Subsystem +# +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=17 +# CONFIG_GROUP_SCHED is not set +# CONFIG_CGROUPS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +# CONFIG_RELAY is not set +# CONFIG_NAMESPACES is not set +# CONFIG_BLK_DEV_INITRD is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +CONFIG_EMBEDDED=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +# CONFIG_PCSPKR_PLATFORM is not set +CONFIG_BASE_FULL=y +# CONFIG_FUTEX is not set +# CONFIG_EPOLL is not set +# CONFIG_SIGNALFD is not set +# CONFIG_TIMERFD is not set +# CONFIG_EVENTFD is not set +# CONFIG_SHMEM is not set +# CONFIG_AIO is not set +# CONFIG_VM_EVENT_COUNTERS is 
not set +CONFIG_PCI_QUIRKS=y +# CONFIG_SLUB_DEBUG is not set +CONFIG_COMPAT_BRK=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_SLOW_WORK is not set +# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set +CONFIG_BASE_SMALL=0 +# CONFIG_MODULES is not set +CONFIG_BLOCK=y +# CONFIG_LBD is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_NOOP=y +CONFIG_DEFAULT_IOSCHED="noop" +# CONFIG_FREEZER is not set + +# +# Bus options (PCI, PCMCIA, EISA, ISA, TC) +# +CONFIG_HW_HAS_PCI=y +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +# CONFIG_ARCH_SUPPORTS_MSI is not set +# CONFIG_PCI_LEGACY is not set +# CONFIG_PCI_STUB is not set +# CONFIG_PCI_IOV is not set +CONFIG_MMU=y +CONFIG_PCCARD=y +# CONFIG_PCMCIA_DEBUG is not set +CONFIG_PCMCIA=y +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y +CONFIG_CARDBUS=y + +# +# PC-card bridges +# +# CONFIG_YENTA is not set +# CONFIG_PD6729 is not set +# CONFIG_I82092 is not set +CONFIG_PCMCIA_BCM63XX=y +# CONFIG_HOTPLUG_PCI is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_HAVE_AOUT is not set +# CONFIG_BINFMT_MISC is not set +CONFIG_TRAD_SIGNALS=y + +# +# Power management options +# +CONFIG_ARCH_SUSPEND_POSSIBLE=y +# CONFIG_PM is not set +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_PACKET is not set +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_NET_SCHED is not set +# CONFIG_DCB is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +# CONFIG_STANDALONE is not set +# 
CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_CONCAT is not set +CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_REDBOOT_PARTS is not set +# CONFIG_MTD_CMDLINE_PARTS is not set +# CONFIG_MTD_AR7_PARTS is not set + +# +# User Modules And Translation Layers +# +# CONFIG_MTD_CHAR is not set +# CONFIG_MTD_BLKDEVS is not set +# CONFIG_MTD_BLOCK is not set +# CONFIG_MTD_BLOCK_RO is not set +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set + +# +# RAM/ROM/Flash chip drivers +# +CONFIG_MTD_CFI=y +# CONFIG_MTD_JEDECPROBE is not set +CONFIG_MTD_GEN_PROBE=y +# CONFIG_MTD_CFI_ADV_OPTIONS is not set +CONFIG_MTD_MAP_BANK_WIDTH_1=y +CONFIG_MTD_MAP_BANK_WIDTH_2=y +CONFIG_MTD_MAP_BANK_WIDTH_4=y +# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set +CONFIG_MTD_CFI_I1=y +CONFIG_MTD_CFI_I2=y +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_CFI_AMDSTD=y +# CONFIG_MTD_CFI_STAA is not set +CONFIG_MTD_CFI_UTIL=y +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +# CONFIG_MTD_ABSENT is not set + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_COMPLEX_MAPPINGS is not set +CONFIG_MTD_PHYSMAP=y +# CONFIG_MTD_PHYSMAP_COMPAT is not set +# CONFIG_MTD_INTEL_VR_NOR is not set +# CONFIG_MTD_PLATRAM is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_PMC551 is not set +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_PHRAM is not set +# CONFIG_MTD_MTDRAM is not set +# CONFIG_MTD_BLOCK2MTD is not set + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set +# CONFIG_MTD_NAND is not set +# CONFIG_MTD_ONENAND is not set + +# +# LPDDR flash memory drivers +# +# CONFIG_MTD_LPDDR is not set + +# +# UBI - Unsorted block images +# +# CONFIG_MTD_UBI is not set +# CONFIG_PARPORT is not set +# CONFIG_BLK_DEV is not set +# CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# + +# +# Enable only one of the two stacks, unless you know what you are doing +# +# CONFIG_FIREWIRE is not set +# CONFIG_IEEE1394 is not set +# CONFIG_I2O is not set +CONFIG_NETDEVICES=y +CONFIG_COMPAT_NET_DEV_OPS=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_VETH is not set +# CONFIG_ARCNET is not set +CONFIG_PHYLIB=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set +# CONFIG_VITESSE_PHY is not set +# CONFIG_SMSC_PHY is not set +# CONFIG_BROADCOM_PHY is not set +CONFIG_BCM63XX_PHY=y +# CONFIG_ICPLUS_PHY is not set +# CONFIG_REALTEK_PHY is not set +# CONFIG_NATIONAL_PHY is not set +# CONFIG_STE10XP is not set +# CONFIG_LSI_ET1011C_PHY is not set +# CONFIG_FIXED_PHY is not set +# CONFIG_MDIO_BITBANG is 
not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_AX88796 is not set +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_SMC91X is not set +# CONFIG_DM9000 is not set +# CONFIG_ETHOC is not set +# CONFIG_DNET is not set +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set +# CONFIG_NET_PCI is not set +# CONFIG_B44 is not set +# CONFIG_ATL2 is not set +CONFIG_BCM63XX_ENET=y +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set +# CONFIG_TR is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set +# CONFIG_NET_PCMCIA is not set +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +# CONFIG_INPUT is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_DEVKMEM is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_NOZOMI is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_SERIAL_BCM63XX=y +CONFIG_SERIAL_BCM63XX_CONSOLE=y +# CONFIG_UNIX98_PTYS is not set +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# PCMCIA character devices +# +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set +# CONFIG_IPWIRELESS is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set +# CONFIG_SPI is not set +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +# CONFIG_GPIO_SYSFS is not set + +# +# Memory mapped GPIO expanders: +# + +# +# I2C GPIO expanders: +# + +# +# PCI GPIO expanders: +# +# CONFIG_GPIO_BT8XX is not set + +# +# SPI GPIO expanders: +# +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +# CONFIG_THERMAL_HWMON is not set +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +CONFIG_SSB=y +CONFIG_SSB_SPROM=y +CONFIG_SSB_PCIHOST_POSSIBLE=y +CONFIG_SSB_PCIHOST=y +# CONFIG_SSB_B43_PCI_BRIDGE is not set +CONFIG_SSB_PCMCIAHOST_POSSIBLE=y +# CONFIG_SSB_PCMCIAHOST is not set +# CONFIG_SSB_SILENT is not set +# CONFIG_SSB_DEBUG is not set +CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y +# CONFIG_SSB_DRIVER_PCICORE is not set +# CONFIG_SSB_DRIVER_MIPS is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# 
CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_REGULATOR is not set + +# +# Multimedia devices +# + +# +# Multimedia core support +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +# CONFIG_VIDEO_MEDIA is not set + +# +# Multimedia drivers +# +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_DRM is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +# CONFIG_FB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +CONFIG_DISPLAY_SUPPORT=y + +# +# Display hardware drivers +# +# CONFIG_SOUND is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set +# CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set + +# +# Miscellaneous USB options +# +# CONFIG_USB_DEVICEFS is not set +# CONFIG_USB_DEVICE_CLASS is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_OTG is not set +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set +# CONFIG_USB_MON is not set +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set + +# +# USB Host Controller Drivers +# +# CONFIG_USB_C67X00_HCD is not set +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_EHCI_BIG_ENDIAN_MMIO=y +# CONFIG_USB_OXU210HP_HCD is not set +# CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_HCD_SSB is not set +CONFIG_USB_OHCI_BIG_ENDIAN_DESC=y +CONFIG_USB_OHCI_BIG_ENDIAN_MMIO=y +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +# CONFIG_USB_UHCI_HCD is not set +# CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_WHCI_HCD is not set +# CONFIG_USB_HWA_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_ACM is not set +# CONFIG_USB_PRINTER is not set +# CONFIG_USB_WDM is not set +# CONFIG_USB_TMC is not set + +# +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may +# + +# +# also be needed; see USB_STORAGE Help for more info +# +# CONFIG_USB_LIBUSUAL is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set + +# +# USB port drivers +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_ADUTUX is not set +# CONFIG_USB_SEVSEG is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_BERRY_CHARGE is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set +# CONFIG_USB_IOWARRIOR is not set +# CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_VST is not set +# CONFIG_USB_GADGET is not set + +# +# OTG and related infrastructure +# +# CONFIG_USB_GPIO_VBUS is not set +# CONFIG_NOP_USB_XCEIV is not set +# CONFIG_UWB is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_INFINIBAND is not set +CONFIG_RTC_LIB=y +# CONFIG_RTC_CLASS is not set +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +# CONFIG_UIO is not set +# CONFIG_STAGING is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +# 
CONFIG_EXT4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +# CONFIG_FILE_LOCKING is not set +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_JFFS2_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set +# CONFIG_NETWORK_FILESYSTEMS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_NLS is not set +# CONFIG_DLM is not set + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set +# CONFIG_DEBUG_KERNEL is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_TRACING_SUPPORT=y + +# +# Tracers +# +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +CONFIG_CMDLINE="console=ttyS0,115200" + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +# CONFIG_CRYPTO is not set +# CONFIG_BINARY_PRINTF is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_NLATTR=y diff --git a/arch/mips/include/asm/fixmap.h b/arch/mips/include/asm/fixmap.h index 0f5caa1307f..efeddc8db8b 100644 --- a/arch/mips/include/asm/fixmap.h +++ b/arch/mips/include/asm/fixmap.h @@ -67,11 +67,15 @@ enum fixed_addresses { * the start of the fixmap, and leave one page empty * at the top of mem.. 
*/ +#ifdef CONFIG_BCM63XX +#define FIXADDR_TOP ((unsigned long)(long)(int)0xff000000) +#else #if defined(CONFIG_CPU_TX39XX) || defined(CONFIG_CPU_TX49XX) #define FIXADDR_TOP ((unsigned long)(long)(int)(0xff000000 - 0x20000)) #else #define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) #endif +#endif #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_board.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_board.h new file mode 100644 index 00000000000..fa3e7e617b0 --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_board.h @@ -0,0 +1,12 @@ +#ifndef BCM63XX_BOARD_H_ +#define BCM63XX_BOARD_H_ + +const char *board_get_name(void); + +void board_prom_init(void); + +void board_setup(void); + +int board_register_devices(void); + +#endif /* ! BCM63XX_BOARD_H_ */ diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_clk.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_clk.h new file mode 100644 index 00000000000..8fcf8df4418 --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_clk.h @@ -0,0 +1,11 @@ +#ifndef BCM63XX_CLK_H_ +#define BCM63XX_CLK_H_ + +struct clk { + void (*set)(struct clk *, int); + unsigned int rate; + unsigned int usage; + int id; +}; + +#endif /* ! BCM63XX_CLK_H_ */ diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h new file mode 100644 index 00000000000..b12c4aca2cc --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h @@ -0,0 +1,538 @@ +#ifndef BCM63XX_CPU_H_ +#define BCM63XX_CPU_H_ + +#include +#include + +/* + * Macro to fetch bcm63xx cpu id and revision, should be optimized at + * compile time if only one CPU support is enabled (idea stolen from + * arm mach-types) + */ +#define BCM6338_CPU_ID 0x6338 +#define BCM6345_CPU_ID 0x6345 +#define BCM6348_CPU_ID 0x6348 +#define BCM6358_CPU_ID 0x6358 + +void __init bcm63xx_cpu_init(void); +u16 __bcm63xx_get_cpu_id(void); +u16 bcm63xx_get_cpu_rev(void); +unsigned int bcm63xx_get_cpu_freq(void); + +#ifdef CONFIG_BCM63XX_CPU_6338 +# ifdef bcm63xx_get_cpu_id +# undef bcm63xx_get_cpu_id +# define bcm63xx_get_cpu_id() __bcm63xx_get_cpu_id() +# define BCMCPU_RUNTIME_DETECT +# else +# define bcm63xx_get_cpu_id() BCM6338_CPU_ID +# endif +# define BCMCPU_IS_6338() (bcm63xx_get_cpu_id() == BCM6338_CPU_ID) +#else +# define BCMCPU_IS_6338() (0) +#endif + +#ifdef CONFIG_BCM63XX_CPU_6345 +# ifdef bcm63xx_get_cpu_id +# undef bcm63xx_get_cpu_id +# define bcm63xx_get_cpu_id() __bcm63xx_get_cpu_id() +# define BCMCPU_RUNTIME_DETECT +# else +# define bcm63xx_get_cpu_id() BCM6345_CPU_ID +# endif +# define BCMCPU_IS_6345() (bcm63xx_get_cpu_id() == BCM6345_CPU_ID) +#else +# define BCMCPU_IS_6345() (0) +#endif + +#ifdef CONFIG_BCM63XX_CPU_6348 +# ifdef bcm63xx_get_cpu_id +# undef bcm63xx_get_cpu_id +# define bcm63xx_get_cpu_id() __bcm63xx_get_cpu_id() +# define BCMCPU_RUNTIME_DETECT +# else +# define bcm63xx_get_cpu_id() BCM6348_CPU_ID +# endif +# define BCMCPU_IS_6348() (bcm63xx_get_cpu_id() == BCM6348_CPU_ID) +#else +# define BCMCPU_IS_6348() (0) +#endif + +#ifdef CONFIG_BCM63XX_CPU_6358 +# ifdef bcm63xx_get_cpu_id +# undef bcm63xx_get_cpu_id +# define bcm63xx_get_cpu_id() __bcm63xx_get_cpu_id() +# define BCMCPU_RUNTIME_DETECT +# else +# define bcm63xx_get_cpu_id() BCM6358_CPU_ID +# endif +# define BCMCPU_IS_6358() (bcm63xx_get_cpu_id() == BCM6358_CPU_ID) +#else +# define BCMCPU_IS_6358() (0) +#endif + +#ifndef bcm63xx_get_cpu_id +#error "No CPU 
support configured"
+#endif
+
+/*
+ * While register sets are (mostly) the same across 63xx CPUs, the
+ * base addresses of these sets do change.
+ */
+enum bcm63xx_regs_set {
+	RSET_DSL_LMEM = 0,
+	RSET_PERF,
+	RSET_TIMER,
+	RSET_WDT,
+	RSET_UART0,
+	RSET_GPIO,
+	RSET_SPI,
+	RSET_UDC0,
+	RSET_OHCI0,
+	RSET_OHCI_PRIV,
+	RSET_USBH_PRIV,
+	RSET_MPI,
+	RSET_PCMCIA,
+	RSET_DSL,
+	RSET_ENET0,
+	RSET_ENET1,
+	RSET_ENETDMA,
+	RSET_EHCI0,
+	RSET_SDRAM,
+	RSET_MEMC,
+	RSET_DDR,
+};
+
+#define RSET_DSL_LMEM_SIZE (64 * 1024 * 4)
+#define RSET_DSL_SIZE 4096
+#define RSET_WDT_SIZE 12
+#define RSET_ENET_SIZE 2048
+#define RSET_ENETDMA_SIZE 2048
+#define RSET_UART_SIZE 24
+#define RSET_UDC_SIZE 256
+#define RSET_OHCI_SIZE 256
+#define RSET_EHCI_SIZE 256
+#define RSET_PCMCIA_SIZE 12
+
+/*
+ * 6338 register sets base address
+ */
+#define BCM_6338_DSL_LMEM_BASE (0xfff00000)
+#define BCM_6338_PERF_BASE (0xfffe0000)
+#define BCM_6338_BB_BASE (0xfffe0100)
+#define BCM_6338_TIMER_BASE (0xfffe0200)
+#define BCM_6338_WDT_BASE (0xfffe021c)
+#define BCM_6338_UART0_BASE (0xfffe0300)
+#define BCM_6338_GPIO_BASE (0xfffe0400)
+#define BCM_6338_SPI_BASE (0xfffe0c00)
+#define BCM_6338_UDC0_BASE (0xdeadbeef)
+#define BCM_6338_USBDMA_BASE (0xfffe2400)
+#define BCM_6338_OHCI0_BASE (0xdeadbeef)
+#define BCM_6338_OHCI_PRIV_BASE (0xfffe3000)
+#define BCM_6338_USBH_PRIV_BASE (0xdeadbeef)
+#define BCM_6338_MPI_BASE (0xfffe3160)
+#define BCM_6338_PCMCIA_BASE (0xdeadbeef)
+#define BCM_6338_SDRAM_REGS_BASE (0xfffe3100)
+#define BCM_6338_DSL_BASE (0xfffe1000)
+#define BCM_6338_SAR_BASE (0xfffe2000)
+#define BCM_6338_UBUS_BASE (0xdeadbeef)
+#define BCM_6338_ENET0_BASE (0xfffe2800)
+#define BCM_6338_ENET1_BASE (0xdeadbeef)
+#define BCM_6338_ENETDMA_BASE (0xfffe2400)
+#define BCM_6338_EHCI0_BASE (0xdeadbeef)
+#define BCM_6338_SDRAM_BASE (0xfffe3100)
+#define BCM_6338_MEMC_BASE (0xdeadbeef)
+#define BCM_6338_DDR_BASE (0xdeadbeef)
+
+/*
+ * 6345 register sets base address
+ */
+#define BCM_6345_DSL_LMEM_BASE (0xfff00000)
+#define BCM_6345_PERF_BASE (0xfffe0000)
+#define BCM_6345_BB_BASE (0xfffe0100)
+#define BCM_6345_TIMER_BASE (0xfffe0200)
+#define BCM_6345_WDT_BASE (0xfffe021c)
+#define BCM_6345_UART0_BASE (0xfffe0300)
+#define BCM_6345_GPIO_BASE (0xfffe0400)
+#define BCM_6345_SPI_BASE (0xdeadbeef)
+#define BCM_6345_UDC0_BASE (0xdeadbeef)
+#define BCM_6345_USBDMA_BASE (0xfffe2800)
+#define BCM_6345_ENET0_BASE (0xfffe1800)
+#define BCM_6345_ENETDMA_BASE (0xfffe2800)
+#define BCM_6345_PCMCIA_BASE (0xfffe2028)
+#define BCM_6345_MPI_BASE (0xdeadbeef)
+#define BCM_6345_OHCI0_BASE (0xfffe2100)
+#define BCM_6345_OHCI_PRIV_BASE (0xfffe2200)
+#define BCM_6345_USBH_PRIV_BASE (0xdeadbeef)
+#define BCM_6345_SDRAM_REGS_BASE (0xfffe2300)
+#define BCM_6345_DSL_BASE (0xdeadbeef)
+#define BCM_6345_SAR_BASE (0xdeadbeef)
+#define BCM_6345_UBUS_BASE (0xdeadbeef)
+#define BCM_6345_ENET1_BASE (0xdeadbeef)
+#define BCM_6345_EHCI0_BASE (0xdeadbeef)
+#define BCM_6345_SDRAM_BASE (0xfffe2300)
+#define BCM_6345_MEMC_BASE (0xdeadbeef)
+#define BCM_6345_DDR_BASE (0xdeadbeef)
+
+/*
+ * 6348 register sets base address
+ */
+#define BCM_6348_DSL_LMEM_BASE (0xfff00000)
+#define BCM_6348_PERF_BASE (0xfffe0000)
+#define BCM_6348_TIMER_BASE (0xfffe0200)
+#define BCM_6348_WDT_BASE (0xfffe021c)
+#define BCM_6348_UART0_BASE (0xfffe0300)
+#define BCM_6348_GPIO_BASE (0xfffe0400)
+#define BCM_6348_SPI_BASE (0xfffe0c00)
+#define BCM_6348_UDC0_BASE (0xfffe1000)
+#define BCM_6348_OHCI0_BASE (0xfffe1b00)
+#define BCM_6348_OHCI_PRIV_BASE (0xfffe1c00)
+#define BCM_6348_USBH_PRIV_BASE
(0xdeadbeef) +#define BCM_6348_MPI_BASE (0xfffe2000) +#define BCM_6348_PCMCIA_BASE (0xfffe2054) +#define BCM_6348_SDRAM_REGS_BASE (0xfffe2300) +#define BCM_6348_DSL_BASE (0xfffe3000) +#define BCM_6348_ENET0_BASE (0xfffe6000) +#define BCM_6348_ENET1_BASE (0xfffe6800) +#define BCM_6348_ENETDMA_BASE (0xfffe7000) +#define BCM_6348_EHCI0_BASE (0xdeadbeef) +#define BCM_6348_SDRAM_BASE (0xfffe2300) +#define BCM_6348_MEMC_BASE (0xdeadbeef) +#define BCM_6348_DDR_BASE (0xdeadbeef) + +/* + * 6358 register sets base address + */ +#define BCM_6358_DSL_LMEM_BASE (0xfff00000) +#define BCM_6358_PERF_BASE (0xfffe0000) +#define BCM_6358_TIMER_BASE (0xfffe0040) +#define BCM_6358_WDT_BASE (0xfffe005c) +#define BCM_6358_UART0_BASE (0xfffe0100) +#define BCM_6358_GPIO_BASE (0xfffe0080) +#define BCM_6358_SPI_BASE (0xdeadbeef) +#define BCM_6358_UDC0_BASE (0xfffe0800) +#define BCM_6358_OHCI0_BASE (0xfffe1400) +#define BCM_6358_OHCI_PRIV_BASE (0xdeadbeef) +#define BCM_6358_USBH_PRIV_BASE (0xfffe1500) +#define BCM_6358_MPI_BASE (0xfffe1000) +#define BCM_6358_PCMCIA_BASE (0xfffe1054) +#define BCM_6358_SDRAM_REGS_BASE (0xfffe2300) +#define BCM_6358_DSL_BASE (0xfffe3000) +#define BCM_6358_ENET0_BASE (0xfffe4000) +#define BCM_6358_ENET1_BASE (0xfffe4800) +#define BCM_6358_ENETDMA_BASE (0xfffe5000) +#define BCM_6358_EHCI0_BASE (0xfffe1300) +#define BCM_6358_SDRAM_BASE (0xdeadbeef) +#define BCM_6358_MEMC_BASE (0xfffe1200) +#define BCM_6358_DDR_BASE (0xfffe12a0) + + +extern const unsigned long *bcm63xx_regs_base; + +static inline unsigned long bcm63xx_regset_address(enum bcm63xx_regs_set set) +{ +#ifdef BCMCPU_RUNTIME_DETECT + return bcm63xx_regs_base[set]; +#else +#ifdef CONFIG_BCM63XX_CPU_6338 + switch (set) { + case RSET_DSL_LMEM: + return BCM_6338_DSL_LMEM_BASE; + case RSET_PERF: + return BCM_6338_PERF_BASE; + case RSET_TIMER: + return BCM_6338_TIMER_BASE; + case RSET_WDT: + return BCM_6338_WDT_BASE; + case RSET_UART0: + return BCM_6338_UART0_BASE; + case RSET_GPIO: + return BCM_6338_GPIO_BASE; + case RSET_SPI: + return BCM_6338_SPI_BASE; + case RSET_UDC0: + return BCM_6338_UDC0_BASE; + case RSET_OHCI0: + return BCM_6338_OHCI0_BASE; + case RSET_OHCI_PRIV: + return BCM_6338_OHCI_PRIV_BASE; + case RSET_USBH_PRIV: + return BCM_6338_USBH_PRIV_BASE; + case RSET_MPI: + return BCM_6338_MPI_BASE; + case RSET_PCMCIA: + return BCM_6338_PCMCIA_BASE; + case RSET_DSL: + return BCM_6338_DSL_BASE; + case RSET_ENET0: + return BCM_6338_ENET0_BASE; + case RSET_ENET1: + return BCM_6338_ENET1_BASE; + case RSET_ENETDMA: + return BCM_6338_ENETDMA_BASE; + case RSET_EHCI0: + return BCM_6338_EHCI0_BASE; + case RSET_SDRAM: + return BCM_6338_SDRAM_BASE; + case RSET_MEMC: + return BCM_6338_MEMC_BASE; + case RSET_DDR: + return BCM_6338_DDR_BASE; + } +#endif +#ifdef CONFIG_BCM63XX_CPU_6345 + switch (set) { + case RSET_DSL_LMEM: + return BCM_6345_DSL_LMEM_BASE; + case RSET_PERF: + return BCM_6345_PERF_BASE; + case RSET_TIMER: + return BCM_6345_TIMER_BASE; + case RSET_WDT: + return BCM_6345_WDT_BASE; + case RSET_UART0: + return BCM_6345_UART0_BASE; + case RSET_GPIO: + return BCM_6345_GPIO_BASE; + case RSET_SPI: + return BCM_6345_SPI_BASE; + case RSET_UDC0: + return BCM_6345_UDC0_BASE; + case RSET_OHCI0: + return BCM_6345_OHCI0_BASE; + case RSET_OHCI_PRIV: + return BCM_6345_OHCI_PRIV_BASE; + case RSET_USBH_PRIV: + return BCM_6345_USBH_PRIV_BASE; + case RSET_MPI: + return BCM_6345_MPI_BASE; + case RSET_PCMCIA: + return BCM_6345_PCMCIA_BASE; + case RSET_DSL: + return BCM_6345_DSL_BASE; + case RSET_ENET0: + return BCM_6345_ENET0_BASE; + case RSET_ENET1: + 
return BCM_6345_ENET1_BASE;
+	case RSET_ENETDMA:
+		return BCM_6345_ENETDMA_BASE;
+	case RSET_EHCI0:
+		return BCM_6345_EHCI0_BASE;
+	case RSET_SDRAM:
+		return BCM_6345_SDRAM_BASE;
+	case RSET_MEMC:
+		return BCM_6345_MEMC_BASE;
+	case RSET_DDR:
+		return BCM_6345_DDR_BASE;
+	}
+#endif
+#ifdef CONFIG_BCM63XX_CPU_6348
+	switch (set) {
+	case RSET_DSL_LMEM:
+		return BCM_6348_DSL_LMEM_BASE;
+	case RSET_PERF:
+		return BCM_6348_PERF_BASE;
+	case RSET_TIMER:
+		return BCM_6348_TIMER_BASE;
+	case RSET_WDT:
+		return BCM_6348_WDT_BASE;
+	case RSET_UART0:
+		return BCM_6348_UART0_BASE;
+	case RSET_GPIO:
+		return BCM_6348_GPIO_BASE;
+	case RSET_SPI:
+		return BCM_6348_SPI_BASE;
+	case RSET_UDC0:
+		return BCM_6348_UDC0_BASE;
+	case RSET_OHCI0:
+		return BCM_6348_OHCI0_BASE;
+	case RSET_OHCI_PRIV:
+		return BCM_6348_OHCI_PRIV_BASE;
+	case RSET_USBH_PRIV:
+		return BCM_6348_USBH_PRIV_BASE;
+	case RSET_MPI:
+		return BCM_6348_MPI_BASE;
+	case RSET_PCMCIA:
+		return BCM_6348_PCMCIA_BASE;
+	case RSET_DSL:
+		return BCM_6348_DSL_BASE;
+	case RSET_ENET0:
+		return BCM_6348_ENET0_BASE;
+	case RSET_ENET1:
+		return BCM_6348_ENET1_BASE;
+	case RSET_ENETDMA:
+		return BCM_6348_ENETDMA_BASE;
+	case RSET_EHCI0:
+		return BCM_6348_EHCI0_BASE;
+	case RSET_SDRAM:
+		return BCM_6348_SDRAM_BASE;
+	case RSET_MEMC:
+		return BCM_6348_MEMC_BASE;
+	case RSET_DDR:
+		return BCM_6348_DDR_BASE;
+	}
+#endif
+#ifdef CONFIG_BCM63XX_CPU_6358
+	switch (set) {
+	case RSET_DSL_LMEM:
+		return BCM_6358_DSL_LMEM_BASE;
+	case RSET_PERF:
+		return BCM_6358_PERF_BASE;
+	case RSET_TIMER:
+		return BCM_6358_TIMER_BASE;
+	case RSET_WDT:
+		return BCM_6358_WDT_BASE;
+	case RSET_UART0:
+		return BCM_6358_UART0_BASE;
+	case RSET_GPIO:
+		return BCM_6358_GPIO_BASE;
+	case RSET_SPI:
+		return BCM_6358_SPI_BASE;
+	case RSET_UDC0:
+		return BCM_6358_UDC0_BASE;
+	case RSET_OHCI0:
+		return BCM_6358_OHCI0_BASE;
+	case RSET_OHCI_PRIV:
+		return BCM_6358_OHCI_PRIV_BASE;
+	case RSET_USBH_PRIV:
+		return BCM_6358_USBH_PRIV_BASE;
+	case RSET_MPI:
+		return BCM_6358_MPI_BASE;
+	case RSET_PCMCIA:
+		return BCM_6358_PCMCIA_BASE;
+	case RSET_ENET0:
+		return BCM_6358_ENET0_BASE;
+	case RSET_ENET1:
+		return BCM_6358_ENET1_BASE;
+	case RSET_ENETDMA:
+		return BCM_6358_ENETDMA_BASE;
+	case RSET_DSL:
+		return BCM_6358_DSL_BASE;
+	case RSET_EHCI0:
+		return BCM_6358_EHCI0_BASE;
+	case RSET_SDRAM:
+		return BCM_6358_SDRAM_BASE;
+	case RSET_MEMC:
+		return BCM_6358_MEMC_BASE;
+	case RSET_DDR:
+		return BCM_6358_DDR_BASE;
+	}
+#endif
+#endif
+	/* unreached */
+	return 0;
+}
+
+/*
+ * IRQ numbers change across CPUs too
+ */
+enum bcm63xx_irq {
+	IRQ_TIMER = 0,
+	IRQ_UART0,
+	IRQ_DSL,
+	IRQ_ENET0,
+	IRQ_ENET1,
+	IRQ_ENET_PHY,
+	IRQ_OHCI0,
+	IRQ_EHCI0,
+	IRQ_PCMCIA0,
+	IRQ_ENET0_RXDMA,
+	IRQ_ENET0_TXDMA,
+	IRQ_ENET1_RXDMA,
+	IRQ_ENET1_TXDMA,
+	IRQ_PCI,
+	IRQ_PCMCIA,
+};
+
+/*
+ * 6338 irqs
+ */
+#define BCM_6338_TIMER_IRQ (IRQ_INTERNAL_BASE + 0)
+#define BCM_6338_SPI_IRQ (IRQ_INTERNAL_BASE + 1)
+#define BCM_6338_UART0_IRQ (IRQ_INTERNAL_BASE + 2)
+#define BCM_6338_DG_IRQ (IRQ_INTERNAL_BASE + 4)
+#define BCM_6338_DSL_IRQ (IRQ_INTERNAL_BASE + 5)
+#define BCM_6338_ATM_IRQ (IRQ_INTERNAL_BASE + 6)
+#define BCM_6338_UDC0_IRQ (IRQ_INTERNAL_BASE + 7)
+#define BCM_6338_ENET0_IRQ (IRQ_INTERNAL_BASE + 8)
+#define BCM_6338_ENET_PHY_IRQ (IRQ_INTERNAL_BASE + 9)
+#define BCM_6338_SDRAM_IRQ (IRQ_INTERNAL_BASE + 10)
+#define BCM_6338_USB_CNTL_RX_DMA_IRQ (IRQ_INTERNAL_BASE + 11)
+#define BCM_6338_USB_CNTL_TX_DMA_IRQ (IRQ_INTERNAL_BASE + 12)
+#define BCM_6338_USB_BULK_RX_DMA_IRQ (IRQ_INTERNAL_BASE + 13)
+#define
BCM_6338_USB_BULK_TX_DMA_IRQ (IRQ_INTERNAL_BASE + 14) +#define BCM_6338_ENET0_RXDMA_IRQ (IRQ_INTERNAL_BASE + 15) +#define BCM_6338_ENET0_TXDMA_IRQ (IRQ_INTERNAL_BASE + 16) +#define BCM_6338_SDIO_IRQ (IRQ_INTERNAL_BASE + 17) + +/* + * 6345 irqs + */ +#define BCM_6345_TIMER_IRQ (IRQ_INTERNAL_BASE + 0) +#define BCM_6345_UART0_IRQ (IRQ_INTERNAL_BASE + 2) +#define BCM_6345_DSL_IRQ (IRQ_INTERNAL_BASE + 3) +#define BCM_6345_ATM_IRQ (IRQ_INTERNAL_BASE + 4) +#define BCM_6345_USB_IRQ (IRQ_INTERNAL_BASE + 5) +#define BCM_6345_ENET0_IRQ (IRQ_INTERNAL_BASE + 8) +#define BCM_6345_ENET_PHY_IRQ (IRQ_INTERNAL_BASE + 12) +#define BCM_6345_ENET0_RXDMA_IRQ (IRQ_INTERNAL_BASE + 13 + 1) +#define BCM_6345_ENET0_TXDMA_IRQ (IRQ_INTERNAL_BASE + 13 + 2) +#define BCM_6345_EBI_RX_IRQ (IRQ_INTERNAL_BASE + 13 + 5) +#define BCM_6345_EBI_TX_IRQ (IRQ_INTERNAL_BASE + 13 + 6) +#define BCM_6345_RESERVED_RX_IRQ (IRQ_INTERNAL_BASE + 13 + 9) +#define BCM_6345_RESERVED_TX_IRQ (IRQ_INTERNAL_BASE + 13 + 10) +#define BCM_6345_USB_BULK_RX_DMA_IRQ (IRQ_INTERNAL_BASE + 13 + 13) +#define BCM_6345_USB_BULK_TX_DMA_IRQ (IRQ_INTERNAL_BASE + 13 + 14) +#define BCM_6345_USB_CNTL_RX_DMA_IRQ (IRQ_INTERNAL_BASE + 13 + 15) +#define BCM_6345_USB_CNTL_TX_DMA_IRQ (IRQ_INTERNAL_BASE + 13 + 16) +#define BCM_6345_USB_ISO_RX_DMA_IRQ (IRQ_INTERNAL_BASE + 13 + 17) +#define BCM_6345_USB_ISO_TX_DMA_IRQ (IRQ_INTERNAL_BASE + 13 + 18) + +/* + * 6348 irqs + */ +#define BCM_6348_TIMER_IRQ (IRQ_INTERNAL_BASE + 0) +#define BCM_6348_UART0_IRQ (IRQ_INTERNAL_BASE + 2) +#define BCM_6348_DSL_IRQ (IRQ_INTERNAL_BASE + 4) +#define BCM_6348_ENET1_IRQ (IRQ_INTERNAL_BASE + 7) +#define BCM_6348_ENET0_IRQ (IRQ_INTERNAL_BASE + 8) +#define BCM_6348_ENET_PHY_IRQ (IRQ_INTERNAL_BASE + 9) +#define BCM_6348_OHCI0_IRQ (IRQ_INTERNAL_BASE + 12) +#define BCM_6348_ENET0_RXDMA_IRQ (IRQ_INTERNAL_BASE + 20) +#define BCM_6348_ENET0_TXDMA_IRQ (IRQ_INTERNAL_BASE + 21) +#define BCM_6348_ENET1_RXDMA_IRQ (IRQ_INTERNAL_BASE + 22) +#define BCM_6348_ENET1_TXDMA_IRQ (IRQ_INTERNAL_BASE + 23) +#define BCM_6348_PCMCIA_IRQ (IRQ_INTERNAL_BASE + 24) +#define BCM_6348_PCI_IRQ (IRQ_INTERNAL_BASE + 24) + +/* + * 6358 irqs + */ +#define BCM_6358_TIMER_IRQ (IRQ_INTERNAL_BASE + 0) +#define BCM_6358_UART0_IRQ (IRQ_INTERNAL_BASE + 2) +#define BCM_6358_OHCI0_IRQ (IRQ_INTERNAL_BASE + 5) +#define BCM_6358_ENET1_IRQ (IRQ_INTERNAL_BASE + 6) +#define BCM_6358_ENET0_IRQ (IRQ_INTERNAL_BASE + 8) +#define BCM_6358_ENET_PHY_IRQ (IRQ_INTERNAL_BASE + 9) +#define BCM_6358_EHCI0_IRQ (IRQ_INTERNAL_BASE + 10) +#define BCM_6358_ENET0_RXDMA_IRQ (IRQ_INTERNAL_BASE + 15) +#define BCM_6358_ENET0_TXDMA_IRQ (IRQ_INTERNAL_BASE + 16) +#define BCM_6358_ENET1_RXDMA_IRQ (IRQ_INTERNAL_BASE + 17) +#define BCM_6358_ENET1_TXDMA_IRQ (IRQ_INTERNAL_BASE + 18) +#define BCM_6358_DSL_IRQ (IRQ_INTERNAL_BASE + 29) +#define BCM_6358_PCI_IRQ (IRQ_INTERNAL_BASE + 31) +#define BCM_6358_PCMCIA_IRQ (IRQ_INTERNAL_BASE + 24) + +extern const int *bcm63xx_irqs; + +static inline int bcm63xx_get_irq_number(enum bcm63xx_irq irq) +{ + return bcm63xx_irqs[irq]; +} + +/* + * return installed memory size + */ +unsigned int bcm63xx_get_memory_size(void); + +#endif /* !BCM63XX_CPU_H_ */ diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cs.h new file mode 100644 index 00000000000..b1821c866e5 --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cs.h @@ -0,0 +1,10 @@ +#ifndef BCM63XX_CS_H +#define BCM63XX_CS_H + +int bcm63xx_set_cs_base(unsigned int cs, u32 base, unsigned int size); +int 
bcm63xx_set_cs_timing(unsigned int cs, unsigned int wait,
+			  unsigned int setup, unsigned int hold);
+int bcm63xx_set_cs_param(unsigned int cs, u32 flags);
+int bcm63xx_set_cs_status(unsigned int cs, int enable);
+
+#endif /* !BCM63XX_CS_H */
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h
new file mode 100644
index 00000000000..b587d45c304
--- /dev/null
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h
@@ -0,0 +1,13 @@
+#ifndef __BCM63XX_DSP_H
+#define __BCM63XX_DSP_H
+
+struct bcm63xx_dsp_platform_data {
+	unsigned gpio_rst;
+	unsigned gpio_int;
+	unsigned cs;
+	unsigned ext_irq;
+};
+
+int __init bcm63xx_dsp_register(const struct bcm63xx_dsp_platform_data *pd);
+
+#endif /* __BCM63XX_DSP_H */
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_enet.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_enet.h
new file mode 100644
index 00000000000..d53f611184b
--- /dev/null
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_enet.h
@@ -0,0 +1,45 @@
+#ifndef BCM63XX_DEV_ENET_H_
+#define BCM63XX_DEV_ENET_H_
+
+#include
+#include
+
+/*
+ * on-board ethernet platform data
+ */
+struct bcm63xx_enet_platform_data {
+	char mac_addr[ETH_ALEN];
+
+	int has_phy;
+
+	/* if has_phy, then set use_internal_phy */
+	int use_internal_phy;
+
+	/* or fill phy info to use an external one */
+	int phy_id;
+	int has_phy_interrupt;
+	int phy_interrupt;
+
+	/* if has_phy, use autonegotiated pause parameters or force
+	 * them */
+	int pause_auto;
+	int pause_rx;
+	int pause_tx;
+
+	/* if !has_phy, set desired forced speed/duplex */
+	int force_speed_100;
+	int force_duplex_full;
+
+	/* if !has_phy, set callback to perform mii device
+	 * init/remove */
+	int (*mii_config)(struct net_device *dev, int probe,
+			  int (*mii_read)(struct net_device *dev,
+					  int phy_id, int reg),
+			  void (*mii_write)(struct net_device *dev,
+					    int phy_id, int reg, int val));
+};
+
+int __init bcm63xx_enet_register(int unit,
+				 const struct bcm63xx_enet_platform_data *pd);
+
+#endif /* ! BCM63XX_DEV_ENET_H_ */
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_pci.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_pci.h
new file mode 100644
index 00000000000..c549344b70a
--- /dev/null
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_pci.h
@@ -0,0 +1,6 @@
+#ifndef BCM63XX_DEV_PCI_H_
+#define BCM63XX_DEV_PCI_H_
+
+extern int bcm63xx_pci_enabled;
+
+#endif /* BCM63XX_DEV_PCI_H_ */
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h
new file mode 100644
index 00000000000..76a0b7216af
--- /dev/null
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h
@@ -0,0 +1,22 @@
+#ifndef BCM63XX_GPIO_H
+#define BCM63XX_GPIO_H
+
+#include
+
+int __init bcm63xx_gpio_init(void);
+
+static inline unsigned long bcm63xx_gpio_count(void)
+{
+	switch (bcm63xx_get_cpu_id()) {
+	case BCM6358_CPU_ID:
+		return 40;
+	case BCM6348_CPU_ID:
+	default:
+		return 37;
+	}
+}
+
+#define GPIO_DIR_OUT 0x0
+#define GPIO_DIR_IN 0x1
+
+#endif /* !BCM63XX_GPIO_H */
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h
new file mode 100644
index 00000000000..91180fac6ed
--- /dev/null
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h
@@ -0,0 +1,93 @@
+#ifndef BCM63XX_IO_H_
+#define BCM63XX_IO_H_
+
+#include "bcm63xx_cpu.h"
+
+/*
+ * Physical memory map, RAM is mapped at 0x0.
+ *
+ * Note that size MUST be a power of two.
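+ * (e.g. the 16 MB PCMCIA common window defined below runs from
+ * 0x20000000 to 0x20ffffff)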
+ */ +#define BCM_PCMCIA_COMMON_BASE_PA (0x20000000) +#define BCM_PCMCIA_COMMON_SIZE (16 * 1024 * 1024) +#define BCM_PCMCIA_COMMON_END_PA (BCM_PCMCIA_COMMON_BASE_PA + \ + BCM_PCMCIA_COMMON_SIZE - 1) + +#define BCM_PCMCIA_ATTR_BASE_PA (0x21000000) +#define BCM_PCMCIA_ATTR_SIZE (16 * 1024 * 1024) +#define BCM_PCMCIA_ATTR_END_PA (BCM_PCMCIA_ATTR_BASE_PA + \ + BCM_PCMCIA_ATTR_SIZE - 1) + +#define BCM_PCMCIA_IO_BASE_PA (0x22000000) +#define BCM_PCMCIA_IO_SIZE (64 * 1024) +#define BCM_PCMCIA_IO_END_PA (BCM_PCMCIA_IO_BASE_PA + \ + BCM_PCMCIA_IO_SIZE - 1) + +#define BCM_PCI_MEM_BASE_PA (0x30000000) +#define BCM_PCI_MEM_SIZE (128 * 1024 * 1024) +#define BCM_PCI_MEM_END_PA (BCM_PCI_MEM_BASE_PA + \ + BCM_PCI_MEM_SIZE - 1) + +#define BCM_PCI_IO_BASE_PA (0x08000000) +#define BCM_PCI_IO_SIZE (64 * 1024) +#define BCM_PCI_IO_END_PA (BCM_PCI_IO_BASE_PA + \ + BCM_PCI_IO_SIZE - 1) +#define BCM_PCI_IO_HALF_PA (BCM_PCI_IO_BASE_PA + \ + (BCM_PCI_IO_SIZE / 2) - 1) + +#define BCM_CB_MEM_BASE_PA (0x38000000) +#define BCM_CB_MEM_SIZE (128 * 1024 * 1024) +#define BCM_CB_MEM_END_PA (BCM_CB_MEM_BASE_PA + \ + BCM_CB_MEM_SIZE - 1) + + +/* + * Internal registers are accessed through KSEG3 + */ +#define BCM_REGS_VA(x) ((void __iomem *)(x)) + +#define bcm_readb(a) (*(volatile unsigned char *) BCM_REGS_VA(a)) +#define bcm_readw(a) (*(volatile unsigned short *) BCM_REGS_VA(a)) +#define bcm_readl(a) (*(volatile unsigned int *) BCM_REGS_VA(a)) +#define bcm_writeb(v, a) (*(volatile unsigned char *) BCM_REGS_VA((a)) = (v)) +#define bcm_writew(v, a) (*(volatile unsigned short *) BCM_REGS_VA((a)) = (v)) +#define bcm_writel(v, a) (*(volatile unsigned int *) BCM_REGS_VA((a)) = (v)) + +/* + * IO helpers to access register set for current CPU + */ +#define bcm_rset_readb(s, o) bcm_readb(bcm63xx_regset_address(s) + (o)) +#define bcm_rset_readw(s, o) bcm_readw(bcm63xx_regset_address(s) + (o)) +#define bcm_rset_readl(s, o) bcm_readl(bcm63xx_regset_address(s) + (o)) +#define bcm_rset_writeb(s, v, o) bcm_writeb((v), \ + bcm63xx_regset_address(s) + (o)) +#define bcm_rset_writew(s, v, o) bcm_writew((v), \ + bcm63xx_regset_address(s) + (o)) +#define bcm_rset_writel(s, v, o) bcm_writel((v), \ + bcm63xx_regset_address(s) + (o)) + +/* + * helpers for frequently used register sets + */ +#define bcm_perf_readl(o) bcm_rset_readl(RSET_PERF, (o)) +#define bcm_perf_writel(v, o) bcm_rset_writel(RSET_PERF, (v), (o)) +#define bcm_timer_readl(o) bcm_rset_readl(RSET_TIMER, (o)) +#define bcm_timer_writel(v, o) bcm_rset_writel(RSET_TIMER, (v), (o)) +#define bcm_wdt_readl(o) bcm_rset_readl(RSET_WDT, (o)) +#define bcm_wdt_writel(v, o) bcm_rset_writel(RSET_WDT, (v), (o)) +#define bcm_gpio_readl(o) bcm_rset_readl(RSET_GPIO, (o)) +#define bcm_gpio_writel(v, o) bcm_rset_writel(RSET_GPIO, (v), (o)) +#define bcm_uart0_readl(o) bcm_rset_readl(RSET_UART0, (o)) +#define bcm_uart0_writel(v, o) bcm_rset_writel(RSET_UART0, (v), (o)) +#define bcm_mpi_readl(o) bcm_rset_readl(RSET_MPI, (o)) +#define bcm_mpi_writel(v, o) bcm_rset_writel(RSET_MPI, (v), (o)) +#define bcm_pcmcia_readl(o) bcm_rset_readl(RSET_PCMCIA, (o)) +#define bcm_pcmcia_writel(v, o) bcm_rset_writel(RSET_PCMCIA, (v), (o)) +#define bcm_sdram_readl(o) bcm_rset_readl(RSET_SDRAM, (o)) +#define bcm_sdram_writel(v, o) bcm_rset_writel(RSET_SDRAM, (v), (o)) +#define bcm_memc_readl(o) bcm_rset_readl(RSET_MEMC, (o)) +#define bcm_memc_writel(v, o) bcm_rset_writel(RSET_MEMC, (v), (o)) +#define bcm_ddr_readl(o) bcm_rset_readl(RSET_DDR, (o)) +#define bcm_ddr_writel(v, o) bcm_rset_writel(RSET_DDR, (v), (o)) + +#endif /* ! 
BCM63XX_IO_H_ */ diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_irq.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_irq.h new file mode 100644 index 00000000000..5f95577c821 --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_irq.h @@ -0,0 +1,15 @@ +#ifndef BCM63XX_IRQ_H_ +#define BCM63XX_IRQ_H_ + +#include + +#define IRQ_MIPS_BASE 0 +#define IRQ_INTERNAL_BASE 8 + +#define IRQ_EXT_BASE (IRQ_MIPS_BASE + 3) +#define IRQ_EXT_0 (IRQ_EXT_BASE + 0) +#define IRQ_EXT_1 (IRQ_EXT_BASE + 1) +#define IRQ_EXT_2 (IRQ_EXT_BASE + 2) +#define IRQ_EXT_3 (IRQ_EXT_BASE + 3) + +#endif /* ! BCM63XX_IRQ_H_ */ diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h new file mode 100644 index 00000000000..ed4ccec87dd --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h @@ -0,0 +1,773 @@ +#ifndef BCM63XX_REGS_H_ +#define BCM63XX_REGS_H_ + +/************************************************************************* + * _REG relative to RSET_PERF + *************************************************************************/ + +/* Chip Identifier / Revision register */ +#define PERF_REV_REG 0x0 +#define REV_CHIPID_SHIFT 16 +#define REV_CHIPID_MASK (0xffff << REV_CHIPID_SHIFT) +#define REV_REVID_SHIFT 0 +#define REV_REVID_MASK (0xffff << REV_REVID_SHIFT) + +/* Clock Control register */ +#define PERF_CKCTL_REG 0x4 + +#define CKCTL_6338_ADSLPHY_EN (1 << 0) +#define CKCTL_6338_MPI_EN (1 << 1) +#define CKCTL_6338_DRAM_EN (1 << 2) +#define CKCTL_6338_ENET_EN (1 << 4) +#define CKCTL_6338_USBS_EN (1 << 4) +#define CKCTL_6338_SAR_EN (1 << 5) +#define CKCTL_6338_SPI_EN (1 << 9) + +#define CKCTL_6338_ALL_SAFE_EN (CKCTL_6338_ADSLPHY_EN | \ + CKCTL_6338_MPI_EN | \ + CKCTL_6338_ENET_EN | \ + CKCTL_6338_SAR_EN | \ + CKCTL_6338_SPI_EN) + +#define CKCTL_6345_CPU_EN (1 << 0) +#define CKCTL_6345_BUS_EN (1 << 1) +#define CKCTL_6345_EBI_EN (1 << 2) +#define CKCTL_6345_UART_EN (1 << 3) +#define CKCTL_6345_ADSLPHY_EN (1 << 4) +#define CKCTL_6345_ENET_EN (1 << 7) +#define CKCTL_6345_USBH_EN (1 << 8) + +#define CKCTL_6345_ALL_SAFE_EN (CKCTL_6345_ENET_EN | \ + CKCTL_6345_USBH_EN | \ + CKCTL_6345_ADSLPHY_EN) + +#define CKCTL_6348_ADSLPHY_EN (1 << 0) +#define CKCTL_6348_MPI_EN (1 << 1) +#define CKCTL_6348_SDRAM_EN (1 << 2) +#define CKCTL_6348_M2M_EN (1 << 3) +#define CKCTL_6348_ENET_EN (1 << 4) +#define CKCTL_6348_SAR_EN (1 << 5) +#define CKCTL_6348_USBS_EN (1 << 6) +#define CKCTL_6348_USBH_EN (1 << 8) +#define CKCTL_6348_SPI_EN (1 << 9) + +#define CKCTL_6348_ALL_SAFE_EN (CKCTL_6348_ADSLPHY_EN | \ + CKCTL_6348_M2M_EN | \ + CKCTL_6348_ENET_EN | \ + CKCTL_6348_SAR_EN | \ + CKCTL_6348_USBS_EN | \ + CKCTL_6348_USBH_EN | \ + CKCTL_6348_SPI_EN) + +#define CKCTL_6358_ENET_EN (1 << 4) +#define CKCTL_6358_ADSLPHY_EN (1 << 5) +#define CKCTL_6358_PCM_EN (1 << 8) +#define CKCTL_6358_SPI_EN (1 << 9) +#define CKCTL_6358_USBS_EN (1 << 10) +#define CKCTL_6358_SAR_EN (1 << 11) +#define CKCTL_6358_EMUSB_EN (1 << 17) +#define CKCTL_6358_ENET0_EN (1 << 18) +#define CKCTL_6358_ENET1_EN (1 << 19) +#define CKCTL_6358_USBSU_EN (1 << 20) +#define CKCTL_6358_EPHY_EN (1 << 21) + +#define CKCTL_6358_ALL_SAFE_EN (CKCTL_6358_ENET_EN | \ + CKCTL_6358_ADSLPHY_EN | \ + CKCTL_6358_PCM_EN | \ + CKCTL_6358_SPI_EN | \ + CKCTL_6358_USBS_EN | \ + CKCTL_6358_SAR_EN | \ + CKCTL_6358_EMUSB_EN | \ + CKCTL_6358_ENET0_EN | \ + CKCTL_6358_ENET1_EN | \ + CKCTL_6358_USBSU_EN | \ + CKCTL_6358_EPHY_EN) + +/* System PLL Control register */ +#define PERF_SYS_PLL_CTL_REG 0x8 +#define SYS_PLL_SOFT_RESET 0x1 
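For context, the CKCTL_*_EN bits above are plain clock-enable bits in the clock control register, meant to be driven through the bcm_perf_readl()/bcm_perf_writel() accessors from bcm63xx_io.h earlier in this series. A minimal sketch of the read-modify-write pattern follows; the function name is illustrative and not part of this patch, and locking is omitted:

static void bcm63xx_clk_set(u32 mask, int enable)
{
	u32 val;

	val = bcm_perf_readl(PERF_CKCTL_REG);
	if (enable)
		val |= mask;		/* e.g. CKCTL_6348_ENET_EN */
	else
		val &= ~mask;
	bcm_perf_writel(val, PERF_CKCTL_REG);
}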
+ +/* Interrupt Mask register */ +#define PERF_IRQMASK_REG 0xc + +/* Interrupt Status register */ +#define PERF_IRQSTAT_REG 0x10 + +/* External Interrupt Configuration register */ +#define PERF_EXTIRQ_CFG_REG 0x14 +#define EXTIRQ_CFG_SENSE(x) (1 << (x)) +#define EXTIRQ_CFG_STAT(x) (1 << (x + 5)) +#define EXTIRQ_CFG_CLEAR(x) (1 << (x + 10)) +#define EXTIRQ_CFG_MASK(x) (1 << (x + 15)) +#define EXTIRQ_CFG_BOTHEDGE(x) (1 << (x + 20)) +#define EXTIRQ_CFG_LEVELSENSE(x) (1 << (x + 25)) + +#define EXTIRQ_CFG_CLEAR_ALL (0xf << 10) +#define EXTIRQ_CFG_MASK_ALL (0xf << 15) + +/* Soft Reset register */ +#define PERF_SOFTRESET_REG 0x28 + +#define SOFTRESET_6338_SPI_MASK (1 << 0) +#define SOFTRESET_6338_ENET_MASK (1 << 2) +#define SOFTRESET_6338_USBH_MASK (1 << 3) +#define SOFTRESET_6338_USBS_MASK (1 << 4) +#define SOFTRESET_6338_ADSL_MASK (1 << 5) +#define SOFTRESET_6338_DMAMEM_MASK (1 << 6) +#define SOFTRESET_6338_SAR_MASK (1 << 7) +#define SOFTRESET_6338_ACLC_MASK (1 << 8) +#define SOFTRESET_6338_ADSLMIPSPLL_MASK (1 << 10) +#define SOFTRESET_6338_ALL (SOFTRESET_6338_SPI_MASK | \ + SOFTRESET_6338_ENET_MASK | \ + SOFTRESET_6338_USBH_MASK | \ + SOFTRESET_6338_USBS_MASK | \ + SOFTRESET_6338_ADSL_MASK | \ + SOFTRESET_6338_DMAMEM_MASK | \ + SOFTRESET_6338_SAR_MASK | \ + SOFTRESET_6338_ACLC_MASK | \ + SOFTRESET_6338_ADSLMIPSPLL_MASK) + +#define SOFTRESET_6348_SPI_MASK (1 << 0) +#define SOFTRESET_6348_ENET_MASK (1 << 2) +#define SOFTRESET_6348_USBH_MASK (1 << 3) +#define SOFTRESET_6348_USBS_MASK (1 << 4) +#define SOFTRESET_6348_ADSL_MASK (1 << 5) +#define SOFTRESET_6348_DMAMEM_MASK (1 << 6) +#define SOFTRESET_6348_SAR_MASK (1 << 7) +#define SOFTRESET_6348_ACLC_MASK (1 << 8) +#define SOFTRESET_6348_ADSLMIPSPLL_MASK (1 << 10) + +#define SOFTRESET_6348_ALL (SOFTRESET_6348_SPI_MASK | \ + SOFTRESET_6348_ENET_MASK | \ + SOFTRESET_6348_USBH_MASK | \ + SOFTRESET_6348_USBS_MASK | \ + SOFTRESET_6348_ADSL_MASK | \ + SOFTRESET_6348_DMAMEM_MASK | \ + SOFTRESET_6348_SAR_MASK | \ + SOFTRESET_6348_ACLC_MASK | \ + SOFTRESET_6348_ADSLMIPSPLL_MASK) + +/* MIPS PLL control register */ +#define PERF_MIPSPLLCTL_REG 0x34 +#define MIPSPLLCTL_N1_SHIFT 20 +#define MIPSPLLCTL_N1_MASK (0x7 << MIPSPLLCTL_N1_SHIFT) +#define MIPSPLLCTL_N2_SHIFT 15 +#define MIPSPLLCTL_N2_MASK (0x1f << MIPSPLLCTL_N2_SHIFT) +#define MIPSPLLCTL_M1REF_SHIFT 12 +#define MIPSPLLCTL_M1REF_MASK (0x7 << MIPSPLLCTL_M1REF_SHIFT) +#define MIPSPLLCTL_M2REF_SHIFT 9 +#define MIPSPLLCTL_M2REF_MASK (0x7 << MIPSPLLCTL_M2REF_SHIFT) +#define MIPSPLLCTL_M1CPU_SHIFT 6 +#define MIPSPLLCTL_M1CPU_MASK (0x7 << MIPSPLLCTL_M1CPU_SHIFT) +#define MIPSPLLCTL_M1BUS_SHIFT 3 +#define MIPSPLLCTL_M1BUS_MASK (0x7 << MIPSPLLCTL_M1BUS_SHIFT) +#define MIPSPLLCTL_M2BUS_SHIFT 0 +#define MIPSPLLCTL_M2BUS_MASK (0x7 << MIPSPLLCTL_M2BUS_SHIFT) + +/* ADSL PHY PLL Control register */ +#define PERF_ADSLPLLCTL_REG 0x38 +#define ADSLPLLCTL_N1_SHIFT 20 +#define ADSLPLLCTL_N1_MASK (0x7 << ADSLPLLCTL_N1_SHIFT) +#define ADSLPLLCTL_N2_SHIFT 15 +#define ADSLPLLCTL_N2_MASK (0x1f << ADSLPLLCTL_N2_SHIFT) +#define ADSLPLLCTL_M1REF_SHIFT 12 +#define ADSLPLLCTL_M1REF_MASK (0x7 << ADSLPLLCTL_M1REF_SHIFT) +#define ADSLPLLCTL_M2REF_SHIFT 9 +#define ADSLPLLCTL_M2REF_MASK (0x7 << ADSLPLLCTL_M2REF_SHIFT) +#define ADSLPLLCTL_M1CPU_SHIFT 6 +#define ADSLPLLCTL_M1CPU_MASK (0x7 << ADSLPLLCTL_M1CPU_SHIFT) +#define ADSLPLLCTL_M1BUS_SHIFT 3 +#define ADSLPLLCTL_M1BUS_MASK (0x7 << ADSLPLLCTL_M1BUS_SHIFT) +#define ADSLPLLCTL_M2BUS_SHIFT 0 +#define ADSLPLLCTL_M2BUS_MASK (0x7 << ADSLPLLCTL_M2BUS_SHIFT) + 
+#define ADSLPLLCTL_VAL(n1, n2, m1ref, m2ref, m1cpu, m1bus, m2bus) \ + (((n1) << ADSLPLLCTL_N1_SHIFT) | \ + ((n2) << ADSLPLLCTL_N2_SHIFT) | \ + ((m1ref) << ADSLPLLCTL_M1REF_SHIFT) | \ + ((m2ref) << ADSLPLLCTL_M2REF_SHIFT) | \ + ((m1cpu) << ADSLPLLCTL_M1CPU_SHIFT) | \ + ((m1bus) << ADSLPLLCTL_M1BUS_SHIFT) | \ + ((m2bus) << ADSLPLLCTL_M2BUS_SHIFT)) + + +/************************************************************************* + * _REG relative to RSET_TIMER + *************************************************************************/ + +#define BCM63XX_TIMER_COUNT 4 +#define TIMER_T0_ID 0 +#define TIMER_T1_ID 1 +#define TIMER_T2_ID 2 +#define TIMER_WDT_ID 3 + +/* Timer irqstat register */ +#define TIMER_IRQSTAT_REG 0 +#define TIMER_IRQSTAT_TIMER_CAUSE(x) (1 << (x)) +#define TIMER_IRQSTAT_TIMER0_CAUSE (1 << 0) +#define TIMER_IRQSTAT_TIMER1_CAUSE (1 << 1) +#define TIMER_IRQSTAT_TIMER2_CAUSE (1 << 2) +#define TIMER_IRQSTAT_WDT_CAUSE (1 << 3) +#define TIMER_IRQSTAT_TIMER_IR_EN(x) (1 << ((x) + 8)) +#define TIMER_IRQSTAT_TIMER0_IR_EN (1 << 8) +#define TIMER_IRQSTAT_TIMER1_IR_EN (1 << 9) +#define TIMER_IRQSTAT_TIMER2_IR_EN (1 << 10) + +/* Timer control register */ +#define TIMER_CTLx_REG(x) (0x4 + (x * 4)) +#define TIMER_CTL0_REG 0x4 +#define TIMER_CTL1_REG 0x8 +#define TIMER_CTL2_REG 0xC +#define TIMER_CTL_COUNTDOWN_MASK (0x3fffffff) +#define TIMER_CTL_MONOTONIC_MASK (1 << 30) +#define TIMER_CTL_ENABLE_MASK (1 << 31) + + +/************************************************************************* + * _REG relative to RSET_WDT + *************************************************************************/ + +/* Watchdog default count register */ +#define WDT_DEFVAL_REG 0x0 + +/* Watchdog control register */ +#define WDT_CTL_REG 0x4 + +/* Watchdog control register constants */ +#define WDT_START_1 (0xff00) +#define WDT_START_2 (0x00ff) +#define WDT_STOP_1 (0xee00) +#define WDT_STOP_2 (0x00ee) + +/* Watchdog reset length register */ +#define WDT_RSTLEN_REG 0x8 + + +/************************************************************************* + * _REG relative to RSET_UARTx + *************************************************************************/ + +/* UART Control Register */ +#define UART_CTL_REG 0x0 +#define UART_CTL_RXTMOUTCNT_SHIFT 0 +#define UART_CTL_RXTMOUTCNT_MASK (0x1f << UART_CTL_RXTMOUTCNT_SHIFT) +#define UART_CTL_RSTTXDN_SHIFT 5 +#define UART_CTL_RSTTXDN_MASK (1 << UART_CTL_RSTTXDN_SHIFT) +#define UART_CTL_RSTRXFIFO_SHIFT 6 +#define UART_CTL_RSTRXFIFO_MASK (1 << UART_CTL_RSTRXFIFO_SHIFT) +#define UART_CTL_RSTTXFIFO_SHIFT 7 +#define UART_CTL_RSTTXFIFO_MASK (1 << UART_CTL_RSTTXFIFO_SHIFT) +#define UART_CTL_STOPBITS_SHIFT 8 +#define UART_CTL_STOPBITS_MASK (0xf << UART_CTL_STOPBITS_SHIFT) +#define UART_CTL_STOPBITS_1 (0x7 << UART_CTL_STOPBITS_SHIFT) +#define UART_CTL_STOPBITS_2 (0xf << UART_CTL_STOPBITS_SHIFT) +#define UART_CTL_BITSPERSYM_SHIFT 12 +#define UART_CTL_BITSPERSYM_MASK (0x3 << UART_CTL_BITSPERSYM_SHIFT) +#define UART_CTL_XMITBRK_SHIFT 14 +#define UART_CTL_XMITBRK_MASK (1 << UART_CTL_XMITBRK_SHIFT) +#define UART_CTL_RSVD_SHIFT 15 +#define UART_CTL_RSVD_MASK (1 << UART_CTL_RSVD_SHIFT) +#define UART_CTL_RXPAREVEN_SHIFT 16 +#define UART_CTL_RXPAREVEN_MASK (1 << UART_CTL_RXPAREVEN_SHIFT) +#define UART_CTL_RXPAREN_SHIFT 17 +#define UART_CTL_RXPAREN_MASK (1 << UART_CTL_RXPAREN_SHIFT) +#define UART_CTL_TXPAREVEN_SHIFT 18 +#define UART_CTL_TXPAREVEN_MASK (1 << UART_CTL_TXPAREVEN_SHIFT) +#define UART_CTL_TXPAREN_SHIFT 18 +#define UART_CTL_TXPAREN_MASK (1 << UART_CTL_TXPAREN_SHIFT) +#define 
UART_CTL_LOOPBACK_SHIFT 20 +#define UART_CTL_LOOPBACK_MASK (1 << UART_CTL_LOOPBACK_SHIFT) +#define UART_CTL_RXEN_SHIFT 21 +#define UART_CTL_RXEN_MASK (1 << UART_CTL_RXEN_SHIFT) +#define UART_CTL_TXEN_SHIFT 22 +#define UART_CTL_TXEN_MASK (1 << UART_CTL_TXEN_SHIFT) +#define UART_CTL_BRGEN_SHIFT 23 +#define UART_CTL_BRGEN_MASK (1 << UART_CTL_BRGEN_SHIFT) + +/* UART Baudword register */ +#define UART_BAUD_REG 0x4 + +/* UART Misc Control register */ +#define UART_MCTL_REG 0x8 +#define UART_MCTL_DTR_SHIFT 0 +#define UART_MCTL_DTR_MASK (1 << UART_MCTL_DTR_SHIFT) +#define UART_MCTL_RTS_SHIFT 1 +#define UART_MCTL_RTS_MASK (1 << UART_MCTL_RTS_SHIFT) +#define UART_MCTL_RXFIFOTHRESH_SHIFT 8 +#define UART_MCTL_RXFIFOTHRESH_MASK (0xf << UART_MCTL_RXFIFOTHRESH_SHIFT) +#define UART_MCTL_TXFIFOTHRESH_SHIFT 12 +#define UART_MCTL_TXFIFOTHRESH_MASK (0xf << UART_MCTL_TXFIFOTHRESH_SHIFT) +#define UART_MCTL_RXFIFOFILL_SHIFT 16 +#define UART_MCTL_RXFIFOFILL_MASK (0x1f << UART_MCTL_RXFIFOFILL_SHIFT) +#define UART_MCTL_TXFIFOFILL_SHIFT 24 +#define UART_MCTL_TXFIFOFILL_MASK (0x1f << UART_MCTL_TXFIFOFILL_SHIFT) + +/* UART External Input Configuration register */ +#define UART_EXTINP_REG 0xc +#define UART_EXTINP_RI_SHIFT 0 +#define UART_EXTINP_RI_MASK (1 << UART_EXTINP_RI_SHIFT) +#define UART_EXTINP_CTS_SHIFT 1 +#define UART_EXTINP_CTS_MASK (1 << UART_EXTINP_CTS_SHIFT) +#define UART_EXTINP_DCD_SHIFT 2 +#define UART_EXTINP_DCD_MASK (1 << UART_EXTINP_DCD_SHIFT) +#define UART_EXTINP_DSR_SHIFT 3 +#define UART_EXTINP_DSR_MASK (1 << UART_EXTINP_DSR_SHIFT) +#define UART_EXTINP_IRSTAT(x) (1 << (x + 4)) +#define UART_EXTINP_IRMASK(x) (1 << (x + 8)) +#define UART_EXTINP_IR_RI 0 +#define UART_EXTINP_IR_CTS 1 +#define UART_EXTINP_IR_DCD 2 +#define UART_EXTINP_IR_DSR 3 +#define UART_EXTINP_RI_NOSENSE_SHIFT 16 +#define UART_EXTINP_RI_NOSENSE_MASK (1 << UART_EXTINP_RI_NOSENSE_SHIFT) +#define UART_EXTINP_CTS_NOSENSE_SHIFT 17 +#define UART_EXTINP_CTS_NOSENSE_MASK (1 << UART_EXTINP_CTS_NOSENSE_SHIFT) +#define UART_EXTINP_DCD_NOSENSE_SHIFT 18 +#define UART_EXTINP_DCD_NOSENSE_MASK (1 << UART_EXTINP_DCD_NOSENSE_SHIFT) +#define UART_EXTINP_DSR_NOSENSE_SHIFT 19 +#define UART_EXTINP_DSR_NOSENSE_MASK (1 << UART_EXTINP_DSR_NOSENSE_SHIFT) + +/* UART Interrupt register */ +#define UART_IR_REG 0x10 +#define UART_IR_MASK(x) (1 << (x + 16)) +#define UART_IR_STAT(x) (1 << (x)) +#define UART_IR_EXTIP 0 +#define UART_IR_TXUNDER 1 +#define UART_IR_TXOVER 2 +#define UART_IR_TXTRESH 3 +#define UART_IR_TXRDLATCH 4 +#define UART_IR_TXEMPTY 5 +#define UART_IR_RXUNDER 6 +#define UART_IR_RXOVER 7 +#define UART_IR_RXTIMEOUT 8 +#define UART_IR_RXFULL 9 +#define UART_IR_RXTHRESH 10 +#define UART_IR_RXNOTEMPTY 11 +#define UART_IR_RXFRAMEERR 12 +#define UART_IR_RXPARERR 13 +#define UART_IR_RXBRK 14 +#define UART_IR_TXDONE 15 + +/* UART Fifo register */ +#define UART_FIFO_REG 0x14 +#define UART_FIFO_VALID_SHIFT 0 +#define UART_FIFO_VALID_MASK 0xff +#define UART_FIFO_FRAMEERR_SHIFT 8 +#define UART_FIFO_FRAMEERR_MASK (1 << UART_FIFO_FRAMEERR_SHIFT) +#define UART_FIFO_PARERR_SHIFT 9 +#define UART_FIFO_PARERR_MASK (1 << UART_FIFO_PARERR_SHIFT) +#define UART_FIFO_BRKDET_SHIFT 10 +#define UART_FIFO_BRKDET_MASK (1 << UART_FIFO_BRKDET_SHIFT) +#define UART_FIFO_ANYERR_MASK (UART_FIFO_FRAMEERR_MASK | \ + UART_FIFO_PARERR_MASK | \ + UART_FIFO_BRKDET_MASK) + + +/************************************************************************* + * _REG relative to RSET_GPIO + *************************************************************************/ + +/* GPIO registers */ +#define 
GPIO_CTL_HI_REG 0x0 +#define GPIO_CTL_LO_REG 0x4 +#define GPIO_DATA_HI_REG 0x8 +#define GPIO_DATA_LO_REG 0xC + +/* GPIO mux registers and constants */ +#define GPIO_MODE_REG 0x18 + +#define GPIO_MODE_6348_G4_DIAG 0x00090000 +#define GPIO_MODE_6348_G4_UTOPIA 0x00080000 +#define GPIO_MODE_6348_G4_LEGACY_LED 0x00030000 +#define GPIO_MODE_6348_G4_MII_SNOOP 0x00020000 +#define GPIO_MODE_6348_G4_EXT_EPHY 0x00010000 +#define GPIO_MODE_6348_G3_DIAG 0x00009000 +#define GPIO_MODE_6348_G3_UTOPIA 0x00008000 +#define GPIO_MODE_6348_G3_EXT_MII 0x00007000 +#define GPIO_MODE_6348_G2_DIAG 0x00000900 +#define GPIO_MODE_6348_G2_PCI 0x00000500 +#define GPIO_MODE_6348_G1_DIAG 0x00000090 +#define GPIO_MODE_6348_G1_UTOPIA 0x00000080 +#define GPIO_MODE_6348_G1_SPI_UART 0x00000060 +#define GPIO_MODE_6348_G1_SPI_MASTER 0x00000060 +#define GPIO_MODE_6348_G1_MII_PCCARD 0x00000040 +#define GPIO_MODE_6348_G1_MII_SNOOP 0x00000020 +#define GPIO_MODE_6348_G1_EXT_EPHY 0x00000010 +#define GPIO_MODE_6348_G0_DIAG 0x00000009 +#define GPIO_MODE_6348_G0_EXT_MII 0x00000007 + +#define GPIO_MODE_6358_EXTRACS (1 << 5) +#define GPIO_MODE_6358_UART1 (1 << 6) +#define GPIO_MODE_6358_EXTRA_SPI_SS (1 << 7) +#define GPIO_MODE_6358_SERIAL_LED (1 << 10) +#define GPIO_MODE_6358_UTOPIA (1 << 12) + + +/************************************************************************* + * _REG relative to RSET_ENET + *************************************************************************/ + +/* Receiver Configuration register */ +#define ENET_RXCFG_REG 0x0 +#define ENET_RXCFG_ALLMCAST_SHIFT 1 +#define ENET_RXCFG_ALLMCAST_MASK (1 << ENET_RXCFG_ALLMCAST_SHIFT) +#define ENET_RXCFG_PROMISC_SHIFT 3 +#define ENET_RXCFG_PROMISC_MASK (1 << ENET_RXCFG_PROMISC_SHIFT) +#define ENET_RXCFG_LOOPBACK_SHIFT 4 +#define ENET_RXCFG_LOOPBACK_MASK (1 << ENET_RXCFG_LOOPBACK_SHIFT) +#define ENET_RXCFG_ENFLOW_SHIFT 5 +#define ENET_RXCFG_ENFLOW_MASK (1 << ENET_RXCFG_ENFLOW_SHIFT) + +/* Receive Maximum Length register */ +#define ENET_RXMAXLEN_REG 0x4 +#define ENET_RXMAXLEN_SHIFT 0 +#define ENET_RXMAXLEN_MASK (0x7ff << ENET_RXMAXLEN_SHIFT) + +/* Transmit Maximum Length register */ +#define ENET_TXMAXLEN_REG 0x8 +#define ENET_TXMAXLEN_SHIFT 0 +#define ENET_TXMAXLEN_MASK (0x7ff << ENET_TXMAXLEN_SHIFT) + +/* MII Status/Control register */ +#define ENET_MIISC_REG 0x10 +#define ENET_MIISC_MDCFREQDIV_SHIFT 0 +#define ENET_MIISC_MDCFREQDIV_MASK (0x7f << ENET_MIISC_MDCFREQDIV_SHIFT) +#define ENET_MIISC_PREAMBLEEN_SHIFT 7 +#define ENET_MIISC_PREAMBLEEN_MASK (1 << ENET_MIISC_PREAMBLEEN_SHIFT) + +/* MII Data register */ +#define ENET_MIIDATA_REG 0x14 +#define ENET_MIIDATA_DATA_SHIFT 0 +#define ENET_MIIDATA_DATA_MASK (0xffff << ENET_MIIDATA_DATA_SHIFT) +#define ENET_MIIDATA_TA_SHIFT 16 +#define ENET_MIIDATA_TA_MASK (0x3 << ENET_MIIDATA_TA_SHIFT) +#define ENET_MIIDATA_REG_SHIFT 18 +#define ENET_MIIDATA_REG_MASK (0x1f << ENET_MIIDATA_REG_SHIFT) +#define ENET_MIIDATA_PHYID_SHIFT 23 +#define ENET_MIIDATA_PHYID_MASK (0x1f << ENET_MIIDATA_PHYID_SHIFT) +#define ENET_MIIDATA_OP_READ_MASK (0x6 << 28) +#define ENET_MIIDATA_OP_WRITE_MASK (0x5 << 28) + +/* Ethernet Interrupt Mask register */ +#define ENET_IRMASK_REG 0x18 + +/* Ethernet Interrupt register */ +#define ENET_IR_REG 0x1c +#define ENET_IR_MII (1 << 0) +#define ENET_IR_MIB (1 << 1) +#define ENET_IR_FLOWC (1 << 2) + +/* Ethernet Control register */ +#define ENET_CTL_REG 0x2c +#define ENET_CTL_ENABLE_SHIFT 0 +#define ENET_CTL_ENABLE_MASK (1 << ENET_CTL_ENABLE_SHIFT) +#define ENET_CTL_DISABLE_SHIFT 1 +#define ENET_CTL_DISABLE_MASK (1 << 
ENET_CTL_DISABLE_SHIFT) +#define ENET_CTL_SRESET_SHIFT 2 +#define ENET_CTL_SRESET_MASK (1 << ENET_CTL_SRESET_SHIFT) +#define ENET_CTL_EPHYSEL_SHIFT 3 +#define ENET_CTL_EPHYSEL_MASK (1 << ENET_CTL_EPHYSEL_SHIFT) + +/* Transmit Control register */ +#define ENET_TXCTL_REG 0x30 +#define ENET_TXCTL_FD_SHIFT 0 +#define ENET_TXCTL_FD_MASK (1 << ENET_TXCTL_FD_SHIFT) + +/* Transmit Watermask register */ +#define ENET_TXWMARK_REG 0x34 +#define ENET_TXWMARK_WM_SHIFT 0 +#define ENET_TXWMARK_WM_MASK (0x3f << ENET_TXWMARK_WM_SHIFT) + +/* MIB Control register */ +#define ENET_MIBCTL_REG 0x38 +#define ENET_MIBCTL_RDCLEAR_SHIFT 0 +#define ENET_MIBCTL_RDCLEAR_MASK (1 << ENET_MIBCTL_RDCLEAR_SHIFT) + +/* Perfect Match Data Low register */ +#define ENET_PML_REG(x) (0x58 + (x) * 8) +#define ENET_PMH_REG(x) (0x5c + (x) * 8) +#define ENET_PMH_DATAVALID_SHIFT 16 +#define ENET_PMH_DATAVALID_MASK (1 << ENET_PMH_DATAVALID_SHIFT) + +/* MIB register */ +#define ENET_MIB_REG(x) (0x200 + (x) * 4) +#define ENET_MIB_REG_COUNT 55 + + +/************************************************************************* + * _REG relative to RSET_ENETDMA + *************************************************************************/ + +/* Controller Configuration Register */ +#define ENETDMA_CFG_REG (0x0) +#define ENETDMA_CFG_EN_SHIFT 0 +#define ENETDMA_CFG_EN_MASK (1 << ENETDMA_CFG_EN_SHIFT) +#define ENETDMA_CFG_FLOWCH_MASK(x) (1 << ((x >> 1) + 1)) + +/* Flow Control Descriptor Low Threshold register */ +#define ENETDMA_FLOWCL_REG(x) (0x4 + (x) * 6) + +/* Flow Control Descriptor High Threshold register */ +#define ENETDMA_FLOWCH_REG(x) (0x8 + (x) * 6) + +/* Flow Control Descriptor Buffer Alloca Threshold register */ +#define ENETDMA_BUFALLOC_REG(x) (0xc + (x) * 6) +#define ENETDMA_BUFALLOC_FORCE_SHIFT 31 +#define ENETDMA_BUFALLOC_FORCE_MASK (1 << ENETDMA_BUFALLOC_FORCE_SHIFT) + +/* Channel Configuration register */ +#define ENETDMA_CHANCFG_REG(x) (0x100 + (x) * 0x10) +#define ENETDMA_CHANCFG_EN_SHIFT 0 +#define ENETDMA_CHANCFG_EN_MASK (1 << ENETDMA_CHANCFG_EN_SHIFT) +#define ENETDMA_CHANCFG_PKTHALT_SHIFT 1 +#define ENETDMA_CHANCFG_PKTHALT_MASK (1 << ENETDMA_CHANCFG_PKTHALT_SHIFT) + +/* Interrupt Control/Status register */ +#define ENETDMA_IR_REG(x) (0x104 + (x) * 0x10) +#define ENETDMA_IR_BUFDONE_MASK (1 << 0) +#define ENETDMA_IR_PKTDONE_MASK (1 << 1) +#define ENETDMA_IR_NOTOWNER_MASK (1 << 2) + +/* Interrupt Mask register */ +#define ENETDMA_IRMASK_REG(x) (0x108 + (x) * 0x10) + +/* Maximum Burst Length */ +#define ENETDMA_MAXBURST_REG(x) (0x10C + (x) * 0x10) + +/* Ring Start Address register */ +#define ENETDMA_RSTART_REG(x) (0x200 + (x) * 0x10) + +/* State Ram Word 2 */ +#define ENETDMA_SRAM2_REG(x) (0x204 + (x) * 0x10) + +/* State Ram Word 3 */ +#define ENETDMA_SRAM3_REG(x) (0x208 + (x) * 0x10) + +/* State Ram Word 4 */ +#define ENETDMA_SRAM4_REG(x) (0x20c + (x) * 0x10) + + +/************************************************************************* + * _REG relative to RSET_OHCI_PRIV + *************************************************************************/ + +#define OHCI_PRIV_REG 0x0 +#define OHCI_PRIV_PORT1_HOST_SHIFT 0 +#define OHCI_PRIV_PORT1_HOST_MASK (1 << OHCI_PRIV_PORT1_HOST_SHIFT) +#define OHCI_PRIV_REG_SWAP_SHIFT 3 +#define OHCI_PRIV_REG_SWAP_MASK (1 << OHCI_PRIV_REG_SWAP_SHIFT) + + +/************************************************************************* + * _REG relative to RSET_USBH_PRIV + *************************************************************************/ + +#define USBH_PRIV_SWAP_REG 0x0 +#define 
USBH_PRIV_SWAP_EHCI_ENDN_SHIFT 4 +#define USBH_PRIV_SWAP_EHCI_ENDN_MASK (1 << USBH_PRIV_SWAP_EHCI_ENDN_SHIFT) +#define USBH_PRIV_SWAP_EHCI_DATA_SHIFT 3 +#define USBH_PRIV_SWAP_EHCI_DATA_MASK (1 << USBH_PRIV_SWAP_EHCI_DATA_SHIFT) +#define USBH_PRIV_SWAP_OHCI_ENDN_SHIFT 1 +#define USBH_PRIV_SWAP_OHCI_ENDN_MASK (1 << USBH_PRIV_SWAP_OHCI_ENDN_SHIFT) +#define USBH_PRIV_SWAP_OHCI_DATA_SHIFT 0 +#define USBH_PRIV_SWAP_OHCI_DATA_MASK (1 << USBH_PRIV_SWAP_OHCI_DATA_SHIFT) + +#define USBH_PRIV_TEST_REG 0x24 + + +/************************************************************************* + * _REG relative to RSET_MPI + *************************************************************************/ + +/* well known (hard wired) chip select */ +#define MPI_CS_PCMCIA_COMMON 4 +#define MPI_CS_PCMCIA_ATTR 5 +#define MPI_CS_PCMCIA_IO 6 + +/* Chip select base register */ +#define MPI_CSBASE_REG(x) (0x0 + (x) * 8) +#define MPI_CSBASE_BASE_SHIFT 13 +#define MPI_CSBASE_BASE_MASK (0x1ffff << MPI_CSBASE_BASE_SHIFT) +#define MPI_CSBASE_SIZE_SHIFT 0 +#define MPI_CSBASE_SIZE_MASK (0xf << MPI_CSBASE_SIZE_SHIFT) + +#define MPI_CSBASE_SIZE_8K 0 +#define MPI_CSBASE_SIZE_16K 1 +#define MPI_CSBASE_SIZE_32K 2 +#define MPI_CSBASE_SIZE_64K 3 +#define MPI_CSBASE_SIZE_128K 4 +#define MPI_CSBASE_SIZE_256K 5 +#define MPI_CSBASE_SIZE_512K 6 +#define MPI_CSBASE_SIZE_1M 7 +#define MPI_CSBASE_SIZE_2M 8 +#define MPI_CSBASE_SIZE_4M 9 +#define MPI_CSBASE_SIZE_8M 10 +#define MPI_CSBASE_SIZE_16M 11 +#define MPI_CSBASE_SIZE_32M 12 +#define MPI_CSBASE_SIZE_64M 13 +#define MPI_CSBASE_SIZE_128M 14 +#define MPI_CSBASE_SIZE_256M 15 + +/* Chip select control register */ +#define MPI_CSCTL_REG(x) (0x4 + (x) * 8) +#define MPI_CSCTL_ENABLE_MASK (1 << 0) +#define MPI_CSCTL_WAIT_SHIFT 1 +#define MPI_CSCTL_WAIT_MASK (0x7 << MPI_CSCTL_WAIT_SHIFT) +#define MPI_CSCTL_DATA16_MASK (1 << 4) +#define MPI_CSCTL_SYNCMODE_MASK (1 << 7) +#define MPI_CSCTL_TSIZE_MASK (1 << 8) +#define MPI_CSCTL_ENDIANSWAP_MASK (1 << 10) +#define MPI_CSCTL_SETUP_SHIFT 16 +#define MPI_CSCTL_SETUP_MASK (0xf << MPI_CSCTL_SETUP_SHIFT) +#define MPI_CSCTL_HOLD_SHIFT 20 +#define MPI_CSCTL_HOLD_MASK (0xf << MPI_CSCTL_HOLD_SHIFT) + +/* PCI registers */ +#define MPI_SP0_RANGE_REG 0x100 +#define MPI_SP0_REMAP_REG 0x104 +#define MPI_SP0_REMAP_ENABLE_MASK (1 << 0) +#define MPI_SP1_RANGE_REG 0x10C +#define MPI_SP1_REMAP_REG 0x110 +#define MPI_SP1_REMAP_ENABLE_MASK (1 << 0) + +#define MPI_L2PCFG_REG 0x11C +#define MPI_L2PCFG_CFG_TYPE_SHIFT 0 +#define MPI_L2PCFG_CFG_TYPE_MASK (0x3 << MPI_L2PCFG_CFG_TYPE_SHIFT) +#define MPI_L2PCFG_REG_SHIFT 2 +#define MPI_L2PCFG_REG_MASK (0x3f << MPI_L2PCFG_REG_SHIFT) +#define MPI_L2PCFG_FUNC_SHIFT 8 +#define MPI_L2PCFG_FUNC_MASK (0x7 << MPI_L2PCFG_FUNC_SHIFT) +#define MPI_L2PCFG_DEVNUM_SHIFT 11 +#define MPI_L2PCFG_DEVNUM_MASK (0x1f << MPI_L2PCFG_DEVNUM_SHIFT) +#define MPI_L2PCFG_CFG_USEREG_MASK (1 << 30) +#define MPI_L2PCFG_CFG_SEL_MASK (1 << 31) + +#define MPI_L2PMEMRANGE1_REG 0x120 +#define MPI_L2PMEMBASE1_REG 0x124 +#define MPI_L2PMEMREMAP1_REG 0x128 +#define MPI_L2PMEMRANGE2_REG 0x12C +#define MPI_L2PMEMBASE2_REG 0x130 +#define MPI_L2PMEMREMAP2_REG 0x134 +#define MPI_L2PIORANGE_REG 0x138 +#define MPI_L2PIOBASE_REG 0x13C +#define MPI_L2PIOREMAP_REG 0x140 +#define MPI_L2P_BASE_MASK (0xffff8000) +#define MPI_L2PREMAP_ENABLED_MASK (1 << 0) +#define MPI_L2PREMAP_IS_CARDBUS_MASK (1 << 2) + +#define MPI_PCIMODESEL_REG 0x144 +#define MPI_PCIMODESEL_BAR1_NOSWAP_MASK (1 << 0) +#define MPI_PCIMODESEL_BAR2_NOSWAP_MASK (1 << 1) +#define MPI_PCIMODESEL_EXT_ARB_MASK (1 << 2) 
+#define MPI_PCIMODESEL_PREFETCH_SHIFT 4 +#define MPI_PCIMODESEL_PREFETCH_MASK (0xf << MPI_PCIMODESEL_PREFETCH_SHIFT) + +#define MPI_LOCBUSCTL_REG 0x14C +#define MPI_LOCBUSCTL_EN_PCI_GPIO_MASK (1 << 0) +#define MPI_LOCBUSCTL_U2P_NOSWAP_MASK (1 << 1) + +#define MPI_LOCINT_REG 0x150 +#define MPI_LOCINT_MASK(x) (1 << (x + 16)) +#define MPI_LOCINT_STAT(x) (1 << (x)) +#define MPI_LOCINT_DIR_FAILED 6 +#define MPI_LOCINT_EXT_PCI_INT 7 +#define MPI_LOCINT_SERR 8 +#define MPI_LOCINT_CSERR 9 + +#define MPI_PCICFGCTL_REG 0x178 +#define MPI_PCICFGCTL_CFGADDR_SHIFT 2 +#define MPI_PCICFGCTL_CFGADDR_MASK (0x1f << MPI_PCICFGCTL_CFGADDR_SHIFT) +#define MPI_PCICFGCTL_WRITEEN_MASK (1 << 7) + +#define MPI_PCICFGDATA_REG 0x17C + +/* PCI host bridge custom register */ +#define BCMPCI_REG_TIMERS 0x40 +#define REG_TIMER_TRDY_SHIFT 0 +#define REG_TIMER_TRDY_MASK (0xff << REG_TIMER_TRDY_SHIFT) +#define REG_TIMER_RETRY_SHIFT 8 +#define REG_TIMER_RETRY_MASK (0xff << REG_TIMER_RETRY_SHIFT) + + +/************************************************************************* + * _REG relative to RSET_PCMCIA + *************************************************************************/ + +#define PCMCIA_C1_REG 0x0 +#define PCMCIA_C1_CD1_MASK (1 << 0) +#define PCMCIA_C1_CD2_MASK (1 << 1) +#define PCMCIA_C1_VS1_MASK (1 << 2) +#define PCMCIA_C1_VS2_MASK (1 << 3) +#define PCMCIA_C1_VS1OE_MASK (1 << 6) +#define PCMCIA_C1_VS2OE_MASK (1 << 7) +#define PCMCIA_C1_CBIDSEL_SHIFT (8) +#define PCMCIA_C1_CBIDSEL_MASK (0x1f << PCMCIA_C1_CBIDSEL_SHIFT) +#define PCMCIA_C1_EN_PCMCIA_GPIO_MASK (1 << 13) +#define PCMCIA_C1_EN_PCMCIA_MASK (1 << 14) +#define PCMCIA_C1_EN_CARDBUS_MASK (1 << 15) +#define PCMCIA_C1_RESET_MASK (1 << 18) + +#define PCMCIA_C2_REG 0x8 +#define PCMCIA_C2_DATA16_MASK (1 << 0) +#define PCMCIA_C2_BYTESWAP_MASK (1 << 1) +#define PCMCIA_C2_RWCOUNT_SHIFT 2 +#define PCMCIA_C2_RWCOUNT_MASK (0x3f << PCMCIA_C2_RWCOUNT_SHIFT) +#define PCMCIA_C2_INACTIVE_SHIFT 8 +#define PCMCIA_C2_INACTIVE_MASK (0x3f << PCMCIA_C2_INACTIVE_SHIFT) +#define PCMCIA_C2_SETUP_SHIFT 16 +#define PCMCIA_C2_SETUP_MASK (0x3f << PCMCIA_C2_SETUP_SHIFT) +#define PCMCIA_C2_HOLD_SHIFT 24 +#define PCMCIA_C2_HOLD_MASK (0x3f << PCMCIA_C2_HOLD_SHIFT) + + +/************************************************************************* + * _REG relative to RSET_SDRAM + *************************************************************************/ + +#define SDRAM_CFG_REG 0x0 +#define SDRAM_CFG_ROW_SHIFT 4 +#define SDRAM_CFG_ROW_MASK (0x3 << SDRAM_CFG_ROW_SHIFT) +#define SDRAM_CFG_COL_SHIFT 6 +#define SDRAM_CFG_COL_MASK (0x3 << SDRAM_CFG_COL_SHIFT) +#define SDRAM_CFG_32B_SHIFT 10 +#define SDRAM_CFG_32B_MASK (1 << SDRAM_CFG_32B_SHIFT) +#define SDRAM_CFG_BANK_SHIFT 13 +#define SDRAM_CFG_BANK_MASK (1 << SDRAM_CFG_BANK_SHIFT) + +#define SDRAM_PRIO_REG 0x2C +#define SDRAM_PRIO_MIPS_SHIFT 29 +#define SDRAM_PRIO_MIPS_MASK (1 << SDRAM_PRIO_MIPS_SHIFT) +#define SDRAM_PRIO_ADSL_SHIFT 30 +#define SDRAM_PRIO_ADSL_MASK (1 << SDRAM_PRIO_ADSL_SHIFT) +#define SDRAM_PRIO_EN_SHIFT 31 +#define SDRAM_PRIO_EN_MASK (1 << SDRAM_PRIO_EN_SHIFT) + + +/************************************************************************* + * _REG relative to RSET_MEMC + *************************************************************************/ + +#define MEMC_CFG_REG 0x4 +#define MEMC_CFG_32B_SHIFT 1 +#define MEMC_CFG_32B_MASK (1 << MEMC_CFG_32B_SHIFT) +#define MEMC_CFG_COL_SHIFT 3 +#define MEMC_CFG_COL_MASK (0x3 << MEMC_CFG_COL_SHIFT) +#define MEMC_CFG_ROW_SHIFT 6 +#define MEMC_CFG_ROW_MASK (0x3 << MEMC_CFG_ROW_SHIFT) + + 
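The SHIFT/MASK pairs above all follow the same idiom: mask the field out, then shift it down. A short sketch of decoding the SDRAM controller geometry this way; the helper name is hypothetical, and reading the width/bank bits as "32-bit bus" and "two banks" is an assumption about the hardware, not something this patch states:

static void sdram_cfg_dump(void)
{
	u32 val;
	unsigned int rows, cols;

	val = bcm_sdram_readl(SDRAM_CFG_REG);
	rows = (val & SDRAM_CFG_ROW_MASK) >> SDRAM_CFG_ROW_SHIFT;
	cols = (val & SDRAM_CFG_COL_MASK) >> SDRAM_CFG_COL_SHIFT;

	printk(KERN_INFO "sdram: row code %u, col code %u, %s bus, %s\n",
	       rows, cols,
	       (val & SDRAM_CFG_32B_MASK) ? "32-bit" : "16-bit",
	       (val & SDRAM_CFG_BANK_MASK) ? "two banks" : "one bank");
}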
+/************************************************************************* + * _REG relative to RSET_DDR + *************************************************************************/ + +#define DDR_DMIPSPLLCFG_REG 0x18 +#define DMIPSPLLCFG_M1_SHIFT 0 +#define DMIPSPLLCFG_M1_MASK (0xff << DMIPSPLLCFG_M1_SHIFT) +#define DMIPSPLLCFG_N1_SHIFT 23 +#define DMIPSPLLCFG_N1_MASK (0x3f << DMIPSPLLCFG_N1_SHIFT) +#define DMIPSPLLCFG_N2_SHIFT 29 +#define DMIPSPLLCFG_N2_MASK (0x7 << DMIPSPLLCFG_N2_SHIFT) + +#endif /* BCM63XX_REGS_H_ */ + diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_timer.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_timer.h new file mode 100644 index 00000000000..c0fce833c9e --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_timer.h @@ -0,0 +1,11 @@ +#ifndef BCM63XX_TIMER_H_ +#define BCM63XX_TIMER_H_ + +int bcm63xx_timer_register(int id, void (*callback)(void *data), void *data); +void bcm63xx_timer_unregister(int id); +int bcm63xx_timer_set(int id, int monotonic, unsigned int countdown_us); +int bcm63xx_timer_enable(int id); +int bcm63xx_timer_disable(int id); +unsigned int bcm63xx_timer_countdown(unsigned int countdown_us); + +#endif /* !BCM63XX_TIMER_H_ */ diff --git a/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h b/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h new file mode 100644 index 00000000000..6479090a410 --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h @@ -0,0 +1,60 @@ +#ifndef BOARD_BCM963XX_H_ +#define BOARD_BCM963XX_H_ + +#include +#include +#include +#include +#include + +/* + * flash mapping + */ +#define BCM963XX_CFE_VERSION_OFFSET 0x570 +#define BCM963XX_NVRAM_OFFSET 0x580 + +/* + * nvram structure + */ +struct bcm963xx_nvram { + u32 version; + u8 reserved1[256]; + u8 name[16]; + u32 main_tp_number; + u32 psi_size; + u32 mac_addr_count; + u8 mac_addr_base[6]; + u8 reserved2[2]; + u32 checksum_old; + u8 reserved3[720]; + u32 checksum_high; +}; + +/* + * board definition + */ +struct board_info { + u8 name[16]; + unsigned int expected_cpu_id; + + /* enabled feature/device */ + unsigned int has_enet0:1; + unsigned int has_enet1:1; + unsigned int has_pci:1; + unsigned int has_pccard:1; + unsigned int has_ohci0:1; + unsigned int has_ehci0:1; + unsigned int has_dsp:1; + + /* ethernet config */ + struct bcm63xx_enet_platform_data enet0; + struct bcm63xx_enet_platform_data enet1; + + /* DSP config */ + struct bcm63xx_dsp_platform_data dsp; + + /* GPIO LEDs */ + struct gpio_led leds[5]; +}; + +#endif /* ! 
BOARD_BCM963XX_H_ */ diff --git a/arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h b/arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h new file mode 100644 index 00000000000..71742bac940 --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h @@ -0,0 +1,51 @@ +#ifndef __ASM_MACH_BCM963XX_CPU_FEATURE_OVERRIDES_H +#define __ASM_MACH_BCM963XX_CPU_FEATURE_OVERRIDES_H + +#include + +#define cpu_has_tlb 1 +#define cpu_has_4kex 1 +#define cpu_has_4k_cache 1 +#define cpu_has_fpu 0 +#define cpu_has_32fpr 0 +#define cpu_has_counter 1 +#define cpu_has_watch 0 +#define cpu_has_divec 1 +#define cpu_has_vce 0 +#define cpu_has_cache_cdex_p 0 +#define cpu_has_cache_cdex_s 0 +#define cpu_has_prefetch 1 +#define cpu_has_mcheck 1 +#define cpu_has_ejtag 1 +#define cpu_has_llsc 1 +#define cpu_has_mips16 0 +#define cpu_has_mdmx 0 +#define cpu_has_mips3d 0 +#define cpu_has_smartmips 0 +#define cpu_has_vtag_icache 0 + +#if !defined(BCMCPU_RUNTIME_DETECT) && (defined(CONFIG_BCMCPU_IS_6348) || defined(CONFIG_CPU_IS_6338) || defined(CONFIG_CPU_IS_BCM6345)) +#define cpu_has_dc_aliases 0 +#endif + +#define cpu_has_ic_fills_f_dc 0 +#define cpu_has_pindexed_dcache 0 + +#define cpu_has_mips32r1 1 +#define cpu_has_mips32r2 0 +#define cpu_has_mips64r1 0 +#define cpu_has_mips64r2 0 + +#define cpu_has_dsp 0 +#define cpu_has_mipsmt 0 +#define cpu_has_userlocal 0 + +#define cpu_has_nofpuex 0 +#define cpu_has_64bits 0 +#define cpu_has_64bit_zero_reg 0 + +#define cpu_dcache_line_size() 16 +#define cpu_icache_line_size() 16 +#define cpu_scache_line_size() 0 + +#endif /* __ASM_MACH_BCM963XX_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-bcm63xx/gpio.h b/arch/mips/include/asm/mach-bcm63xx/gpio.h new file mode 100644 index 00000000000..7cda8c0a397 --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/gpio.h @@ -0,0 +1,15 @@ +#ifndef __ASM_MIPS_MACH_BCM63XX_GPIO_H +#define __ASM_MIPS_MACH_BCM63XX_GPIO_H + +#include + +#define gpio_to_irq(gpio) NULL + +#define gpio_get_value __gpio_get_value +#define gpio_set_value __gpio_set_value + +#define gpio_cansleep __gpio_cansleep + +#include + +#endif /* __ASM_MIPS_MACH_BCM63XX_GPIO_H */ diff --git a/arch/mips/include/asm/mach-bcm63xx/war.h b/arch/mips/include/asm/mach-bcm63xx/war.h new file mode 100644 index 00000000000..8e3f3fdf320 --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/war.h @@ -0,0 +1,25 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 2002, 2004, 2007 by Ralf Baechle + */ +#ifndef __ASM_MIPS_MACH_BCM63XX_WAR_H +#define __ASM_MIPS_MACH_BCM63XX_WAR_H + +#define R4600_V1_INDEX_ICACHEOP_WAR 0 +#define R4600_V1_HIT_CACHEOP_WAR 0 +#define R4600_V2_HIT_CACHEOP_WAR 0 +#define R5432_CP0_INTERRUPT_WAR 0 +#define BCM1250_M3_WAR 0 +#define SIBYTE_1956_WAR 0 +#define MIPS4K_ICACHE_REFILL_WAR 0 +#define MIPS_CACHE_SYNC_WAR 0 +#define TX49XX_ICACHE_INDEX_INV_WAR 0 +#define RM9000_CDEX_SMP_WAR 0 +#define ICACHE_REFILLS_WORKAROUND_WAR 0 +#define R10000_LLSC_WAR 0 +#define MIPS34K_MISSED_ITLB_WAR 0 + +#endif /* __ASM_MIPS_MACH_BCM63XX_WAR_H */ diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index 0d4d5ea6fac..91bfe73a7f6 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -16,6 +16,8 @@ obj-$(CONFIG_PCI_VR41XX) += ops-vr41xx.o pci-vr41xx.o obj-$(CONFIG_NEC_MARKEINS) += ops-emma2rh.o pci-emma2rh.o fixup-emma2rh.o obj-$(CONFIG_PCI_TX4927) += ops-tx4927.o obj-$(CONFIG_BCM47XX) += pci-bcm47xx.o +obj-$(CONFIG_BCM63XX) += pci-bcm63xx.o fixup-bcm63xx.o \ + ops-bcm63xx.o # # These are still pretty much in the old state, watch, go blind. diff --git a/arch/mips/pci/fixup-bcm63xx.c b/arch/mips/pci/fixup-bcm63xx.c new file mode 100644 index 00000000000..340863009da --- /dev/null +++ b/arch/mips/pci/fixup-bcm63xx.c @@ -0,0 +1,21 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2008 Maxime Bizon + */ + +#include +#include +#include + +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + return bcm63xx_get_irq_number(IRQ_PCI); +} + +int pcibios_plat_dev_init(struct pci_dev *dev) +{ + return 0; +} diff --git a/arch/mips/pci/ops-bcm63xx.c b/arch/mips/pci/ops-bcm63xx.c new file mode 100644 index 00000000000..822ae179bc5 --- /dev/null +++ b/arch/mips/pci/ops-bcm63xx.c @@ -0,0 +1,467 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 2008 Maxime Bizon + */ + +#include +#include +#include +#include +#include +#include + +#include "pci-bcm63xx.h" + +/* + * swizzle 32bits data to return only the needed part + */ +static int postprocess_read(u32 data, int where, unsigned int size) +{ + u32 ret; + + ret = 0; + switch (size) { + case 1: + ret = (data >> ((where & 3) << 3)) & 0xff; + break; + case 2: + ret = (data >> ((where & 3) << 3)) & 0xffff; + break; + case 4: + ret = data; + break; + } + return ret; +} + +static int preprocess_write(u32 orig_data, u32 val, int where, + unsigned int size) +{ + u32 ret; + + ret = 0; + switch (size) { + case 1: + ret = (orig_data & ~(0xff << ((where & 3) << 3))) | + (val << ((where & 3) << 3)); + break; + case 2: + ret = (orig_data & ~(0xffff << ((where & 3) << 3))) | + (val << ((where & 3) << 3)); + break; + case 4: + ret = val; + break; + } + return ret; +} + +/* + * setup hardware for a configuration cycle with given parameters + */ +static int bcm63xx_setup_cfg_access(int type, unsigned int busn, + unsigned int devfn, int where) +{ + unsigned int slot, func, reg; + u32 val; + + slot = PCI_SLOT(devfn); + func = PCI_FUNC(devfn); + reg = where >> 2; + + /* sanity check */ + if (slot > (MPI_L2PCFG_DEVNUM_MASK >> MPI_L2PCFG_DEVNUM_SHIFT)) + return 1; + + if (func > (MPI_L2PCFG_FUNC_MASK >> MPI_L2PCFG_FUNC_SHIFT)) + return 1; + + if (reg > (MPI_L2PCFG_REG_MASK >> MPI_L2PCFG_REG_SHIFT)) + return 1; + + /* ok, setup config access */ + val = (reg << MPI_L2PCFG_REG_SHIFT); + val |= (func << MPI_L2PCFG_FUNC_SHIFT); + val |= (slot << MPI_L2PCFG_DEVNUM_SHIFT); + val |= MPI_L2PCFG_CFG_USEREG_MASK; + val |= MPI_L2PCFG_CFG_SEL_MASK; + /* type 0 cycle for local bus, type 1 cycle for anything else */ + if (type != 0) { + /* FIXME: how to specify bus ??? */ + val |= (1 << MPI_L2PCFG_CFG_TYPE_SHIFT); + } + bcm_mpi_writel(val, MPI_L2PCFG_REG); + + return 0; +} + +static int bcm63xx_do_cfg_read(int type, unsigned int busn, + unsigned int devfn, int where, int size, + u32 *val) +{ + u32 data; + + /* two phase cycle, first we write address, then read data at + * another location, caller already has a spinlock so no need + * to add one here */ + if (bcm63xx_setup_cfg_access(type, busn, devfn, where)) + return PCIBIOS_DEVICE_NOT_FOUND; + iob(); + data = le32_to_cpu(__raw_readl(pci_iospace_start)); + /* restore IO space normal behaviour */ + bcm_mpi_writel(0, MPI_L2PCFG_REG); + + *val = postprocess_read(data, where, size); + + return PCIBIOS_SUCCESSFUL; +} + +static int bcm63xx_do_cfg_write(int type, unsigned int busn, + unsigned int devfn, int where, int size, + u32 val) +{ + u32 data; + + /* two phase cycle, first we write address, then write data to + * another location, caller already has a spinlock so no need + * to add one here */ + if (bcm63xx_setup_cfg_access(type, busn, devfn, where)) + return PCIBIOS_DEVICE_NOT_FOUND; + iob(); + + data = le32_to_cpu(__raw_readl(pci_iospace_start)); + data = preprocess_write(data, val, where, size); + + __raw_writel(cpu_to_le32(data), pci_iospace_start); + wmb(); + /* no way to know the access is done, we have to wait */ + udelay(500); + /* restore IO space normal behaviour */ + bcm_mpi_writel(0, MPI_L2PCFG_REG); + + return PCIBIOS_SUCCESSFUL; +} + +static int bcm63xx_pci_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + int type; + + type = bus->parent ? 
1 : 0; + + if (type == 0 && PCI_SLOT(devfn) == CARDBUS_PCI_IDSEL) + return PCIBIOS_DEVICE_NOT_FOUND; + + return bcm63xx_do_cfg_read(type, bus->number, devfn, + where, size, val); +} + +static int bcm63xx_pci_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + int type; + + type = bus->parent ? 1 : 0; + + if (type == 0 && PCI_SLOT(devfn) == CARDBUS_PCI_IDSEL) + return PCIBIOS_DEVICE_NOT_FOUND; + + return bcm63xx_do_cfg_write(type, bus->number, devfn, + where, size, val); +} + +struct pci_ops bcm63xx_pci_ops = { + .read = bcm63xx_pci_read, + .write = bcm63xx_pci_write +}; + +#ifdef CONFIG_CARDBUS +/* + * emulate configuration read access on a cardbus bridge + */ +#define FAKE_CB_BRIDGE_SLOT 0x1e + +static int fake_cb_bridge_bus_number = -1; + +static struct { + u16 pci_command; + u8 cb_latency; + u8 subordinate_busn; + u8 cardbus_busn; + u8 pci_busn; + int bus_assigned; + u16 bridge_control; + + u32 mem_base0; + u32 mem_limit0; + u32 mem_base1; + u32 mem_limit1; + + u32 io_base0; + u32 io_limit0; + u32 io_base1; + u32 io_limit1; +} fake_cb_bridge_regs; + +static int fake_cb_bridge_read(int where, int size, u32 *val) +{ + unsigned int reg; + u32 data; + + data = 0; + reg = where >> 2; + switch (reg) { + case (PCI_VENDOR_ID >> 2): + case (PCI_CB_SUBSYSTEM_VENDOR_ID >> 2): + /* create dummy vendor/device id from our cpu id */ + data = (bcm63xx_get_cpu_id() << 16) | PCI_VENDOR_ID_BROADCOM; + break; + + case (PCI_COMMAND >> 2): + data = (PCI_STATUS_DEVSEL_SLOW << 16); + data |= fake_cb_bridge_regs.pci_command; + break; + + case (PCI_CLASS_REVISION >> 2): + data = (PCI_CLASS_BRIDGE_CARDBUS << 16); + break; + + case (PCI_CACHE_LINE_SIZE >> 2): + data = (PCI_HEADER_TYPE_CARDBUS << 16); + break; + + case (PCI_INTERRUPT_LINE >> 2): + /* bridge control */ + data = (fake_cb_bridge_regs.bridge_control << 16); + /* pin:intA line:0xff */ + data |= (0x1 << 8) | 0xff; + break; + + case (PCI_CB_PRIMARY_BUS >> 2): + data = (fake_cb_bridge_regs.cb_latency << 24); + data |= (fake_cb_bridge_regs.subordinate_busn << 16); + data |= (fake_cb_bridge_regs.cardbus_busn << 8); + data |= fake_cb_bridge_regs.pci_busn; + break; + + case (PCI_CB_MEMORY_BASE_0 >> 2): + data = fake_cb_bridge_regs.mem_base0; + break; + + case (PCI_CB_MEMORY_LIMIT_0 >> 2): + data = fake_cb_bridge_regs.mem_limit0; + break; + + case (PCI_CB_MEMORY_BASE_1 >> 2): + data = fake_cb_bridge_regs.mem_base1; + break; + + case (PCI_CB_MEMORY_LIMIT_1 >> 2): + data = fake_cb_bridge_regs.mem_limit1; + break; + + case (PCI_CB_IO_BASE_0 >> 2): + /* | 1 for 32bits io support */ + data = fake_cb_bridge_regs.io_base0 | 0x1; + break; + + case (PCI_CB_IO_LIMIT_0 >> 2): + data = fake_cb_bridge_regs.io_limit0; + break; + + case (PCI_CB_IO_BASE_1 >> 2): + /* | 1 for 32bits io support */ + data = fake_cb_bridge_regs.io_base1 | 0x1; + break; + + case (PCI_CB_IO_LIMIT_1 >> 2): + data = fake_cb_bridge_regs.io_limit1; + break; + } + + *val = postprocess_read(data, where, size); + return PCIBIOS_SUCCESSFUL; +} + +/* + * emulate configuration write access on a cardbus bridge + */ +static int fake_cb_bridge_write(int where, int size, u32 val) +{ + unsigned int reg; + u32 data, tmp; + int ret; + + ret = fake_cb_bridge_read((where & ~0x3), 4, &data); + if (ret != PCIBIOS_SUCCESSFUL) + return ret; + + data = preprocess_write(data, val, where, size); + + reg = where >> 2; + switch (reg) { + case (PCI_COMMAND >> 2): + fake_cb_bridge_regs.pci_command = (data & 0xffff); + break; + + case (PCI_CB_PRIMARY_BUS >> 2): + fake_cb_bridge_regs.cb_latency = 
(data >> 24) & 0xff; + fake_cb_bridge_regs.subordinate_busn = (data >> 16) & 0xff; + fake_cb_bridge_regs.cardbus_busn = (data >> 8) & 0xff; + fake_cb_bridge_regs.pci_busn = data & 0xff; + if (fake_cb_bridge_regs.cardbus_busn) + fake_cb_bridge_regs.bus_assigned = 1; + break; + + case (PCI_INTERRUPT_LINE >> 2): + tmp = (data >> 16) & 0xffff; + /* disable memory prefetch support */ + tmp &= ~PCI_CB_BRIDGE_CTL_PREFETCH_MEM0; + tmp &= ~PCI_CB_BRIDGE_CTL_PREFETCH_MEM1; + fake_cb_bridge_regs.bridge_control = tmp; + break; + + case (PCI_CB_MEMORY_BASE_0 >> 2): + fake_cb_bridge_regs.mem_base0 = data; + break; + + case (PCI_CB_MEMORY_LIMIT_0 >> 2): + fake_cb_bridge_regs.mem_limit0 = data; + break; + + case (PCI_CB_MEMORY_BASE_1 >> 2): + fake_cb_bridge_regs.mem_base1 = data; + break; + + case (PCI_CB_MEMORY_LIMIT_1 >> 2): + fake_cb_bridge_regs.mem_limit1 = data; + break; + + case (PCI_CB_IO_BASE_0 >> 2): + fake_cb_bridge_regs.io_base0 = data; + break; + + case (PCI_CB_IO_LIMIT_0 >> 2): + fake_cb_bridge_regs.io_limit0 = data; + break; + + case (PCI_CB_IO_BASE_1 >> 2): + fake_cb_bridge_regs.io_base1 = data; + break; + + case (PCI_CB_IO_LIMIT_1 >> 2): + fake_cb_bridge_regs.io_limit1 = data; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int bcm63xx_cb_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + /* snoop access to slot 0x1e on root bus, we fake a cardbus + * bridge at this location */ + if (!bus->parent && PCI_SLOT(devfn) == FAKE_CB_BRIDGE_SLOT) { + fake_cb_bridge_bus_number = bus->number; + return fake_cb_bridge_read(where, size, val); + } + + /* a configuration cycle for the device behind the cardbus + * bridge is actually done as a type 0 cycle on the primary + * bus. This means that only one device can be on the cardbus + * bus */ + if (fake_cb_bridge_regs.bus_assigned && + bus->number == fake_cb_bridge_regs.cardbus_busn && + PCI_SLOT(devfn) == 0) + return bcm63xx_do_cfg_read(0, 0, + PCI_DEVFN(CARDBUS_PCI_IDSEL, 0), + where, size, val); + + return PCIBIOS_DEVICE_NOT_FOUND; +} + +static int bcm63xx_cb_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + if (!bus->parent && PCI_SLOT(devfn) == FAKE_CB_BRIDGE_SLOT) { + fake_cb_bridge_bus_number = bus->number; + return fake_cb_bridge_write(where, size, val); + } + + if (fake_cb_bridge_regs.bus_assigned && + bus->number == fake_cb_bridge_regs.cardbus_busn && + PCI_SLOT(devfn) == 0) + return bcm63xx_do_cfg_write(0, 0, + PCI_DEVFN(CARDBUS_PCI_IDSEL, 0), + where, size, val); + + return PCIBIOS_DEVICE_NOT_FOUND; +} + +struct pci_ops bcm63xx_cb_ops = { + .read = bcm63xx_cb_read, + .write = bcm63xx_cb_write, +}; + +/* + * only one IO window, so it cannot be shared by PCI and cardbus, use + * fixup to choose and detect unhandled configuration + */ +static void bcm63xx_fixup(struct pci_dev *dev) +{ + static int io_window = -1; + int i, found, new_io_window; + u32 val; + + /* look for any io resource */ + found = 0; + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + if (pci_resource_flags(dev, i) & IORESOURCE_IO) { + found = 1; + break; + } + } + + if (!found) + return; + + /* skip our fake bus with only cardbus bridge on it */ + if (dev->bus->number == fake_cb_bridge_bus_number) + return; + + /* find on which bus the device is */ + if (fake_cb_bridge_regs.bus_assigned && + dev->bus->number == fake_cb_bridge_regs.cardbus_busn && + PCI_SLOT(dev->devfn) == 0) + new_io_window = 1; + else + new_io_window = 0; + + if (new_io_window == io_window) + return; + + if (io_window != -1) { + 
printk(KERN_ERR "bcm63xx: both PCI and cardbus devices " + "need IO, which hardware cannot do\n"); + return; + } + + printk(KERN_INFO "bcm63xx: PCI IO window assigned to %s\n", + (new_io_window == 0) ? "PCI" : "cardbus"); + + val = bcm_mpi_readl(MPI_L2PIOREMAP_REG); + if (io_window) + val |= MPI_L2PREMAP_IS_CARDBUS_MASK; + else + val &= ~MPI_L2PREMAP_IS_CARDBUS_MASK; + bcm_mpi_writel(val, MPI_L2PIOREMAP_REG); + + io_window = new_io_window; +} + +DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, bcm63xx_fixup); +#endif diff --git a/arch/mips/pci/pci-bcm63xx.c b/arch/mips/pci/pci-bcm63xx.c new file mode 100644 index 00000000000..82e0fde1dba --- /dev/null +++ b/arch/mips/pci/pci-bcm63xx.c @@ -0,0 +1,224 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2008 Maxime Bizon + */ + +#include +#include +#include +#include +#include + +#include "pci-bcm63xx.h" + +/* + * Allow PCI to be disabled at runtime depending on board nvram + * configuration + */ +int bcm63xx_pci_enabled; + +static struct resource bcm_pci_mem_resource = { + .name = "bcm63xx PCI memory space", + .start = BCM_PCI_MEM_BASE_PA, + .end = BCM_PCI_MEM_END_PA, + .flags = IORESOURCE_MEM +}; + +static struct resource bcm_pci_io_resource = { + .name = "bcm63xx PCI IO space", + .start = BCM_PCI_IO_BASE_PA, +#ifdef CONFIG_CARDBUS + .end = BCM_PCI_IO_HALF_PA, +#else + .end = BCM_PCI_IO_END_PA, +#endif + .flags = IORESOURCE_IO +}; + +struct pci_controller bcm63xx_controller = { + .pci_ops = &bcm63xx_pci_ops, + .io_resource = &bcm_pci_io_resource, + .mem_resource = &bcm_pci_mem_resource, +}; + +/* + * We handle cardbus via a fake Cardbus bridge, memory and io spaces + * have to be clearly separated from PCI one since we have different + * memory decoder. + */ +#ifdef CONFIG_CARDBUS +static struct resource bcm_cb_mem_resource = { + .name = "bcm63xx Cardbus memory space", + .start = BCM_CB_MEM_BASE_PA, + .end = BCM_CB_MEM_END_PA, + .flags = IORESOURCE_MEM +}; + +static struct resource bcm_cb_io_resource = { + .name = "bcm63xx Cardbus IO space", + .start = BCM_PCI_IO_HALF_PA + 1, + .end = BCM_PCI_IO_END_PA, + .flags = IORESOURCE_IO +}; + +struct pci_controller bcm63xx_cb_controller = { + .pci_ops = &bcm63xx_cb_ops, + .io_resource = &bcm_cb_io_resource, + .mem_resource = &bcm_cb_mem_resource, +}; +#endif + +static u32 bcm63xx_int_cfg_readl(u32 reg) +{ + u32 tmp; + + tmp = reg & MPI_PCICFGCTL_CFGADDR_MASK; + tmp |= MPI_PCICFGCTL_WRITEEN_MASK; + bcm_mpi_writel(tmp, MPI_PCICFGCTL_REG); + iob(); + return bcm_mpi_readl(MPI_PCICFGDATA_REG); +} + +static void bcm63xx_int_cfg_writel(u32 val, u32 reg) +{ + u32 tmp; + + tmp = reg & MPI_PCICFGCTL_CFGADDR_MASK; + tmp |= MPI_PCICFGCTL_WRITEEN_MASK; + bcm_mpi_writel(tmp, MPI_PCICFGCTL_REG); + bcm_mpi_writel(val, MPI_PCICFGDATA_REG); +} + +void __iomem *pci_iospace_start; + +static int __init bcm63xx_pci_init(void) +{ + unsigned int mem_size; + u32 val; + + if (!BCMCPU_IS_6348() && !BCMCPU_IS_6358()) + return -ENODEV; + + if (!bcm63xx_pci_enabled) + return -ENODEV; + + /* + * configuration access are done through IO space, remap 4 + * first bytes to access it from CPU. + * + * this means that no io access from CPU should happen while + * we do a configuration cycle, but there's no way we can add + * a spinlock for each io access, so this is currently kind of + * broken on SMP. 
+ */ + pci_iospace_start = ioremap_nocache(BCM_PCI_IO_BASE_PA, 4); + if (!pci_iospace_start) + return -ENOMEM; + + /* setup local bus to PCI access (PCI memory) */ + val = BCM_PCI_MEM_BASE_PA & MPI_L2P_BASE_MASK; + bcm_mpi_writel(val, MPI_L2PMEMBASE1_REG); + bcm_mpi_writel(~(BCM_PCI_MEM_SIZE - 1), MPI_L2PMEMRANGE1_REG); + bcm_mpi_writel(val | MPI_L2PREMAP_ENABLED_MASK, MPI_L2PMEMREMAP1_REG); + + /* set Cardbus IDSEL (type 0 cfg access on primary bus for + * this IDSEL will be done on Cardbus instead) */ + val = bcm_pcmcia_readl(PCMCIA_C1_REG); + val &= ~PCMCIA_C1_CBIDSEL_MASK; + val |= (CARDBUS_PCI_IDSEL << PCMCIA_C1_CBIDSEL_SHIFT); + bcm_pcmcia_writel(val, PCMCIA_C1_REG); + +#ifdef CONFIG_CARDBUS + /* setup local bus to PCI access (Cardbus memory) */ + val = BCM_CB_MEM_BASE_PA & MPI_L2P_BASE_MASK; + bcm_mpi_writel(val, MPI_L2PMEMBASE2_REG); + bcm_mpi_writel(~(BCM_CB_MEM_SIZE - 1), MPI_L2PMEMRANGE2_REG); + val |= MPI_L2PREMAP_ENABLED_MASK | MPI_L2PREMAP_IS_CARDBUS_MASK; + bcm_mpi_writel(val, MPI_L2PMEMREMAP2_REG); +#else + /* disable second access windows */ + bcm_mpi_writel(0, MPI_L2PMEMREMAP2_REG); +#endif + + /* setup local bus to PCI access (IO memory), we have only 1 + * IO window for both PCI and cardbus, but it cannot handle + * both at the same time, assume standard PCI for now, if + * cardbus card has IO zone, PCI fixup will change window to + * cardbus */ + val = BCM_PCI_IO_BASE_PA & MPI_L2P_BASE_MASK; + bcm_mpi_writel(val, MPI_L2PIOBASE_REG); + bcm_mpi_writel(~(BCM_PCI_IO_SIZE - 1), MPI_L2PIORANGE_REG); + bcm_mpi_writel(val | MPI_L2PREMAP_ENABLED_MASK, MPI_L2PIOREMAP_REG); + + /* enable PCI related GPIO pins */ + bcm_mpi_writel(MPI_LOCBUSCTL_EN_PCI_GPIO_MASK, MPI_LOCBUSCTL_REG); + + /* setup PCI to local bus access, used by PCI device to target + * local RAM while bus mastering */ + bcm63xx_int_cfg_writel(0, PCI_BASE_ADDRESS_3); + if (BCMCPU_IS_6358()) + val = MPI_SP0_REMAP_ENABLE_MASK; + else + val = 0; + bcm_mpi_writel(val, MPI_SP0_REMAP_REG); + + bcm63xx_int_cfg_writel(0x0, PCI_BASE_ADDRESS_4); + bcm_mpi_writel(0, MPI_SP1_REMAP_REG); + + mem_size = bcm63xx_get_memory_size(); + + /* 6348 before rev b0 exposes only 16 MB of RAM memory through + * PCI, throw a warning if we have more memory */ + if (BCMCPU_IS_6348() && (bcm63xx_get_cpu_rev() & 0xf0) == 0xa0) { + if (mem_size > (16 * 1024 * 1024)) + printk(KERN_WARNING "bcm63xx: this CPU " + "revision cannot handle more than 16MB " + "of RAM for PCI bus mastering\n"); + } else { + /* setup sp0 range to local RAM size */ + bcm_mpi_writel(~(mem_size - 1), MPI_SP0_RANGE_REG); + bcm_mpi_writel(0, MPI_SP1_RANGE_REG); + } + + /* change host bridge retry counter to infinite number of + * retry, needed for some broadcom wifi cards with Silicon + * Backplane bus where access to srom seems very slow */ + val = bcm63xx_int_cfg_readl(BCMPCI_REG_TIMERS); + val &= ~REG_TIMER_RETRY_MASK; + bcm63xx_int_cfg_writel(val, BCMPCI_REG_TIMERS); + + /* enable memory decoder and bus mastering */ + val = bcm63xx_int_cfg_readl(PCI_COMMAND); + val |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + bcm63xx_int_cfg_writel(val, PCI_COMMAND); + + /* enable read prefetching & disable byte swapping for bus + * mastering transfers */ + val = bcm_mpi_readl(MPI_PCIMODESEL_REG); + val &= ~MPI_PCIMODESEL_BAR1_NOSWAP_MASK; + val &= ~MPI_PCIMODESEL_BAR2_NOSWAP_MASK; + val &= ~MPI_PCIMODESEL_PREFETCH_MASK; + val |= (8 << MPI_PCIMODESEL_PREFETCH_SHIFT); + bcm_mpi_writel(val, MPI_PCIMODESEL_REG); + + /* enable pci interrupt */ + val = bcm_mpi_readl(MPI_LOCINT_REG); + val |= 
MPI_LOCINT_MASK(MPI_LOCINT_EXT_PCI_INT); + bcm_mpi_writel(val, MPI_LOCINT_REG); + + register_pci_controller(&bcm63xx_controller); + +#ifdef CONFIG_CARDBUS + register_pci_controller(&bcm63xx_cb_controller); +#endif + + /* mark memory space used for IO mapping as reserved */ + request_mem_region(BCM_PCI_IO_BASE_PA, BCM_PCI_IO_SIZE, + "bcm63xx PCI IO space"); + return 0; +} + +arch_initcall(bcm63xx_pci_init); diff --git a/arch/mips/pci/pci-bcm63xx.h b/arch/mips/pci/pci-bcm63xx.h new file mode 100644 index 00000000000..a6e594ef3d6 --- /dev/null +++ b/arch/mips/pci/pci-bcm63xx.h @@ -0,0 +1,27 @@ +#ifndef PCI_BCM63XX_H_ +#define PCI_BCM63XX_H_ + +#include +#include +#include +#include + +/* + * Cardbus shares the PCI bus, but has no IDSEL, so a special id is + * reserved for it. If you have a standard PCI device at this id, you + * need to change the following definition. + */ +#define CARDBUS_PCI_IDSEL 0x8 + +/* + * defined in ops-bcm63xx.c + */ +extern struct pci_ops bcm63xx_pci_ops; +extern struct pci_ops bcm63xx_cb_ops; + +/* + * defined in pci-bcm63xx.c + */ +extern void __iomem *pci_iospace_start; + +#endif /* ! PCI_BCM63XX_H_ */ -- cgit v1.2.3-70-g09d2 From cdd6c482c9ff9c55475ee7392ec8f672eddb7be6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Sep 2009 12:02:48 +0200 Subject: perf: Do the big rename: Performance Counters -> Performance Events Bye-bye Performance Counters, welcome Performance Events! In the past few months the perfcounters subsystem has grown out of its initial role of counting hardware events, and has become (and is becoming) a much broader generic event enumeration, reporting, logging, monitoring, analysis facility. Naming its core object 'perf_counter' and naming the subsystem 'perfcounters' has become more and more of a misnomer. With pending code like hw-breakpoints support the 'counter' name is less and less appropriate. All in one, we've decided to rename the subsystem to 'performance events' and to propagate this rename through all fields, variables and API names. (in an ABI compatible fashion) The word 'event' is also a bit shorter than 'counter' - which makes it slightly more convenient to write/handle as well. Thanks goes to Stephane Eranian who first observed this misnomer and suggested a rename. User-space tooling and ABI compatibility is not affected - this patch should be function-invariant. (Also, defconfigs were not touched to keep the size down.) This patch has been generated via the following script: FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/PERF_EVENT_/PERF_RECORD_/g' \ -e 's/PERF_COUNTER/PERF_EVENT/g' \ -e 's/perf_counter/perf_event/g' \ -e 's/nb_counters/nb_events/g' \ -e 's/swcounter/swevent/g' \ -e 's/tpcounter_event/tp_event/g' \ $FILES for N in $(find . -name perf_counter.[ch]); do M=$(echo $N | sed 's/perf_counter/perf_event/g') mv $N $M done FILES=$(find . -name perf_event.*) sed -i \ -e 's/COUNTER_MASK/REG_MASK/g' \ -e 's/COUNTER/EVENT/g' \ -e 's/\<event\>/event_id/g' \ -e 's/counter/event/g' \ -e 's/Counter/Event/g' \ $FILES ... to keep it as correct as possible. This script can also be used by anyone who has pending perfcounters patches - it converts a Linux kernel tree over to the new naming. We tried to time this change to the point in time where the amount of pending patches is the smallest: the end of the merge window. Namespace clashes were fixed up in a preparatory patch - and some stylistic fallout will be fixed up in a subsequent patch. 
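To make the mechanical effect of the rename concrete, here is a hypothetical call site written the way the perf tool did, before and after the script runs (illustrative only, not taken from the patch):

	/* before the rename */
	struct perf_counter_attr attr = { .type = PERF_TYPE_HARDWARE };
	int fd = sys_perf_counter_open(&attr, 0 /* pid: self */,
				       -1 /* cpu: any */,
				       -1 /* group_fd */, 0 /* flags */);

	/* the same line after 's/perf_counter/perf_event/g' */
	struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE };
	int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);

Note that PERF_TYPE_HARDWARE is untouched: the PERF_COUNTER -> PERF_EVENT substitution only rewrites identifiers that actually contain the old prefix.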
( NOTE: 'counters' are still the proper terminology when we deal with hardware registers - and these sed scripts are a bit over-eager in renaming them. I've undone some of that, but in case there's something left where 'counter' would be better than 'event' we can undo that on an individual basis instead of touching an otherwise nicely automated patch. ) Suggested-by: Stephane Eranian Acked-by: Peter Zijlstra Acked-by: Paul Mackerras Reviewed-by: Arjan van de Ven Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Benjamin Herrenschmidt Cc: David Howells Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: LKML-Reference: Signed-off-by: Ingo Molnar --- arch/arm/include/asm/unistd.h | 2 +- arch/arm/kernel/calls.S | 2 +- arch/blackfin/include/asm/unistd.h | 2 +- arch/blackfin/mach-common/entry.S | 2 +- arch/frv/Kconfig | 2 +- arch/frv/include/asm/perf_counter.h | 17 - arch/frv/include/asm/perf_event.h | 17 + arch/frv/include/asm/unistd.h | 2 +- arch/frv/kernel/entry.S | 2 +- arch/frv/lib/Makefile | 2 +- arch/frv/lib/perf_counter.c | 19 - arch/frv/lib/perf_event.c | 19 + arch/m68k/include/asm/unistd.h | 2 +- arch/m68k/kernel/entry.S | 2 +- arch/m68knommu/kernel/syscalltable.S | 2 +- arch/microblaze/include/asm/unistd.h | 2 +- arch/microblaze/kernel/syscall_table.S | 2 +- arch/mips/include/asm/unistd.h | 6 +- arch/mips/kernel/scall32-o32.S | 2 +- arch/mips/kernel/scall64-64.S | 2 +- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 2 +- arch/mn10300/include/asm/unistd.h | 2 +- arch/mn10300/kernel/entry.S | 2 +- arch/parisc/Kconfig | 2 +- arch/parisc/include/asm/perf_counter.h | 7 - arch/parisc/include/asm/perf_event.h | 7 + arch/parisc/include/asm/unistd.h | 4 +- arch/parisc/kernel/syscall_table.S | 2 +- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/hw_irq.h | 22 +- arch/powerpc/include/asm/paca.h | 2 +- arch/powerpc/include/asm/perf_counter.h | 110 - arch/powerpc/include/asm/perf_event.h | 110 + arch/powerpc/include/asm/systbl.h | 2 +- arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/entry_64.S | 8 +- arch/powerpc/kernel/irq.c | 8 +- arch/powerpc/kernel/mpc7450-pmu.c | 2 +- arch/powerpc/kernel/perf_callchain.c | 2 +- arch/powerpc/kernel/perf_counter.c | 1315 -------- arch/powerpc/kernel/perf_event.c | 1315 ++++++++ arch/powerpc/kernel/power4-pmu.c | 2 +- arch/powerpc/kernel/power5+-pmu.c | 2 +- arch/powerpc/kernel/power5-pmu.c | 2 +- arch/powerpc/kernel/power6-pmu.c | 2 +- arch/powerpc/kernel/power7-pmu.c | 2 +- arch/powerpc/kernel/ppc970-pmu.c | 2 +- arch/powerpc/kernel/time.c | 30 +- arch/powerpc/mm/fault.c | 8 +- arch/powerpc/platforms/Kconfig.cputype | 4 +- arch/s390/Kconfig | 2 +- arch/s390/include/asm/perf_counter.h | 10 - arch/s390/include/asm/perf_event.h | 10 + arch/s390/include/asm/unistd.h | 2 +- arch/s390/kernel/compat_wrapper.S | 8 +- arch/s390/kernel/syscalls.S | 2 +- arch/s390/mm/fault.c | 8 +- arch/sh/Kconfig | 2 +- arch/sh/include/asm/perf_counter.h | 9 - arch/sh/include/asm/perf_event.h | 9 + arch/sh/include/asm/unistd_32.h | 2 +- arch/sh/include/asm/unistd_64.h | 2 +- arch/sh/kernel/syscalls_32.S | 2 +- arch/sh/kernel/syscalls_64.S | 2 +- arch/sh/mm/fault_32.c | 8 +- arch/sh/mm/tlbflush_64.c | 8 +- arch/sparc/Kconfig | 4 +- arch/sparc/include/asm/perf_counter.h | 14 - arch/sparc/include/asm/perf_event.h | 14 + arch/sparc/include/asm/unistd.h | 2 +- 
arch/sparc/kernel/Makefile | 2 +- arch/sparc/kernel/nmi.c | 4 +- arch/sparc/kernel/pcr.c | 10 +- arch/sparc/kernel/perf_counter.c | 556 ---- arch/sparc/kernel/perf_event.c | 556 ++++ arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 +- arch/x86/Kconfig | 2 +- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/include/asm/entry_arch.h | 2 +- arch/x86/include/asm/perf_counter.h | 108 - arch/x86/include/asm/perf_event.h | 108 + arch/x86/include/asm/unistd_32.h | 2 +- arch/x86/include/asm/unistd_64.h | 4 +- arch/x86/kernel/apic/apic.c | 6 +- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 4 +- arch/x86/kernel/cpu/perf_counter.c | 2298 -------------- arch/x86/kernel/cpu/perf_event.c | 2298 ++++++++++++++ arch/x86/kernel/cpu/perfctr-watchdog.c | 2 +- arch/x86/kernel/entry_64.S | 2 +- arch/x86/kernel/irqinit.c | 2 +- arch/x86/kernel/syscall_table_32.S | 2 +- arch/x86/mm/fault.c | 8 +- arch/x86/oprofile/op_model_ppro.c | 4 +- arch/x86/oprofile/op_x86_model.h | 2 +- drivers/char/sysrq.c | 4 +- fs/exec.c | 6 +- include/asm-generic/unistd.h | 4 +- include/linux/init_task.h | 14 +- include/linux/perf_counter.h | 858 ------ include/linux/perf_event.h | 858 ++++++ include/linux/prctl.h | 4 +- include/linux/sched.h | 12 +- include/linux/syscalls.h | 6 +- include/trace/ftrace.h | 10 +- init/Kconfig | 8 +- kernel/Makefile | 2 +- kernel/exit.c | 8 +- kernel/fork.c | 8 +- kernel/perf_counter.c | 5000 ------------------------------- kernel/perf_event.c | 5000 +++++++++++++++++++++++++++++++ kernel/sched.c | 14 +- kernel/sys.c | 10 +- kernel/sys_ni.c | 2 +- kernel/sysctl.c | 22 +- kernel/timer.c | 4 +- kernel/trace/trace_syscalls.c | 6 +- mm/mmap.c | 6 +- mm/mprotect.c | 4 +- tools/perf/Makefile | 2 +- tools/perf/builtin-annotate.c | 28 +- tools/perf/builtin-record.c | 22 +- tools/perf/builtin-report.c | 48 +- tools/perf/builtin-sched.c | 20 +- tools/perf/builtin-stat.c | 10 +- tools/perf/builtin-timechart.c | 14 +- tools/perf/builtin-top.c | 12 +- tools/perf/builtin-trace.c | 22 +- tools/perf/design.txt | 58 +- tools/perf/perf.h | 12 +- tools/perf/util/event.h | 4 +- tools/perf/util/header.c | 6 +- tools/perf/util/header.h | 8 +- tools/perf/util/parse-events.c | 32 +- tools/perf/util/parse-events.h | 2 +- tools/perf/util/trace-event-info.c | 8 +- tools/perf/util/trace-event.h | 2 +- 141 files changed, 10694 insertions(+), 10694 deletions(-) delete mode 100644 arch/frv/include/asm/perf_counter.h create mode 100644 arch/frv/include/asm/perf_event.h delete mode 100644 arch/frv/lib/perf_counter.c create mode 100644 arch/frv/lib/perf_event.c delete mode 100644 arch/parisc/include/asm/perf_counter.h create mode 100644 arch/parisc/include/asm/perf_event.h delete mode 100644 arch/powerpc/include/asm/perf_counter.h create mode 100644 arch/powerpc/include/asm/perf_event.h delete mode 100644 arch/powerpc/kernel/perf_counter.c create mode 100644 arch/powerpc/kernel/perf_event.c delete mode 100644 arch/s390/include/asm/perf_counter.h create mode 100644 arch/s390/include/asm/perf_event.h delete mode 100644 arch/sh/include/asm/perf_counter.h create mode 100644 arch/sh/include/asm/perf_event.h delete mode 100644 arch/sparc/include/asm/perf_counter.h create mode 100644 arch/sparc/include/asm/perf_event.h delete mode 100644 arch/sparc/kernel/perf_counter.c create mode 100644 arch/sparc/kernel/perf_event.c delete mode 100644 arch/x86/include/asm/perf_counter.h create mode 100644 arch/x86/include/asm/perf_event.h delete mode 100644 arch/x86/kernel/cpu/perf_counter.c create mode 100644 
arch/x86/kernel/cpu/perf_event.c delete mode 100644 include/linux/perf_counter.h create mode 100644 include/linux/perf_event.h delete mode 100644 kernel/perf_counter.c create mode 100644 kernel/perf_event.c (limited to 'arch/mips/include/asm') diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 9122c9ee18f..89f7eade20a 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -390,7 +390,7 @@ #define __NR_preadv (__NR_SYSCALL_BASE+361) #define __NR_pwritev (__NR_SYSCALL_BASE+362) #define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363) -#define __NR_perf_counter_open (__NR_SYSCALL_BASE+364) +#define __NR_perf_event_open (__NR_SYSCALL_BASE+364) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index ecfa98954d1..fafce1b5c69 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -373,7 +373,7 @@ CALL(sys_preadv) CALL(sys_pwritev) CALL(sys_rt_tgsigqueueinfo) - CALL(sys_perf_counter_open) + CALL(sys_perf_event_open) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index c8e7ee4768c..02b1529dad5 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -381,7 +381,7 @@ #define __NR_preadv 366 #define __NR_pwritev 367 #define __NR_rt_tgsigqueueinfo 368 -#define __NR_perf_counter_open 369 +#define __NR_perf_event_open 369 #define __NR_syscall 370 #define NR_syscalls __NR_syscall diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 01af24cde36..1e7cac23e25 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1620,7 +1620,7 @@ ENTRY(_sys_call_table) .long _sys_preadv .long _sys_pwritev .long _sys_rt_tgsigqueueinfo - .long _sys_perf_counter_open + .long _sys_perf_event_open .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index b86e19c9b5b..4b5830bcbe2 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -7,7 +7,7 @@ config FRV default y select HAVE_IDE select HAVE_ARCH_TRACEHOOK - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config ZONE_DMA bool diff --git a/arch/frv/include/asm/perf_counter.h b/arch/frv/include/asm/perf_counter.h deleted file mode 100644 index ccf726e61b2..00000000000 --- a/arch/frv/include/asm/perf_counter.h +++ /dev/null @@ -1,17 +0,0 @@ -/* FRV performance counter support - * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#ifndef _ASM_PERF_COUNTER_H -#define _ASM_PERF_COUNTER_H - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#endif /* _ASM_PERF_COUNTER_H */ diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h new file mode 100644 index 00000000000..a69e0155d14 --- /dev/null +++ b/arch/frv/include/asm/perf_event.h @@ -0,0 +1,17 @@ +/* FRV performance event support + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _ASM_PERF_EVENT_H +#define _ASM_PERF_EVENT_H + +#define PERF_EVENT_INDEX_OFFSET 0 + +#endif /* _ASM_PERF_EVENT_H */ diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h index 4a8fb427ce0..be6ef0f5cd4 100644 --- a/arch/frv/include/asm/unistd.h +++ b/arch/frv/include/asm/unistd.h @@ -342,7 +342,7 @@ #define __NR_preadv 333 #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 -#define __NR_perf_counter_open 336 +#define __NR_perf_event_open 336 #ifdef __KERNEL__ diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index fde1e446b44..189397ec012 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -1525,6 +1525,6 @@ sys_call_table: .long sys_preadv .long sys_pwritev .long sys_rt_tgsigqueueinfo /* 335 */ - .long sys_perf_counter_open + .long sys_perf_event_open syscall_table_size = (. - sys_call_table) diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile index 0a377210c89..f4709756d0d 100644 --- a/arch/frv/lib/Makefile +++ b/arch/frv/lib/Makefile @@ -5,4 +5,4 @@ lib-y := \ __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \ checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \ - outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_counter.o + outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o diff --git a/arch/frv/lib/perf_counter.c b/arch/frv/lib/perf_counter.c deleted file mode 100644 index 2000feecd57..00000000000 --- a/arch/frv/lib/perf_counter.c +++ /dev/null @@ -1,19 +0,0 @@ -/* Performance counter handling - * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#include - -/* - * mark the performance counter as pending - */ -void set_perf_counter_pending(void) -{ -} diff --git a/arch/frv/lib/perf_event.c b/arch/frv/lib/perf_event.c new file mode 100644 index 00000000000..9ac5acfd2e9 --- /dev/null +++ b/arch/frv/lib/perf_event.c @@ -0,0 +1,19 @@ +/* Performance event handling + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#include + +/* + * mark the performance event as pending + */ +void set_perf_event_pending(void) +{ +} diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 946d8691f2b..48b87f5ced5 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -335,7 +335,7 @@ #define __NR_preadv 329 #define __NR_pwritev 330 #define __NR_rt_tgsigqueueinfo 331 -#define __NR_perf_counter_open 332 +#define __NR_perf_event_open 332 #ifdef __KERNEL__ diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 922f52e7ed1..c5b33634c98 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -756,5 +756,5 @@ sys_call_table: .long sys_preadv .long sys_pwritev /* 330 */ .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index 0ae123e0898..23535cc415a 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -350,7 +350,7 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev /* 330 */ .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index 0b852327c0e..cb05a07e55e 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -381,7 +381,7 @@ #define __NR_preadv 363 /* new */ #define __NR_pwritev 364 /* new */ #define __NR_rt_tgsigqueueinfo 365 /* new */ -#define __NR_perf_counter_open 366 /* new */ +#define __NR_perf_event_open 366 /* new */ #define __NR_syscalls 367 diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index 457216097df..ecec1915513 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -370,4 +370,4 @@ ENTRY(sys_call_table) .long sys_ni_syscall .long sys_ni_syscall .long sys_rt_tgsigqueueinfo /* 365 */ - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index e753a777949..8c9dfa9e901 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -353,7 +353,7 @@ #define __NR_preadv (__NR_Linux + 330) #define __NR_pwritev (__NR_Linux + 331) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 332) -#define __NR_perf_counter_open (__NR_Linux + 333) +#define __NR_perf_event_open (__NR_Linux + 333) #define __NR_accept4 (__NR_Linux + 334) /* @@ -664,7 +664,7 @@ #define __NR_preadv (__NR_Linux + 289) #define __NR_pwritev (__NR_Linux + 290) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 291) -#define __NR_perf_counter_open (__NR_Linux + 292) +#define __NR_perf_event_open (__NR_Linux + 292) #define __NR_accept4 (__NR_Linux + 293) /* @@ -979,7 +979,7 @@ #define __NR_preadv (__NR_Linux + 293) #define __NR_pwritev (__NR_Linux + 294) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 295) -#define __NR_perf_counter_open (__NR_Linux + 296) +#define __NR_perf_event_open (__NR_Linux + 296) #define __NR_accept4 (__NR_Linux + 297) /* diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 7c2de4f091c..fd2a9bb620d 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -581,7 +581,7 @@ einval: li v0, -ENOSYS sys sys_preadv 6 /* 4330 */ sys sys_pwritev 6 sys sys_rt_tgsigqueueinfo 4 - sys sys_perf_counter_open 5 + sys 
sys_perf_event_open 5 sys sys_accept4 4 .endm diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index b97b993846d..18bf7f32c5e 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -418,6 +418,6 @@ sys_call_table: PTR sys_preadv PTR sys_pwritev /* 5390 */ PTR sys_rt_tgsigqueueinfo - PTR sys_perf_counter_open + PTR sys_perf_event_open PTR sys_accept4 .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 1a6ae124635..6ebc0797669 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -416,6 +416,6 @@ EXPORT(sysn32_call_table) PTR sys_preadv PTR sys_pwritev PTR compat_sys_rt_tgsigqueueinfo /* 5295 */ - PTR sys_perf_counter_open + PTR sys_perf_event_open PTR sys_accept4 .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index cd31087a651..9bbf9775e0b 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -536,6 +536,6 @@ sys_call_table: PTR compat_sys_preadv /* 4330 */ PTR compat_sys_pwritev PTR compat_sys_rt_tgsigqueueinfo - PTR sys_perf_counter_open + PTR sys_perf_event_open PTR sys_accept4 .size sys_call_table,.-sys_call_table diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index fad68616af3..2a983931c11 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -347,7 +347,7 @@ #define __NR_preadv 334 #define __NR_pwritev 335 #define __NR_rt_tgsigqueueinfo 336 -#define __NR_perf_counter_open 337 +#define __NR_perf_event_open 337 #ifdef __KERNEL__ diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index e0d2563af4f..a94e7ea3faa 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -723,7 +723,7 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev /* 335 */ .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open nr_syscalls=(.-sys_call_table)/4 diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 06f8d5b5b0f..f388dc68f60 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -16,7 +16,7 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/include/asm/perf_counter.h b/arch/parisc/include/asm/perf_counter.h deleted file mode 100644 index dc9e829f701..00000000000 --- a/arch/parisc/include/asm/perf_counter.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __ASM_PARISC_PERF_COUNTER_H -#define __ASM_PARISC_PERF_COUNTER_H - -/* parisc only supports software counters through this interface. */ -static inline void set_perf_counter_pending(void) { } - -#endif /* __ASM_PARISC_PERF_COUNTER_H */ diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h new file mode 100644 index 00000000000..cc146427d8f --- /dev/null +++ b/arch/parisc/include/asm/perf_event.h @@ -0,0 +1,7 @@ +#ifndef __ASM_PARISC_PERF_EVENT_H +#define __ASM_PARISC_PERF_EVENT_H + +/* parisc only supports software events through this interface. 
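+ * Software events (context switches, page faults and the like) are
+ * counted from generic kernel code and need no PMU interrupt, which is
+ * presumably why an empty stub is all this architecture provides.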
*/ +static inline void set_perf_event_pending(void) { } + +#endif /* __ASM_PARISC_PERF_EVENT_H */ diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index f3d3b8b012c..cda158318c6 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -810,9 +810,9 @@ #define __NR_preadv (__NR_Linux + 315) #define __NR_pwritev (__NR_Linux + 316) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 317) -#define __NR_perf_counter_open (__NR_Linux + 318) +#define __NR_perf_event_open (__NR_Linux + 318) -#define __NR_Linux_syscalls (__NR_perf_counter_open + 1) +#define __NR_Linux_syscalls (__NR_perf_event_open + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index cf145eb026b..843f423dec6 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -416,7 +416,7 @@ ENTRY_COMP(preadv) /* 315 */ ENTRY_COMP(pwritev) ENTRY_COMP(rt_tgsigqueueinfo) - ENTRY_SAME(perf_counter_open) + ENTRY_SAME(perf_event_open) /* Nothing yet */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8250902265c..4fd479059d6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -129,7 +129,7 @@ config PPC select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config EARLY_PRINTK bool diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index e73d554538d..abbc2aaaced 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -135,43 +135,43 @@ static inline int irqs_disabled_flags(unsigned long flags) */ struct irq_chip; -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PPC64 -static inline unsigned long test_perf_counter_pending(void) +static inline unsigned long test_perf_event_pending(void) { unsigned long x; asm volatile("lbz %0,%1(13)" : "=r" (x) - : "i" (offsetof(struct paca_struct, perf_counter_pending))); + : "i" (offsetof(struct paca_struct, perf_event_pending))); return x; } -static inline void set_perf_counter_pending(void) +static inline void set_perf_event_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (1), - "i" (offsetof(struct paca_struct, perf_counter_pending))); + "i" (offsetof(struct paca_struct, perf_event_pending))); } -static inline void clear_perf_counter_pending(void) +static inline void clear_perf_event_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (0), - "i" (offsetof(struct paca_struct, perf_counter_pending))); + "i" (offsetof(struct paca_struct, perf_event_pending))); } #endif /* CONFIG_PPC64 */ -#else /* CONFIG_PERF_COUNTERS */ +#else /* CONFIG_PERF_EVENTS */ -static inline unsigned long test_perf_counter_pending(void) +static inline unsigned long test_perf_event_pending(void) { return 0; } -static inline void clear_perf_counter_pending(void) {} -#endif /* CONFIG_PERF_COUNTERS */ +static inline void clear_perf_event_pending(void) {} +#endif /* CONFIG_PERF_EVENTS */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index b634456ea89..154f405b642 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -122,7 +122,7 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ - u8 perf_counter_pending; /* PM interrupt 
while soft-disabled */ + u8 perf_event_pending; /* PM interrupt while soft-disabled */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h deleted file mode 100644 index 0ea0639fcf7..00000000000 --- a/arch/powerpc/include/asm/perf_counter.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Performance counter support - PowerPC-specific definitions. - * - * Copyright 2008-2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include - -#include - -#define MAX_HWCOUNTERS 8 -#define MAX_EVENT_ALTERNATIVES 8 -#define MAX_LIMITED_HWCOUNTERS 2 - -/* - * This struct provides the constants and functions needed to - * describe the PMU on a particular POWER-family CPU. - */ -struct power_pmu { - const char *name; - int n_counter; - int max_alternatives; - unsigned long add_fields; - unsigned long test_adder; - int (*compute_mmcr)(u64 events[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]); - int (*get_constraint)(u64 event, unsigned long *mskp, - unsigned long *valp); - int (*get_alternatives)(u64 event, unsigned int flags, - u64 alt[]); - void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); - int (*limited_pmc_event)(u64 event); - u32 flags; - int n_generic; - int *generic_events; - int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; -}; - -/* - * Values for power_pmu.flags - */ -#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */ -#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ - -/* - * Values for flags to get_alternatives() - */ -#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ -#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ -#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ - -extern int register_power_pmu(struct power_pmu *); - -struct pt_regs; -extern unsigned long perf_misc_flags(struct pt_regs *regs); -extern unsigned long perf_instruction_pointer(struct pt_regs *regs); - -#define PERF_COUNTER_INDEX_OFFSET 1 - -/* - * Only override the default definitions in include/linux/perf_counter.h - * if we have hardware PMU support. - */ -#ifdef CONFIG_PPC_PERF_CTRS -#define perf_misc_flags(regs) perf_misc_flags(regs) -#endif - -/* - * The power_pmu.get_constraint function returns a 32/64-bit value and - * a 32/64-bit mask that express the constraints between this event and - * other events. - * - * The value and mask are divided up into (non-overlapping) bitfields - * of three different types: - * - * Select field: this expresses the constraint that some set of bits - * in MMCR* needs to be set to a specific value for this event. For a - * select field, the mask contains 1s in every bit of the field, and - * the value contains a unique value for each possible setting of the - * MMCR* bits. The constraint checking code will ensure that two events - * that set the same field in their masks have the same value in their - * value dwords. - * - * Add field: this expresses the constraint that there can be at most - * N events in a particular class. A field of k bits can be used for - * N <= 2^(k-1) - 1. 
The mask has the most significant bit of the field - * set (and the other bits 0), and the value has only the least significant - * bit of the field set. In addition, the 'add_fields' and 'test_adder' - * in the struct power_pmu for this processor come into play. The - * add_fields value contains 1 in the LSB of the field, and the - * test_adder contains 2^(k-1) - 1 - N in the field. - * - * NAND field: this expresses the constraint that you may not have events - * in all of a set of classes. (For example, on PPC970, you can't select - * events from the FPU, ISU and IDU simultaneously, although any two are - * possible.) For N classes, the field is N+1 bits wide, and each class - * is assigned one bit from the least-significant N bits. The mask has - * only the most-significant bit set, and the value has only the bit - * for the event's class set. The test_adder has the least significant - * bit set in the field. - * - * If an event is not subject to the constraint expressed by a particular - * field, then it will have 0 in both the mask and value for that field. - */ diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h new file mode 100644 index 00000000000..2499aaadaeb --- /dev/null +++ b/arch/powerpc/include/asm/perf_event.h @@ -0,0 +1,110 @@ +/* + * Performance event support - PowerPC-specific definitions. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + +#include + +#define MAX_HWEVENTS 8 +#define MAX_EVENT_ALTERNATIVES 8 +#define MAX_LIMITED_HWEVENTS 2 + +/* + * This struct provides the constants and functions needed to + * describe the PMU on a particular POWER-family CPU. + */ +struct power_pmu { + const char *name; + int n_event; + int max_alternatives; + unsigned long add_fields; + unsigned long test_adder; + int (*compute_mmcr)(u64 events[], int n_ev, + unsigned int hwc[], unsigned long mmcr[]); + int (*get_constraint)(u64 event_id, unsigned long *mskp, + unsigned long *valp); + int (*get_alternatives)(u64 event_id, unsigned int flags, + u64 alt[]); + void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); + int (*limited_pmc_event)(u64 event_id); + u32 flags; + int n_generic; + int *generic_events; + int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; +}; + +/* + * Values for power_pmu.flags + */ +#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */ +#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ + +/* + * Values for flags to get_alternatives() + */ +#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ +#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ +#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ + +extern int register_power_pmu(struct power_pmu *); + +struct pt_regs; +extern unsigned long perf_misc_flags(struct pt_regs *regs); +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); + +#define PERF_EVENT_INDEX_OFFSET 1 + +/* + * Only override the default definitions in include/linux/perf_event.h + * if we have hardware PMU support. 
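+ *
+ * (Background on the self-referential #define below, assuming the usual
+ * kernel idiom: the generic fallbacks in include/linux/perf_event.h are
+ * wrapped in #ifndef perf_misc_flags, so defining a function-like macro
+ * of the same name suppresses the fallback, while the macro body still
+ * expands to a call to the extern function declared above, since
+ * function-like macros do not expand recursively.)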
+ */ +#ifdef CONFIG_PPC_PERF_CTRS +#define perf_misc_flags(regs) perf_misc_flags(regs) +#endif + +/* + * The power_pmu.get_constraint function returns a 32/64-bit value and + * a 32/64-bit mask that express the constraints between this event_id and + * other events. + * + * The value and mask are divided up into (non-overlapping) bitfields + * of three different types: + * + * Select field: this expresses the constraint that some set of bits + * in MMCR* needs to be set to a specific value for this event_id. For a + * select field, the mask contains 1s in every bit of the field, and + * the value contains a unique value for each possible setting of the + * MMCR* bits. The constraint checking code will ensure that two events + * that set the same field in their masks have the same value in their + * value dwords. + * + * Add field: this expresses the constraint that there can be at most + * N events in a particular class. A field of k bits can be used for + * N <= 2^(k-1) - 1. The mask has the most significant bit of the field + * set (and the other bits 0), and the value has only the least significant + * bit of the field set. In addition, the 'add_fields' and 'test_adder' + * in the struct power_pmu for this processor come into play. The + * add_fields value contains 1 in the LSB of the field, and the + * test_adder contains 2^(k-1) - 1 - N in the field. + * + * NAND field: this expresses the constraint that you may not have events + * in all of a set of classes. (For example, on PPC970, you can't select + * events from the FPU, ISU and IDU simultaneously, although any two are + * possible.) For N classes, the field is N+1 bits wide, and each class + * is assigned one bit from the least-significant N bits. The mask has + * only the most-significant bit set, and the value has only the bit + * for the event_id's class set. The test_adder has the least significant + * bit set in the field. + * + * If an event_id is not subject to the constraint expressed by a particular + * field, then it will have 0 in both the mask and value for that field. 
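+ *
+ * A worked example of the add field, with illustrative numbers rather
+ * than any particular PMU's: take a k = 3 bit field allowing at most
+ * N = 2 events of the class. add_fields then carries 0b001 in the field
+ * and test_adder carries 2^(k-1) - 1 - N = 1. Each event contributes 1,
+ * so after c events the field holds c; adding test_adder yields c + 1,
+ * which sets the field's most significant bit (the only bit in the mask)
+ * exactly when c > N, i.e. when a third event of the class is added and
+ * the constraint check must fail.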
+ */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index ed24bd92fe4..c7d671a7d9a 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -322,7 +322,7 @@ SYSCALL_SPU(epoll_create1) SYSCALL_SPU(dup3) SYSCALL_SPU(pipe2) SYSCALL(inotify_init1) -SYSCALL_SPU(perf_counter_open) +SYSCALL_SPU(perf_event_open) COMPAT_SYS_SPU(preadv) COMPAT_SYS_SPU(pwritev) COMPAT_SYS(rt_tgsigqueueinfo) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index cef080bfc60..f6ca7617676 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -341,7 +341,7 @@ #define __NR_dup3 316 #define __NR_pipe2 317 #define __NR_inotify_init1 318 -#define __NR_perf_counter_open 319 +#define __NR_perf_event_open 319 #define __NR_preadv 320 #define __NR_pwritev 321 #define __NR_rt_tgsigqueueinfo 322 diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 569f79ccd31..b23664a0b86 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -97,7 +97,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o perf_callchain.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o perf_callchain.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f0df285f0f8..0812b0f414b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -133,7 +133,7 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); - DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending)); + DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 66bcda34a6b..900e0eea009 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -556,14 +556,14 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); -#ifdef CONFIG_PERF_COUNTERS - /* check paca->perf_counter_pending if we're enabling ints */ +#ifdef CONFIG_PERF_EVENTS + /* check paca->perf_event_pending if we're enabling ints */ lbz r3,PACAPERFPEND(r13) and. 
r3,r3,r5 beq 27f - bl .perf_counter_do_pending + bl .perf_event_do_pending 27: -#endif /* CONFIG_PERF_COUNTERS */ +#endif /* CONFIG_PERF_EVENTS */ /* extract EE bit and use it to restore paca->hard_enabled */ ld r3,_MSR(r1) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f7f376ea7b1..e5d12117798 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include @@ -138,9 +138,9 @@ notrace void raw_local_irq_restore(unsigned long en) } #endif /* CONFIG_PPC_STD_MMU_64 */ - if (test_perf_counter_pending()) { - clear_perf_counter_pending(); - perf_counter_do_pending(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); } /* diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c index cc466d039af..09d72028f31 100644 --- a/arch/powerpc/kernel/mpc7450-pmu.c +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index f74b62c6751..0a03cf70d24 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -10,7 +10,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c deleted file mode 100644 index 5ccf9bca96c..00000000000 --- a/arch/powerpc/kernel/perf_counter.c +++ /dev/null @@ -1,1315 +0,0 @@ -/* - * Performance counter support - powerpc architecture code - * - * Copyright 2008-2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct cpu_hw_counters { - int n_counters; - int n_percpu; - int disabled; - int n_added; - int n_limited; - u8 pmcs_enabled; - struct perf_counter *counter[MAX_HWCOUNTERS]; - u64 events[MAX_HWCOUNTERS]; - unsigned int flags[MAX_HWCOUNTERS]; - unsigned long mmcr[3]; - struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; - u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; - u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; -}; -DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); - -struct power_pmu *ppmu; - -/* - * Normally, to ignore kernel events we set the FCS (freeze counters - * in supervisor mode) bit in MMCR0, but if the kernel runs with the - * hypervisor bit set in the MSR, or if we are running on a processor - * where the hypervisor bit is forced to 1 (as on Apple G5 processors), - * then we need to use the FCHV bit to ignore kernel events. - */ -static unsigned int freeze_counters_kernel = MMCR0_FCS; - -/* - * 32-bit doesn't have MMCRA but does have an MMCR2, - * and a few other names are different. 
- */ -#ifdef CONFIG_PPC32 - -#define MMCR0_FCHV 0 -#define MMCR0_PMCjCE MMCR0_PMCnCE - -#define SPRN_MMCRA SPRN_MMCR2 -#define MMCRA_SAMPLE_ENABLE 0 - -static inline unsigned long perf_ip_adjust(struct pt_regs *regs) -{ - return 0; -} -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } -static inline u32 perf_get_misc_flags(struct pt_regs *regs) -{ - return 0; -} -static inline void perf_read_regs(struct pt_regs *regs) { } -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return 0; -} - -#endif /* CONFIG_PPC32 */ - -/* - * Things that are specific to 64-bit implementations. - */ -#ifdef CONFIG_PPC64 - -static inline unsigned long perf_ip_adjust(struct pt_regs *regs) -{ - unsigned long mmcra = regs->dsisr; - - if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { - unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; - if (slot > 1) - return 4 * (slot - 1); - } - return 0; -} - -/* - * The user wants a data address recorded. - * If we're not doing instruction sampling, give them the SDAR - * (sampled data address). If we are doing instruction sampling, then - * only give them the SDAR if it corresponds to the instruction - * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC - * bit in MMCRA. - */ -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) -{ - unsigned long mmcra = regs->dsisr; - unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? - POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; - - if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) - *addrp = mfspr(SPRN_SDAR); -} - -static inline u32 perf_get_misc_flags(struct pt_regs *regs) -{ - unsigned long mmcra = regs->dsisr; - - if (TRAP(regs) != 0xf00) - return 0; /* not a PMU interrupt */ - - if (ppmu->flags & PPMU_ALT_SIPR) { - if (mmcra & POWER6_MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & POWER6_MMCRA_SIPR) ? - PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL; - } - if (mmcra & MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; -} - -/* - * Overload regs->dsisr to store MMCRA so we only need to read it once - * on each interrupt. - */ -static inline void perf_read_regs(struct pt_regs *regs) -{ - regs->dsisr = mfspr(SPRN_MMCRA); -} - -/* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. - */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return !regs->softe; -} - -#endif /* CONFIG_PPC64 */ - -static void perf_counter_interrupt(struct pt_regs *regs); - -void perf_counter_print_debug(void) -{ -} - -/* - * Read one performance monitor counter (PMC). - */ -static unsigned long read_pmc(int idx) -{ - unsigned long val; - - switch (idx) { - case 1: - val = mfspr(SPRN_PMC1); - break; - case 2: - val = mfspr(SPRN_PMC2); - break; - case 3: - val = mfspr(SPRN_PMC3); - break; - case 4: - val = mfspr(SPRN_PMC4); - break; - case 5: - val = mfspr(SPRN_PMC5); - break; - case 6: - val = mfspr(SPRN_PMC6); - break; -#ifdef CONFIG_PPC64 - case 7: - val = mfspr(SPRN_PMC7); - break; - case 8: - val = mfspr(SPRN_PMC8); - break; -#endif /* CONFIG_PPC64 */ - default: - printk(KERN_ERR "oops trying to read PMC%d\n", idx); - val = 0; - } - return val; -} - -/* - * Write one PMC. 
- */ -static void write_pmc(int idx, unsigned long val) -{ - switch (idx) { - case 1: - mtspr(SPRN_PMC1, val); - break; - case 2: - mtspr(SPRN_PMC2, val); - break; - case 3: - mtspr(SPRN_PMC3, val); - break; - case 4: - mtspr(SPRN_PMC4, val); - break; - case 5: - mtspr(SPRN_PMC5, val); - break; - case 6: - mtspr(SPRN_PMC6, val); - break; -#ifdef CONFIG_PPC64 - case 7: - mtspr(SPRN_PMC7, val); - break; - case 8: - mtspr(SPRN_PMC8, val); - break; -#endif /* CONFIG_PPC64 */ - default: - printk(KERN_ERR "oops trying to write PMC%d\n", idx); - } -} - -/* - * Check if a set of events can all go on the PMU at once. - * If they can't, this will look at alternative codes for the events - * and see if any combination of alternative codes is feasible. - * The feasible set is returned in event[]. - */ -static int power_check_constraints(struct cpu_hw_counters *cpuhw, - u64 event[], unsigned int cflags[], - int n_ev) -{ - unsigned long mask, value, nv; - unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; - int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; - int i, j; - unsigned long addf = ppmu->add_fields; - unsigned long tadd = ppmu->test_adder; - - if (n_ev > ppmu->n_counter) - return -1; - - /* First see if the events will go on as-is */ - for (i = 0; i < n_ev; ++i) { - if ((cflags[i] & PPMU_LIMITED_PMC_REQD) - && !ppmu->limited_pmc_event(event[i])) { - ppmu->get_alternatives(event[i], cflags[i], - cpuhw->alternatives[i]); - event[i] = cpuhw->alternatives[i][0]; - } - if (ppmu->get_constraint(event[i], &cpuhw->amasks[i][0], - &cpuhw->avalues[i][0])) - return -1; - } - value = mask = 0; - for (i = 0; i < n_ev; ++i) { - nv = (value | cpuhw->avalues[i][0]) + - (value & cpuhw->avalues[i][0] & addf); - if ((((nv + tadd) ^ value) & mask) != 0 || - (((nv + tadd) ^ cpuhw->avalues[i][0]) & - cpuhw->amasks[i][0]) != 0) - break; - value = nv; - mask |= cpuhw->amasks[i][0]; - } - if (i == n_ev) - return 0; /* all OK */ - - /* doesn't work, gather alternatives... */ - if (!ppmu->get_alternatives) - return -1; - for (i = 0; i < n_ev; ++i) { - choice[i] = 0; - n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], - cpuhw->alternatives[i]); - for (j = 1; j < n_alt[i]; ++j) - ppmu->get_constraint(cpuhw->alternatives[i][j], - &cpuhw->amasks[i][j], - &cpuhw->avalues[i][j]); - } - - /* enumerate all possibilities and see if any will work */ - i = 0; - j = -1; - value = mask = nv = 0; - while (i < n_ev) { - if (j >= 0) { - /* we're backtracking, restore context */ - value = svalues[i]; - mask = smasks[i]; - j = choice[i]; - } - /* - * See if any alternative k for event i, - * where k > j, will satisfy the constraints. - */ - while (++j < n_alt[i]) { - nv = (value | cpuhw->avalues[i][j]) + - (value & cpuhw->avalues[i][j] & addf); - if ((((nv + tadd) ^ value) & mask) == 0 && - (((nv + tadd) ^ cpuhw->avalues[i][j]) - & cpuhw->amasks[i][j]) == 0) - break; - } - if (j >= n_alt[i]) { - /* - * No feasible alternative, backtrack - * to event i-1 and continue enumerating its - * alternatives from where we got up to. - */ - if (--i < 0) - return -1; - } else { - /* - * Found a feasible alternative for event i, - * remember where we got up to with this event, - * go on to the next event, and start with - * the first alternative for it. 
- */ - choice[i] = j; - svalues[i] = value; - smasks[i] = mask; - value = nv; - mask |= cpuhw->amasks[i][j]; - ++i; - j = -1; - } - } - - /* OK, we have a feasible combination, tell the caller the solution */ - for (i = 0; i < n_ev; ++i) - event[i] = cpuhw->alternatives[i][choice[i]]; - return 0; -} - -/* - * Check if newly-added counters have consistent settings for - * exclude_{user,kernel,hv} with each other and any previously - * added counters. - */ -static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], - int n_prev, int n_new) -{ - int eu = 0, ek = 0, eh = 0; - int i, n, first; - struct perf_counter *counter; - - n = n_prev + n_new; - if (n <= 1) - return 0; - - first = 1; - for (i = 0; i < n; ++i) { - if (cflags[i] & PPMU_LIMITED_PMC_OK) { - cflags[i] &= ~PPMU_LIMITED_PMC_REQD; - continue; - } - counter = ctrs[i]; - if (first) { - eu = counter->attr.exclude_user; - ek = counter->attr.exclude_kernel; - eh = counter->attr.exclude_hv; - first = 0; - } else if (counter->attr.exclude_user != eu || - counter->attr.exclude_kernel != ek || - counter->attr.exclude_hv != eh) { - return -EAGAIN; - } - } - - if (eu || ek || eh) - for (i = 0; i < n; ++i) - if (cflags[i] & PPMU_LIMITED_PMC_OK) - cflags[i] |= PPMU_LIMITED_PMC_REQD; - - return 0; -} - -static void power_pmu_read(struct perf_counter *counter) -{ - s64 val, delta, prev; - - if (!counter->hw.idx) - return; - /* - * Performance monitor interrupts come even when interrupts - * are soft-disabled, as long as interrupts are hard-enabled. - * Therefore we treat them like NMIs. - */ - do { - prev = atomic64_read(&counter->hw.prev_count); - barrier(); - val = read_pmc(counter->hw.idx); - } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev); - - /* The counters are only 32 bits wide */ - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &counter->hw.period_left); -} - -/* - * On some machines, PMC5 and PMC6 can't be written, don't respect - * the freeze conditions, and don't generate interrupts. This tells - * us if `counter' is using such a PMC. - */ -static int is_limited_pmc(int pmcnum) -{ - return (ppmu->flags & PPMU_LIMITED_PMC5_6) - && (pmcnum == 5 || pmcnum == 6); -} - -static void freeze_limited_counters(struct cpu_hw_counters *cpuhw, - unsigned long pmc5, unsigned long pmc6) -{ - struct perf_counter *counter; - u64 val, prev, delta; - int i; - - for (i = 0; i < cpuhw->n_limited; ++i) { - counter = cpuhw->limited_counter[i]; - if (!counter->hw.idx) - continue; - val = (counter->hw.idx == 5) ? pmc5 : pmc6; - prev = atomic64_read(&counter->hw.prev_count); - counter->hw.idx = 0; - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - } -} - -static void thaw_limited_counters(struct cpu_hw_counters *cpuhw, - unsigned long pmc5, unsigned long pmc6) -{ - struct perf_counter *counter; - u64 val; - int i; - - for (i = 0; i < cpuhw->n_limited; ++i) { - counter = cpuhw->limited_counter[i]; - counter->hw.idx = cpuhw->limited_hwidx[i]; - val = (counter->hw.idx == 5) ? pmc5 : pmc6; - atomic64_set(&counter->hw.prev_count, val); - perf_counter_update_userpage(counter); - } -} - -/* - * Since limited counters don't respect the freeze conditions, we - * have to read them immediately after freezing or unfreezing the - * other counters. 
We try to keep the values from the limited - * counters as consistent as possible by keeping the delay (in - * cycles and instructions) between freezing/unfreezing and reading - * the limited counters as small and consistent as possible. - * Therefore, if any limited counters are in use, we read them - * both, and always in the same order, to minimize variability, - * and do it inside the same asm that writes MMCR0. - */ -static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) -{ - unsigned long pmc5, pmc6; - - if (!cpuhw->n_limited) { - mtspr(SPRN_MMCR0, mmcr0); - return; - } - - /* - * Write MMCR0, then read PMC5 and PMC6 immediately. - * To ensure we don't get a performance monitor interrupt - * between writing MMCR0 and freezing/thawing the limited - * counters, we first write MMCR0 with the counter overflow - * interrupt enable bits turned off. - */ - asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" - : "=&r" (pmc5), "=&r" (pmc6) - : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), - "i" (SPRN_MMCR0), - "i" (SPRN_PMC5), "i" (SPRN_PMC6)); - - if (mmcr0 & MMCR0_FC) - freeze_limited_counters(cpuhw, pmc5, pmc6); - else - thaw_limited_counters(cpuhw, pmc5, pmc6); - - /* - * Write the full MMCR0 including the counter overflow interrupt - * enable bits, if necessary. - */ - if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) - mtspr(SPRN_MMCR0, mmcr0); -} - -/* - * Disable all counters to prevent PMU interrupts and to allow - * counters to be added or removed. - */ -void hw_perf_disable(void) -{ - struct cpu_hw_counters *cpuhw; - unsigned long flags; - - if (!ppmu) - return; - local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_counters); - - if (!cpuhw->disabled) { - cpuhw->disabled = 1; - cpuhw->n_added = 0; - - /* - * Check if we ever enabled the PMU on this cpu. - */ - if (!cpuhw->pmcs_enabled) { - ppc_enable_pmcs(); - cpuhw->pmcs_enabled = 1; - } - - /* - * Disable instruction sampling if it was enabled - */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mtspr(SPRN_MMCRA, - cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mb(); - } - - /* - * Set the 'freeze counters' bit. - * The barrier is to make sure the mtspr has been - * executed and the PMU has frozen the counters - * before we return. - */ - write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); - mb(); - } - local_irq_restore(flags); -} - -/* - * Re-enable all counters if disable == 0. - * If we were previously disabled and counters were added, then - * put the new config on the PMU. - */ -void hw_perf_enable(void) -{ - struct perf_counter *counter; - struct cpu_hw_counters *cpuhw; - unsigned long flags; - long i; - unsigned long val; - s64 left; - unsigned int hwc_index[MAX_HWCOUNTERS]; - int n_lim; - int idx; - - if (!ppmu) - return; - local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_counters); - if (!cpuhw->disabled) { - local_irq_restore(flags); - return; - } - cpuhw->disabled = 0; - - /* - * If we didn't change anything, or only removed counters, - * no need to recalculate MMCR* settings and reset the PMCs. - * Just reenable the PMU with the current MMCR* settings - * (possibly updated for removal of counters). 
- */ - if (!cpuhw->n_added) { - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - if (cpuhw->n_counters == 0) - ppc_set_pmu_inuse(0); - goto out_enable; - } - - /* - * Compute MMCR* values for the new set of counters - */ - if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index, - cpuhw->mmcr)) { - /* shouldn't ever get here */ - printk(KERN_ERR "oops compute_mmcr failed\n"); - goto out; - } - - /* - * Add in MMCR0 freeze bits corresponding to the - * attr.exclude_* bits for the first counter. - * We have already checked that all counters have the - * same values for these bits as the first counter. - */ - counter = cpuhw->counter[0]; - if (counter->attr.exclude_user) - cpuhw->mmcr[0] |= MMCR0_FCP; - if (counter->attr.exclude_kernel) - cpuhw->mmcr[0] |= freeze_counters_kernel; - if (counter->attr.exclude_hv) - cpuhw->mmcr[0] |= MMCR0_FCHV; - - /* - * Write the new configuration to MMCR* with the freeze - * bit set and set the hardware counters to their initial values. - * Then unfreeze the counters. - */ - ppc_set_pmu_inuse(1); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) - | MMCR0_FC); - - /* - * Read off any pre-existing counters that need to move - * to another PMC. - */ - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { - power_pmu_read(counter); - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; - } - } - - /* - * Initialize the PMCs for all the new and moved counters. - */ - cpuhw->n_limited = n_lim = 0; - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter->hw.idx) - continue; - idx = hwc_index[i] + 1; - if (is_limited_pmc(idx)) { - cpuhw->limited_counter[n_lim] = counter; - cpuhw->limited_hwidx[n_lim] = idx; - ++n_lim; - continue; - } - val = 0; - if (counter->hw.sample_period) { - left = atomic64_read(&counter->hw.period_left); - if (left < 0x80000000L) - val = 0x80000000L - left; - } - atomic64_set(&counter->hw.prev_count, val); - counter->hw.idx = idx; - write_pmc(idx, val); - perf_counter_update_userpage(counter); - } - cpuhw->n_limited = n_lim; - cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; - - out_enable: - mb(); - write_mmcr0(cpuhw, cpuhw->mmcr[0]); - - /* - * Enable instruction sampling if necessary - */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mb(); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); - } - - out: - local_irq_restore(flags); -} - -static int collect_events(struct perf_counter *group, int max_count, - struct perf_counter *ctrs[], u64 *events, - unsigned int *flags) -{ - int n = 0; - struct perf_counter *counter; - - if (!is_software_counter(group)) { - if (n >= max_count) - return -1; - ctrs[n] = group; - flags[n] = group->hw.counter_base; - events[n++] = group->hw.config; - } - list_for_each_entry(counter, &group->sibling_list, list_entry) { - if (!is_software_counter(counter) && - counter->state != PERF_COUNTER_STATE_OFF) { - if (n >= max_count) - return -1; - ctrs[n] = counter; - flags[n] = counter->hw.counter_base; - events[n++] = counter->hw.config; - } - } - return n; -} - -static void counter_sched_in(struct perf_counter *counter, int cpu) -{ - counter->state = PERF_COUNTER_STATE_ACTIVE; - counter->oncpu = cpu; - counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; - if (is_software_counter(counter)) - counter->pmu->enable(counter); -} - 
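power_pmu_read() above combines a lock-free prev_count/cmpxchg loop with a delta masked to 32 bits, because the PMCs are only 32 bits wide; the mask is what keeps the accumulated 64-bit count correct when a hardware counter wraps. A standalone sketch of that wrap arithmetic (plain user-space C with made-up values, written here for illustration, not kernel code):

#include <stdio.h>

/* Accumulate a 32-bit PMC into a 64-bit count the way power_pmu_read()
 * does: mask the delta to 32 bits so a counter that wraps past
 * 0xffffffff still yields the right increment. */
static unsigned long long accumulate(unsigned long long count,
				     unsigned long prev, unsigned long val)
{
	return count + ((val - prev) & 0xfffffffful);
}

int main(void)
{
	/* The counter read 0xfffffff0 last time and has wrapped to
	 * 0x00000010 now: the true increment is 0x20, and the mask
	 * recovers it even though val < prev. */
	printf("delta = %llu\n", accumulate(0, 0xfffffff0ul, 0x00000010ul));
	return 0;
}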
-/* - * Called to enable a whole group of counters. - * Returns 1 if the group was enabled, or -EAGAIN if it could not be. - * Assumes the caller has disabled interrupts and has - * frozen the PMU with hw_perf_save_disable. - */ -int hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu) -{ - struct cpu_hw_counters *cpuhw; - long i, n, n0; - struct perf_counter *sub; - - if (!ppmu) - return 0; - cpuhw = &__get_cpu_var(cpu_hw_counters); - n0 = cpuhw->n_counters; - n = collect_events(group_leader, ppmu->n_counter - n0, - &cpuhw->counter[n0], &cpuhw->events[n0], - &cpuhw->flags[n0]); - if (n < 0) - return -EAGAIN; - if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) - return -EAGAIN; - i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); - if (i < 0) - return -EAGAIN; - cpuhw->n_counters = n0 + n; - cpuhw->n_added += n; - - /* - * OK, this group can go on; update counter states etc., - * and enable any software counters - */ - for (i = n0; i < n0 + n; ++i) - cpuhw->counter[i]->hw.config = cpuhw->events[i]; - cpuctx->active_oncpu += n; - n = 1; - counter_sched_in(group_leader, cpu); - list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { - if (sub->state != PERF_COUNTER_STATE_OFF) { - counter_sched_in(sub, cpu); - ++n; - } - } - ctx->nr_active += n; - - return 1; -} - -/* - * Add a counter to the PMU. - * If all counters are not already frozen, then we disable and - * re-enable the PMU in order to get hw_perf_enable to do the - * actual work of reconfiguring the PMU. - */ -static int power_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuhw; - unsigned long flags; - int n0; - int ret = -EAGAIN; - - local_irq_save(flags); - perf_disable(); - - /* - * Add the counter to the list (if there is room) - * and check whether the total set is still feasible. - */ - cpuhw = &__get_cpu_var(cpu_hw_counters); - n0 = cpuhw->n_counters; - if (n0 >= ppmu->n_counter) - goto out; - cpuhw->counter[n0] = counter; - cpuhw->events[n0] = counter->hw.config; - cpuhw->flags[n0] = counter->hw.counter_base; - if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) - goto out; - if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) - goto out; - - counter->hw.config = cpuhw->events[n0]; - ++cpuhw->n_counters; - ++cpuhw->n_added; - - ret = 0; - out: - perf_enable(); - local_irq_restore(flags); - return ret; -} - -/* - * Remove a counter from the PMU. 
- */ -static void power_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuhw; - long i; - unsigned long flags; - - local_irq_save(flags); - perf_disable(); - - power_pmu_read(counter); - - cpuhw = &__get_cpu_var(cpu_hw_counters); - for (i = 0; i < cpuhw->n_counters; ++i) { - if (counter == cpuhw->counter[i]) { - while (++i < cpuhw->n_counters) - cpuhw->counter[i-1] = cpuhw->counter[i]; - --cpuhw->n_counters; - ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); - if (counter->hw.idx) { - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; - } - perf_counter_update_userpage(counter); - break; - } - } - for (i = 0; i < cpuhw->n_limited; ++i) - if (counter == cpuhw->limited_counter[i]) - break; - if (i < cpuhw->n_limited) { - while (++i < cpuhw->n_limited) { - cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; - cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; - } - --cpuhw->n_limited; - } - if (cpuhw->n_counters == 0) { - /* disable exceptions if no counters are running */ - cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); - } - - perf_enable(); - local_irq_restore(flags); -} - -/* - * Re-enable interrupts on a counter after they were throttled - * because they were coming too fast. - */ -static void power_pmu_unthrottle(struct perf_counter *counter) -{ - s64 val, left; - unsigned long flags; - - if (!counter->hw.idx || !counter->hw.sample_period) - return; - local_irq_save(flags); - perf_disable(); - power_pmu_read(counter); - left = counter->hw.sample_period; - counter->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(counter->hw.idx, val); - atomic64_set(&counter->hw.prev_count, val); - atomic64_set(&counter->hw.period_left, left); - perf_counter_update_userpage(counter); - perf_enable(); - local_irq_restore(flags); -} - -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, -}; - -/* - * Return 1 if we might be able to put counter on a limited PMC, - * or 0 if not. - * A counter can only go on a limited PMC if it counts something - * that a limited PMC can count, doesn't require interrupts, and - * doesn't exclude any processor mode. - */ -static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, - unsigned int flags) -{ - int n; - u64 alt[MAX_EVENT_ALTERNATIVES]; - - if (counter->attr.exclude_user - || counter->attr.exclude_kernel - || counter->attr.exclude_hv - || counter->attr.sample_period) - return 0; - - if (ppmu->limited_pmc_event(ev)) - return 1; - - /* - * The requested event isn't on a limited PMC already; - * see if any alternative code goes on a limited PMC. - */ - if (!ppmu->get_alternatives) - return 0; - - flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; - n = ppmu->get_alternatives(ev, flags, alt); - - return n > 0; -} - -/* - * Find an alternative event that goes on a normal PMC, if possible, - * and return the event code, or 0 if there is no such alternative. - * (Note: event code 0 is "don't count" on all machines.) 
- */ -static u64 normal_pmc_alternative(u64 ev, unsigned long flags) -{ - u64 alt[MAX_EVENT_ALTERNATIVES]; - int n; - - flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); - n = ppmu->get_alternatives(ev, flags, alt); - if (!n) - return 0; - return alt[0]; -} - -/* Number of perf_counters counting hardware events */ -static atomic_t num_counters; -/* Used to avoid races in calling reserve/release_pmc_hardware */ -static DEFINE_MUTEX(pmc_reserve_mutex); - -/* - * Release the PMU if this is the last perf_counter. - */ -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - if (!atomic_add_unless(&num_counters, -1, 1)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_dec_return(&num_counters) == 0) - release_pmc_hardware(); - mutex_unlock(&pmc_reserve_mutex); - } -} - -/* - * Translate a generic cache event config to a raw event code. - */ -static int hw_perf_cache_event(u64 config, u64 *eventp) -{ - unsigned long type, op, result; - int ev; - - if (!ppmu->cache_events) - return -EINVAL; - - /* unpack config */ - type = config & 0xff; - op = (config >> 8) & 0xff; - result = (config >> 16) & 0xff; - - if (type >= PERF_COUNT_HW_CACHE_MAX || - op >= PERF_COUNT_HW_CACHE_OP_MAX || - result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - ev = (*ppmu->cache_events)[type][op][result]; - if (ev == 0) - return -EOPNOTSUPP; - if (ev == -1) - return -EINVAL; - *eventp = ev; - return 0; -} - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - u64 ev; - unsigned long flags; - struct perf_counter *ctrs[MAX_HWCOUNTERS]; - u64 events[MAX_HWCOUNTERS]; - unsigned int cflags[MAX_HWCOUNTERS]; - int n; - int err; - struct cpu_hw_counters *cpuhw; - - if (!ppmu) - return ERR_PTR(-ENXIO); - switch (counter->attr.type) { - case PERF_TYPE_HARDWARE: - ev = counter->attr.config; - if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); - ev = ppmu->generic_events[ev]; - break; - case PERF_TYPE_HW_CACHE: - err = hw_perf_cache_event(counter->attr.config, &ev); - if (err) - return ERR_PTR(err); - break; - case PERF_TYPE_RAW: - ev = counter->attr.config; - break; - default: - return ERR_PTR(-EINVAL); - } - counter->hw.config_base = ev; - counter->hw.idx = 0; - - /* - * If we are not running on a hypervisor, force the - * exclude_hv bit to 0 so that we don't care what - * the user set it to. - */ - if (!firmware_has_feature(FW_FEATURE_LPAR)) - counter->attr.exclude_hv = 0; - - /* - * If this is a per-task counter, then we can use - * PM_RUN_* events interchangeably with their non RUN_* - * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. - * XXX we should check if the task is an idle task. - */ - flags = 0; - if (counter->ctx->task) - flags |= PPMU_ONLY_COUNT_RUN; - - /* - * If this machine has limited counters, check whether this - * event could go on a limited counter. - */ - if (ppmu->flags & PPMU_LIMITED_PMC5_6) { - if (can_go_on_limited_pmc(counter, ev, flags)) { - flags |= PPMU_LIMITED_PMC_OK; - } else if (ppmu->limited_pmc_event(ev)) { - /* - * The requested event is on a limited PMC, - * but we can't use a limited PMC; see if any - * alternative goes on a normal PMC. - */ - ev = normal_pmc_alternative(ev, flags); - if (!ev) - return ERR_PTR(-EINVAL); - } - } - - /* - * If this is in a group, check if it can go on with all the - * other hardware counters in the group. We assume the counter - * hasn't been linked into its leader's sibling list at this point. 
- */ - n = 0; - if (counter->group_leader != counter) { - n = collect_events(counter->group_leader, ppmu->n_counter - 1, - ctrs, events, cflags); - if (n < 0) - return ERR_PTR(-EINVAL); - } - events[n] = ev; - ctrs[n] = counter; - cflags[n] = flags; - if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); - - cpuhw = &get_cpu_var(cpu_hw_counters); - err = power_check_constraints(cpuhw, events, cflags, n + 1); - put_cpu_var(cpu_hw_counters); - if (err) - return ERR_PTR(-EINVAL); - - counter->hw.config = events[n]; - counter->hw.counter_base = cflags[n]; - counter->hw.last_period = counter->hw.sample_period; - atomic64_set(&counter->hw.period_left, counter->hw.last_period); - - /* - * See if we need to reserve the PMU. - * If no counters are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing - * reserve_pmc_hardware or release_pmc_hardware. - */ - err = 0; - if (!atomic_inc_not_zero(&num_counters)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&num_counters) == 0 && - reserve_pmc_hardware(perf_counter_interrupt)) - err = -EBUSY; - else - atomic_inc(&num_counters); - mutex_unlock(&pmc_reserve_mutex); - } - counter->destroy = hw_perf_counter_destroy; - - if (err) - return ERR_PTR(err); - return &power_pmu; -} - -/* - * A counter has overflowed; update its count and record - * things if requested. Note that interrupts are hard-disabled - * here so there is no possibility of being interrupted. - */ -static void record_and_restart(struct perf_counter *counter, unsigned long val, - struct pt_regs *regs, int nmi) -{ - u64 period = counter->hw.sample_period; - s64 prev, delta, left; - int record = 0; - - /* we don't have to worry about interrupts here */ - prev = atomic64_read(&counter->hw.prev_count); - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - - /* - * See if the total period for this counter has expired, - * and update for the next period. - */ - val = 0; - left = atomic64_read(&counter->hw.period_left) - delta; - if (period) { - if (left <= 0) { - left += period; - if (left <= 0) - left = period; - record = 1; - } - if (left < 0x80000000LL) - val = 0x80000000LL - left; - } - - /* - * Finally record data if requested. - */ - if (record) { - struct perf_sample_data data = { - .addr = 0, - .period = counter->hw.last_period, - }; - - if (counter->attr.sample_type & PERF_SAMPLE_ADDR) - perf_get_data_addr(regs, &data.addr); - - if (perf_counter_overflow(counter, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the counter to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each counter counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } - } - - write_pmc(counter->hw.idx, val); - atomic64_set(&counter->hw.prev_count, val); - atomic64_set(&counter->hw.period_left, left); - perf_counter_update_userpage(counter); -} - -/* - * Called from generic code to get the misc flags (i.e. processor mode) - * for an event. - */ -unsigned long perf_misc_flags(struct pt_regs *regs) -{ - u32 flags = perf_get_misc_flags(regs); - - if (flags) - return flags; - return user_mode(regs) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; -} - -/* - * Called from generic code to get the instruction pointer - * for an event. 
- */ -unsigned long perf_instruction_pointer(struct pt_regs *regs) -{ - unsigned long ip; - - if (TRAP(regs) != 0xf00) - return regs->nip; /* not a PMU interrupt */ - - ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); - return ip; -} - -/* - * Performance monitor interrupt stuff - */ -static void perf_counter_interrupt(struct pt_regs *regs) -{ - int i; - struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); - struct perf_counter *counter; - unsigned long val; - int found = 0; - int nmi; - - if (cpuhw->n_limited) - freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), - mfspr(SPRN_PMC6)); - - perf_read_regs(regs); - - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); - - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (!counter->hw.idx || is_limited_pmc(counter->hw.idx)) - continue; - val = read_pmc(counter->hw.idx); - if ((int)val < 0) { - /* counter has overflowed */ - found = 1; - record_and_restart(counter, val, regs, nmi); - } - } - - /* - * In case we didn't find and reset the counter that caused - * the interrupt, scan all counters and reset any that are - * negative, to avoid getting continual interrupts. - * Any that we processed in the previous loop will not be negative. - */ - if (!found) { - for (i = 0; i < ppmu->n_counter; ++i) { - if (is_limited_pmc(i + 1)) - continue; - val = read_pmc(i + 1); - if ((int)val < 0) - write_pmc(i + 1, 0); - } - } - - /* - * Reset MMCR0 to its normal value. This will set PMXE and - * clear FC (freeze counters) and PMAO (perf mon alert occurred) - * and thus allow interrupts to occur again. - * XXX might want to use MSR.PM to keep the counters frozen until - * we get back out of this interrupt. - */ - write_mmcr0(cpuhw, cpuhw->mmcr[0]); - - if (nmi) - nmi_exit(); - else - irq_exit(); -} - -void hw_perf_counter_setup(int cpu) -{ - struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); - - if (!ppmu) - return; - memset(cpuhw, 0, sizeof(*cpuhw)); - cpuhw->mmcr[0] = MMCR0_FC; -} - -int register_power_pmu(struct power_pmu *pmu) -{ - if (ppmu) - return -EBUSY; /* something's already registered */ - - ppmu = pmu; - pr_info("%s performance monitor hardware support registered\n", - pmu->name); - -#ifdef MSR_HV - /* - * Use FCHV to ignore kernel events if MSR.HV is set. - */ - if (mfmsr() & MSR_HV) - freeze_counters_kernel = MMCR0_FCHV; -#endif /* CONFIG_PPC64 */ - - return 0; -} diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c new file mode 100644 index 00000000000..c98321fcb45 --- /dev/null +++ b/arch/powerpc/kernel/perf_event.c @@ -0,0 +1,1315 @@ +/* + * Performance event support - powerpc architecture code + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct cpu_hw_events { + int n_events; + int n_percpu; + int disabled; + int n_added; + int n_limited; + u8 pmcs_enabled; + struct perf_event *event[MAX_HWEVENTS]; + u64 events[MAX_HWEVENTS]; + unsigned int flags[MAX_HWEVENTS]; + unsigned long mmcr[3]; + struct perf_event *limited_event[MAX_LIMITED_HWEVENTS]; + u8 limited_hwidx[MAX_LIMITED_HWEVENTS]; + u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; + unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; + unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; +}; +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +struct power_pmu *ppmu; + +/* + * Normally, to ignore kernel events we set the FCS (freeze events + * in supervisor mode) bit in MMCR0, but if the kernel runs with the + * hypervisor bit set in the MSR, or if we are running on a processor + * where the hypervisor bit is forced to 1 (as on Apple G5 processors), + * then we need to use the FCHV bit to ignore kernel events. + */ +static unsigned int freeze_events_kernel = MMCR0_FCS; + +/* + * 32-bit doesn't have MMCRA but does have an MMCR2, + * and a few other names are different. + */ +#ifdef CONFIG_PPC32 + +#define MMCR0_FCHV 0 +#define MMCR0_PMCjCE MMCR0_PMCnCE + +#define SPRN_MMCRA SPRN_MMCR2 +#define MMCRA_SAMPLE_ENABLE 0 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_read_regs(struct pt_regs *regs) { } +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return 0; +} + +#endif /* CONFIG_PPC32 */ + +/* + * Things that are specific to 64-bit implementations. + */ +#ifdef CONFIG_PPC64 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { + unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; + if (slot > 1) + return 4 * (slot - 1); + } + return 0; +} + +/* + * The user wants a data address recorded. + * If we're not doing instruction sampling, give them the SDAR + * (sampled data address). If we are doing instruction sampling, then + * only give them the SDAR if it corresponds to the instruction + * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC + * bit in MMCRA. + */ +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) +{ + unsigned long mmcra = regs->dsisr; + unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? + POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; + + if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) + *addrp = mfspr(SPRN_SDAR); +} + +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if (TRAP(regs) != 0xf00) + return 0; /* not a PMU interrupt */ + + if (ppmu->flags & PPMU_ALT_SIPR) { + if (mmcra & POWER6_MMCRA_SIHV) + return PERF_RECORD_MISC_HYPERVISOR; + return (mmcra & POWER6_MMCRA_SIPR) ? + PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL; + } + if (mmcra & MMCRA_SIHV) + return PERF_RECORD_MISC_HYPERVISOR; + return (mmcra & MMCRA_SIPR) ? PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; +} + +/* + * Overload regs->dsisr to store MMCRA so we only need to read it once + * on each interrupt. 
+ */ +static inline void perf_read_regs(struct pt_regs *regs) +{ + regs->dsisr = mfspr(SPRN_MMCRA); +} + +/* + * If interrupts were soft-disabled when a PMU interrupt occurs, treat + * it as an NMI. + */ +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return !regs->softe; +} + +#endif /* CONFIG_PPC64 */ + +static void perf_event_interrupt(struct pt_regs *regs); + +void perf_event_print_debug(void) +{ +} + +/* + * Read one performance monitor event (PMC). + */ +static unsigned long read_pmc(int idx) +{ + unsigned long val; + + switch (idx) { + case 1: + val = mfspr(SPRN_PMC1); + break; + case 2: + val = mfspr(SPRN_PMC2); + break; + case 3: + val = mfspr(SPRN_PMC3); + break; + case 4: + val = mfspr(SPRN_PMC4); + break; + case 5: + val = mfspr(SPRN_PMC5); + break; + case 6: + val = mfspr(SPRN_PMC6); + break; +#ifdef CONFIG_PPC64 + case 7: + val = mfspr(SPRN_PMC7); + break; + case 8: + val = mfspr(SPRN_PMC8); + break; +#endif /* CONFIG_PPC64 */ + default: + printk(KERN_ERR "oops trying to read PMC%d\n", idx); + val = 0; + } + return val; +} + +/* + * Write one PMC. + */ +static void write_pmc(int idx, unsigned long val) +{ + switch (idx) { + case 1: + mtspr(SPRN_PMC1, val); + break; + case 2: + mtspr(SPRN_PMC2, val); + break; + case 3: + mtspr(SPRN_PMC3, val); + break; + case 4: + mtspr(SPRN_PMC4, val); + break; + case 5: + mtspr(SPRN_PMC5, val); + break; + case 6: + mtspr(SPRN_PMC6, val); + break; +#ifdef CONFIG_PPC64 + case 7: + mtspr(SPRN_PMC7, val); + break; + case 8: + mtspr(SPRN_PMC8, val); + break; +#endif /* CONFIG_PPC64 */ + default: + printk(KERN_ERR "oops trying to write PMC%d\n", idx); + } +} + +/* + * Check if a set of events can all go on the PMU at once. + * If they can't, this will look at alternative codes for the events + * and see if any combination of alternative codes is feasible. + * The feasible set is returned in event_id[]. + */ +static int power_check_constraints(struct cpu_hw_events *cpuhw, + u64 event_id[], unsigned int cflags[], + int n_ev) +{ + unsigned long mask, value, nv; + unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS]; + int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS]; + int i, j; + unsigned long addf = ppmu->add_fields; + unsigned long tadd = ppmu->test_adder; + + if (n_ev > ppmu->n_event) + return -1; + + /* First see if the events will go on as-is */ + for (i = 0; i < n_ev; ++i) { + if ((cflags[i] & PPMU_LIMITED_PMC_REQD) + && !ppmu->limited_pmc_event(event_id[i])) { + ppmu->get_alternatives(event_id[i], cflags[i], + cpuhw->alternatives[i]); + event_id[i] = cpuhw->alternatives[i][0]; + } + if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0], + &cpuhw->avalues[i][0])) + return -1; + } + value = mask = 0; + for (i = 0; i < n_ev; ++i) { + nv = (value | cpuhw->avalues[i][0]) + + (value & cpuhw->avalues[i][0] & addf); + if ((((nv + tadd) ^ value) & mask) != 0 || + (((nv + tadd) ^ cpuhw->avalues[i][0]) & + cpuhw->amasks[i][0]) != 0) + break; + value = nv; + mask |= cpuhw->amasks[i][0]; + } + if (i == n_ev) + return 0; /* all OK */ + + /* doesn't work, gather alternatives... 
*/ + if (!ppmu->get_alternatives) + return -1; + for (i = 0; i < n_ev; ++i) { + choice[i] = 0; + n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i], + cpuhw->alternatives[i]); + for (j = 1; j < n_alt[i]; ++j) + ppmu->get_constraint(cpuhw->alternatives[i][j], + &cpuhw->amasks[i][j], + &cpuhw->avalues[i][j]); + } + + /* enumerate all possibilities and see if any will work */ + i = 0; + j = -1; + value = mask = nv = 0; + while (i < n_ev) { + if (j >= 0) { + /* we're backtracking, restore context */ + value = svalues[i]; + mask = smasks[i]; + j = choice[i]; + } + /* + * See if any alternative k for event_id i, + * where k > j, will satisfy the constraints. + */ + while (++j < n_alt[i]) { + nv = (value | cpuhw->avalues[i][j]) + + (value & cpuhw->avalues[i][j] & addf); + if ((((nv + tadd) ^ value) & mask) == 0 && + (((nv + tadd) ^ cpuhw->avalues[i][j]) + & cpuhw->amasks[i][j]) == 0) + break; + } + if (j >= n_alt[i]) { + /* + * No feasible alternative, backtrack + * to event_id i-1 and continue enumerating its + * alternatives from where we got up to. + */ + if (--i < 0) + return -1; + } else { + /* + * Found a feasible alternative for event_id i, + * remember where we got up to with this event_id, + * go on to the next event_id, and start with + * the first alternative for it. + */ + choice[i] = j; + svalues[i] = value; + smasks[i] = mask; + value = nv; + mask |= cpuhw->amasks[i][j]; + ++i; + j = -1; + } + } + + /* OK, we have a feasible combination, tell the caller the solution */ + for (i = 0; i < n_ev; ++i) + event_id[i] = cpuhw->alternatives[i][choice[i]]; + return 0; +} + +/* + * Check if newly-added events have consistent settings for + * exclude_{user,kernel,hv} with each other and any previously + * added events. + */ +static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], + int n_prev, int n_new) +{ + int eu = 0, ek = 0, eh = 0; + int i, n, first; + struct perf_event *event; + + n = n_prev + n_new; + if (n <= 1) + return 0; + + first = 1; + for (i = 0; i < n; ++i) { + if (cflags[i] & PPMU_LIMITED_PMC_OK) { + cflags[i] &= ~PPMU_LIMITED_PMC_REQD; + continue; + } + event = ctrs[i]; + if (first) { + eu = event->attr.exclude_user; + ek = event->attr.exclude_kernel; + eh = event->attr.exclude_hv; + first = 0; + } else if (event->attr.exclude_user != eu || + event->attr.exclude_kernel != ek || + event->attr.exclude_hv != eh) { + return -EAGAIN; + } + } + + if (eu || ek || eh) + for (i = 0; i < n; ++i) + if (cflags[i] & PPMU_LIMITED_PMC_OK) + cflags[i] |= PPMU_LIMITED_PMC_REQD; + + return 0; +} + +static void power_pmu_read(struct perf_event *event) +{ + s64 val, delta, prev; + + if (!event->hw.idx) + return; + /* + * Performance monitor interrupts come even when interrupts + * are soft-disabled, as long as interrupts are hard-enabled. + * Therefore we treat them like NMIs. + */ + do { + prev = atomic64_read(&event->hw.prev_count); + barrier(); + val = read_pmc(event->hw.idx); + } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + /* The events are only 32 bits wide */ + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + atomic64_sub(delta, &event->hw.period_left); +} + +/* + * On some machines, PMC5 and PMC6 can't be written, don't respect + * the freeze conditions, and don't generate interrupts. This tells + * us if `event' is using such a PMC. 
+ */ +static int is_limited_pmc(int pmcnum) +{ + return (ppmu->flags & PPMU_LIMITED_PMC5_6) + && (pmcnum == 5 || pmcnum == 6); +} + +static void freeze_limited_events(struct cpu_hw_events *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_event *event; + u64 val, prev, delta; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + event = cpuhw->limited_event[i]; + if (!event->hw.idx) + continue; + val = (event->hw.idx == 5) ? pmc5 : pmc6; + prev = atomic64_read(&event->hw.prev_count); + event->hw.idx = 0; + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + } +} + +static void thaw_limited_events(struct cpu_hw_events *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_event *event; + u64 val; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + event = cpuhw->limited_event[i]; + event->hw.idx = cpuhw->limited_hwidx[i]; + val = (event->hw.idx == 5) ? pmc5 : pmc6; + atomic64_set(&event->hw.prev_count, val); + perf_event_update_userpage(event); + } +} + +/* + * Since limited events don't respect the freeze conditions, we + * have to read them immediately after freezing or unfreezing the + * other events. We try to keep the values from the limited + * events as consistent as possible by keeping the delay (in + * cycles and instructions) between freezing/unfreezing and reading + * the limited events as small and consistent as possible. + * Therefore, if any limited events are in use, we read them + * both, and always in the same order, to minimize variability, + * and do it inside the same asm that writes MMCR0. + */ +static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) +{ + unsigned long pmc5, pmc6; + + if (!cpuhw->n_limited) { + mtspr(SPRN_MMCR0, mmcr0); + return; + } + + /* + * Write MMCR0, then read PMC5 and PMC6 immediately. + * To ensure we don't get a performance monitor interrupt + * between writing MMCR0 and freezing/thawing the limited + * events, we first write MMCR0 with the event overflow + * interrupt enable bits turned off. + */ + asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" + : "=&r" (pmc5), "=&r" (pmc6) + : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), + "i" (SPRN_MMCR0), + "i" (SPRN_PMC5), "i" (SPRN_PMC6)); + + if (mmcr0 & MMCR0_FC) + freeze_limited_events(cpuhw, pmc5, pmc6); + else + thaw_limited_events(cpuhw, pmc5, pmc6); + + /* + * Write the full MMCR0 including the event overflow interrupt + * enable bits, if necessary. + */ + if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) + mtspr(SPRN_MMCR0, mmcr0); +} + +/* + * Disable all events to prevent PMU interrupts and to allow + * events to be added or removed. + */ +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + if (!ppmu) + return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!cpuhw->disabled) { + cpuhw->disabled = 1; + cpuhw->n_added = 0; + + /* + * Check if we ever enabled the PMU on this cpu. + */ + if (!cpuhw->pmcs_enabled) { + ppc_enable_pmcs(); + cpuhw->pmcs_enabled = 1; + } + + /* + * Disable instruction sampling if it was enabled + */ + if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + mtspr(SPRN_MMCRA, + cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mb(); + } + + /* + * Set the 'freeze events' bit. + * The barrier is to make sure the mtspr has been + * executed and the PMU has frozen the events + * before we return. + */ + write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); + mb(); + } + local_irq_restore(flags); +} + +/* + * Re-enable all events if disable == 0. 
+ * If we were previously disabled and events were added, then + * put the new config on the PMU. + */ +void hw_perf_enable(void) +{ + struct perf_event *event; + struct cpu_hw_events *cpuhw; + unsigned long flags; + long i; + unsigned long val; + s64 left; + unsigned int hwc_index[MAX_HWEVENTS]; + int n_lim; + int idx; + + if (!ppmu) + return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + if (!cpuhw->disabled) { + local_irq_restore(flags); + return; + } + cpuhw->disabled = 0; + + /* + * If we didn't change anything, or only removed events, + * no need to recalculate MMCR* settings and reset the PMCs. + * Just reenable the PMU with the current MMCR* settings + * (possibly updated for removal of events). + */ + if (!cpuhw->n_added) { + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + if (cpuhw->n_events == 0) + ppc_set_pmu_inuse(0); + goto out_enable; + } + + /* + * Compute MMCR* values for the new set of events + */ + if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, + cpuhw->mmcr)) { + /* shouldn't ever get here */ + printk(KERN_ERR "oops compute_mmcr failed\n"); + goto out; + } + + /* + * Add in MMCR0 freeze bits corresponding to the + * attr.exclude_* bits for the first event. + * We have already checked that all events have the + * same values for these bits as the first event. + */ + event = cpuhw->event[0]; + if (event->attr.exclude_user) + cpuhw->mmcr[0] |= MMCR0_FCP; + if (event->attr.exclude_kernel) + cpuhw->mmcr[0] |= freeze_events_kernel; + if (event->attr.exclude_hv) + cpuhw->mmcr[0] |= MMCR0_FCHV; + + /* + * Write the new configuration to MMCR* with the freeze + * bit set and set the hardware events to their initial values. + * Then unfreeze the events. + */ + ppc_set_pmu_inuse(1); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) + | MMCR0_FC); + + /* + * Read off any pre-existing events that need to move + * to another PMC. + */ + for (i = 0; i < cpuhw->n_events; ++i) { + event = cpuhw->event[i]; + if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) { + power_pmu_read(event); + write_pmc(event->hw.idx, 0); + event->hw.idx = 0; + } + } + + /* + * Initialize the PMCs for all the new and moved events. 
+ */
+ cpuhw->n_limited = n_lim = 0;
+ for (i = 0; i < cpuhw->n_events; ++i) {
+ event = cpuhw->event[i];
+ if (event->hw.idx)
+ continue;
+ idx = hwc_index[i] + 1;
+ if (is_limited_pmc(idx)) {
+ cpuhw->limited_event[n_lim] = event;
+ cpuhw->limited_hwidx[n_lim] = idx;
+ ++n_lim;
+ continue;
+ }
+ val = 0;
+ if (event->hw.sample_period) {
+ left = atomic64_read(&event->hw.period_left);
+ if (left < 0x80000000L)
+ val = 0x80000000L - left;
+ }
+ atomic64_set(&event->hw.prev_count, val);
+ event->hw.idx = idx;
+ write_pmc(idx, val);
+ perf_event_update_userpage(event);
+ }
+ cpuhw->n_limited = n_lim;
+ cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+
+ out_enable:
+ mb();
+ write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+
+ /*
+ * Enable instruction sampling if necessary
+ */
+ if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+ mb();
+ mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+ }
+
+ out:
+ local_irq_restore(flags);
+}
+
+static int collect_events(struct perf_event *group, int max_count,
+ struct perf_event *ctrs[], u64 *events,
+ unsigned int *flags)
+{
+ int n = 0;
+ struct perf_event *event;
+
+ if (!is_software_event(group)) {
+ if (n >= max_count)
+ return -1;
+ ctrs[n] = group;
+ flags[n] = group->hw.event_base;
+ events[n++] = group->hw.config;
+ }
+ list_for_each_entry(event, &group->sibling_list, list_entry) {
+ if (!is_software_event(event) &&
+ event->state != PERF_EVENT_STATE_OFF) {
+ if (n >= max_count)
+ return -1;
+ ctrs[n] = event;
+ flags[n] = event->hw.event_base;
+ events[n++] = event->hw.config;
+ }
+ }
+ return n;
+}
+
+static void event_sched_in(struct perf_event *event, int cpu)
+{
+ event->state = PERF_EVENT_STATE_ACTIVE;
+ event->oncpu = cpu;
+ event->tstamp_running += event->ctx->time - event->tstamp_stopped;
+ if (is_software_event(event))
+ event->pmu->enable(event);
+}
+
+/*
+ * Called to enable a whole group of events.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_disable.
+ */
+int hw_perf_group_sched_in(struct perf_event *group_leader,
+ struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx, int cpu)
+{
+ struct cpu_hw_events *cpuhw;
+ long i, n, n0;
+ struct perf_event *sub;
+
+ if (!ppmu)
+ return 0;
+ cpuhw = &__get_cpu_var(cpu_hw_events);
+ n0 = cpuhw->n_events;
+ n = collect_events(group_leader, ppmu->n_event - n0,
+ &cpuhw->event[n0], &cpuhw->events[n0],
+ &cpuhw->flags[n0]);
+ if (n < 0)
+ return -EAGAIN;
+ if (check_excludes(cpuhw->event, cpuhw->flags, n0, n))
+ return -EAGAIN;
+ i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0);
+ if (i < 0)
+ return -EAGAIN;
+ cpuhw->n_events = n0 + n;
+ cpuhw->n_added += n;
+
+ /*
+ * OK, this group can go on; update event states etc.,
+ * and enable any software events
+ */
+ for (i = n0; i < n0 + n; ++i)
+ cpuhw->event[i]->hw.config = cpuhw->events[i];
+ cpuctx->active_oncpu += n;
+ n = 1;
+ event_sched_in(group_leader, cpu);
+ list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
+ if (sub->state != PERF_EVENT_STATE_OFF) {
+ event_sched_in(sub, cpu);
+ ++n;
+ }
+ }
+ ctx->nr_active += n;
+
+ return 1;
+}
+
+/*
+ * Add an event to the PMU.
+ * If all events are not already frozen, then we disable and
+ * re-enable the PMU in order to get hw_perf_enable to do the
+ * actual work of reconfiguring the PMU.
+ */
+static int power_pmu_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuhw;
+ unsigned long flags;
+ int n0;
+ int ret = -EAGAIN;
+
+ local_irq_save(flags);
+ perf_disable();
+
+ /*
+ * Add the event to the list (if there is room)
+ * and check whether the total set is still feasible.
+ */
+ cpuhw = &__get_cpu_var(cpu_hw_events);
+ n0 = cpuhw->n_events;
+ if (n0 >= ppmu->n_event)
+ goto out;
+ cpuhw->event[n0] = event;
+ cpuhw->events[n0] = event->hw.config;
+ cpuhw->flags[n0] = event->hw.event_base;
+ if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
+ goto out;
+ if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
+ goto out;
+
+ event->hw.config = cpuhw->events[n0];
+ ++cpuhw->n_events;
+ ++cpuhw->n_added;
+
+ ret = 0;
+ out:
+ perf_enable();
+ local_irq_restore(flags);
+ return ret;
+}
+
+/*
+ * Remove an event from the PMU.
+ */
+static void power_pmu_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuhw;
+ long i;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ perf_disable();
+
+ power_pmu_read(event);
+
+ cpuhw = &__get_cpu_var(cpu_hw_events);
+ for (i = 0; i < cpuhw->n_events; ++i) {
+ if (event == cpuhw->event[i]) {
+ while (++i < cpuhw->n_events)
+ cpuhw->event[i-1] = cpuhw->event[i];
+ --cpuhw->n_events;
+ ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
+ if (event->hw.idx) {
+ write_pmc(event->hw.idx, 0);
+ event->hw.idx = 0;
+ }
+ perf_event_update_userpage(event);
+ break;
+ }
+ }
+ for (i = 0; i < cpuhw->n_limited; ++i)
+ if (event == cpuhw->limited_event[i])
+ break;
+ if (i < cpuhw->n_limited) {
+ while (++i < cpuhw->n_limited) {
+ cpuhw->limited_event[i-1] = cpuhw->limited_event[i];
+ cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
+ }
+ --cpuhw->n_limited;
+ }
+ if (cpuhw->n_events == 0) {
+ /* disable exceptions if no events are running */
+ cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
+ }
+
+ perf_enable();
+ local_irq_restore(flags);
+}
+
+/*
+ * Re-enable interrupts on an event after they were throttled
+ * because they were coming too fast.
+ */
+static void power_pmu_unthrottle(struct perf_event *event)
+{
+ s64 val, left;
+ unsigned long flags;
+
+ if (!event->hw.idx || !event->hw.sample_period)
+ return;
+ local_irq_save(flags);
+ perf_disable();
+ power_pmu_read(event);
+ left = event->hw.sample_period;
+ event->hw.last_period = left;
+ val = 0;
+ if (left < 0x80000000L)
+ val = 0x80000000L - left;
+ write_pmc(event->hw.idx, val);
+ atomic64_set(&event->hw.prev_count, val);
+ atomic64_set(&event->hw.period_left, left);
+ perf_event_update_userpage(event);
+ perf_enable();
+ local_irq_restore(flags);
+}
+
+struct pmu power_pmu = {
+ .enable = power_pmu_enable,
+ .disable = power_pmu_disable,
+ .read = power_pmu_read,
+ .unthrottle = power_pmu_unthrottle,
+};
+
+/*
+ * Return 1 if we might be able to put an event on a limited PMC,
+ * or 0 if not.
+ * An event can only go on a limited PMC if it counts something
+ * that a limited PMC can count, doesn't require interrupts, and
+ * doesn't exclude any processor mode.
+ */
+static int can_go_on_limited_pmc(struct perf_event *event, u64 ev,
+ unsigned int flags)
+{
+ int n;
+ u64 alt[MAX_EVENT_ALTERNATIVES];
+
+ if (event->attr.exclude_user
+ || event->attr.exclude_kernel
+ || event->attr.exclude_hv
+ || event->attr.sample_period)
+ return 0;
+
+ if (ppmu->limited_pmc_event(ev))
+ return 1;
+
+ /*
+ * The requested event_id isn't on a limited PMC already;
+ * see if any alternative code goes on a limited PMC.
+ */
+ if (!ppmu->get_alternatives)
+ return 0;
+
+ flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
+ n = ppmu->get_alternatives(ev, flags, alt);
+
+ return n > 0;
+}
+
+/*
+ * Find an alternative event_id that goes on a normal PMC, if possible,
+ * and return the event_id code, or 0 if there is no such alternative.
+ * (Note: event_id code 0 is "don't count" on all machines.)
+ */
+static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
+{
+ u64 alt[MAX_EVENT_ALTERNATIVES];
+ int n;
+
+ flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
+ n = ppmu->get_alternatives(ev, flags, alt);
+ if (!n)
+ return 0;
+ return alt[0];
+}
+
+/* Number of perf_events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Release the PMU if this is the last perf_event.
+ */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ if (!atomic_add_unless(&num_events, -1, 1)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+/*
+ * Translate a generic cache event_id config to a raw event_id code.
+ */
+static int hw_perf_cache_event(u64 config, u64 *eventp)
+{
+ unsigned long type, op, result;
+ int ev;
+
+ if (!ppmu->cache_events)
+ return -EINVAL;
+
+ /* unpack config */
+ type = config & 0xff;
+ op = (config >> 8) & 0xff;
+ result = (config >> 16) & 0xff;
+
+ if (type >= PERF_COUNT_HW_CACHE_MAX ||
+ op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+ result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+ return -EINVAL;
+
+ ev = (*ppmu->cache_events)[type][op][result];
+ if (ev == 0)
+ return -EOPNOTSUPP;
+ if (ev == -1)
+ return -EINVAL;
+ *eventp = ev;
+ return 0;
+}
+
+const struct pmu *hw_perf_event_init(struct perf_event *event)
+{
+ u64 ev;
+ unsigned long flags;
+ struct perf_event *ctrs[MAX_HWEVENTS];
+ u64 events[MAX_HWEVENTS];
+ unsigned int cflags[MAX_HWEVENTS];
+ int n;
+ int err;
+ struct cpu_hw_events *cpuhw;
+
+ if (!ppmu)
+ return ERR_PTR(-ENXIO);
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ ev = event->attr.config;
+ if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
+ return ERR_PTR(-EOPNOTSUPP);
+ ev = ppmu->generic_events[ev];
+ break;
+ case PERF_TYPE_HW_CACHE:
+ err = hw_perf_cache_event(event->attr.config, &ev);
+ if (err)
+ return ERR_PTR(err);
+ break;
+ case PERF_TYPE_RAW:
+ ev = event->attr.config;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+ event->hw.config_base = ev;
+ event->hw.idx = 0;
+
+ /*
+ * If we are not running on a hypervisor, force the
+ * exclude_hv bit to 0 so that we don't care what
+ * the user set it to.
+ */
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ event->attr.exclude_hv = 0;
+
+ /*
+ * If this is a per-task event, then we can use
+ * PM_RUN_* events interchangeably with their non RUN_*
+ * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
+ * XXX we should check if the task is an idle task.
+ */
+ flags = 0;
+ if (event->ctx->task)
+ flags |= PPMU_ONLY_COUNT_RUN;
+
+ /*
+ * If this machine has limited PMCs, check whether this
+ * event_id could go on a limited PMC.
+ */
+ if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
+ if (can_go_on_limited_pmc(event, ev, flags)) {
+ flags |= PPMU_LIMITED_PMC_OK;
+ } else if (ppmu->limited_pmc_event(ev)) {
+ /*
+ * The requested event_id is on a limited PMC,
+ * but we can't use a limited PMC; see if any
+ * alternative goes on a normal PMC.
+ */
+ ev = normal_pmc_alternative(ev, flags);
+ if (!ev)
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ /*
+ * If this is in a group, check if it can go on with all the
+ * other hardware events in the group. We assume the event
+ * hasn't been linked into its leader's sibling list at this point.
+ */
+ n = 0;
+ if (event->group_leader != event) {
+ n = collect_events(event->group_leader, ppmu->n_event - 1,
+ ctrs, events, cflags);
+ if (n < 0)
+ return ERR_PTR(-EINVAL);
+ }
+ events[n] = ev;
+ ctrs[n] = event;
+ cflags[n] = flags;
+ if (check_excludes(ctrs, cflags, n, 1))
+ return ERR_PTR(-EINVAL);
+
+ cpuhw = &get_cpu_var(cpu_hw_events);
+ err = power_check_constraints(cpuhw, events, cflags, n + 1);
+ put_cpu_var(cpu_hw_events);
+ if (err)
+ return ERR_PTR(-EINVAL);
+
+ event->hw.config = events[n];
+ event->hw.event_base = cflags[n];
+ event->hw.last_period = event->hw.sample_period;
+ atomic64_set(&event->hw.period_left, event->hw.last_period);
+
+ /*
+ * See if we need to reserve the PMU.
+ * If no events are currently in use, then we have to take a
+ * mutex to ensure that we don't race with another task doing
+ * reserve_pmc_hardware or release_pmc_hardware.
+ */
+ err = 0;
+ if (!atomic_inc_not_zero(&num_events)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&num_events) == 0 &&
+ reserve_pmc_hardware(perf_event_interrupt))
+ err = -EBUSY;
+ else
+ atomic_inc(&num_events);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+ event->destroy = hw_perf_event_destroy;
+
+ if (err)
+ return ERR_PTR(err);
+ return &power_pmu;
+}
+
+/*
+ * An event has overflowed; update its count and record
+ * things if requested. Note that interrupts are hard-disabled
+ * here so there is no possibility of being interrupted.
+ */
+static void record_and_restart(struct perf_event *event, unsigned long val,
+ struct pt_regs *regs, int nmi)
+{
+ u64 period = event->hw.sample_period;
+ s64 prev, delta, left;
+ int record = 0;
+
+ /* we don't have to worry about interrupts here */
+ prev = atomic64_read(&event->hw.prev_count);
+ delta = (val - prev) & 0xfffffffful;
+ atomic64_add(delta, &event->count);
+
+ /*
+ * See if the total period for this event has expired,
+ * and update for the next period.
+ */
+ val = 0;
+ left = atomic64_read(&event->hw.period_left) - delta;
+ if (period) {
+ if (left <= 0) {
+ left += period;
+ if (left <= 0)
+ left = period;
+ record = 1;
+ }
+ if (left < 0x80000000LL)
+ val = 0x80000000LL - left;
+ }
+
+ /*
+ * Finally record data if requested.
+ */
+ if (record) {
+ struct perf_sample_data data = {
+ .addr = 0,
+ .period = event->hw.last_period,
+ };
+
+ if (event->attr.sample_type & PERF_SAMPLE_ADDR)
+ perf_get_data_addr(regs, &data.addr);
+
+ if (perf_event_overflow(event, nmi, &data, regs)) {
+ /*
+ * Interrupts are coming too fast - throttle them
+ * by setting the event to 0, so it will be
+ * at least 2^30 cycles until the next interrupt
+ * (assuming each event counts at most 2 counts
+ * per cycle).
+ */
+ val = 0;
+ left = ~0ULL >> 1;
+ }
+ }
+
+ write_pmc(event->hw.idx, val);
+ atomic64_set(&event->hw.prev_count, val);
+ atomic64_set(&event->hw.period_left, left);
+ perf_event_update_userpage(event);
+}
+
+/*
+ * Called from generic code to get the misc flags (i.e. processor mode)
+ * for an event_id.
+ */
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+ u32 flags = perf_get_misc_flags(regs);
+
+ if (flags)
+ return flags;
+ return user_mode(regs) ?
PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; +} + +/* + * Called from generic code to get the instruction pointer + * for an event_id. + */ +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + unsigned long ip; + + if (TRAP(regs) != 0xf00) + return regs->nip; /* not a PMU interrupt */ + + ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); + return ip; +} + +/* + * Performance monitor interrupt stuff + */ +static void perf_event_interrupt(struct pt_regs *regs) +{ + int i; + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + unsigned long val; + int found = 0; + int nmi; + + if (cpuhw->n_limited) + freeze_limited_events(cpuhw, mfspr(SPRN_PMC5), + mfspr(SPRN_PMC6)); + + perf_read_regs(regs); + + nmi = perf_intr_is_nmi(regs); + if (nmi) + nmi_enter(); + else + irq_enter(); + + for (i = 0; i < cpuhw->n_events; ++i) { + event = cpuhw->event[i]; + if (!event->hw.idx || is_limited_pmc(event->hw.idx)) + continue; + val = read_pmc(event->hw.idx); + if ((int)val < 0) { + /* event has overflowed */ + found = 1; + record_and_restart(event, val, regs, nmi); + } + } + + /* + * In case we didn't find and reset the event that caused + * the interrupt, scan all events and reset any that are + * negative, to avoid getting continual interrupts. + * Any that we processed in the previous loop will not be negative. + */ + if (!found) { + for (i = 0; i < ppmu->n_event; ++i) { + if (is_limited_pmc(i + 1)) + continue; + val = read_pmc(i + 1); + if ((int)val < 0) + write_pmc(i + 1, 0); + } + } + + /* + * Reset MMCR0 to its normal value. This will set PMXE and + * clear FC (freeze events) and PMAO (perf mon alert occurred) + * and thus allow interrupts to occur again. + * XXX might want to use MSR.PM to keep the events frozen until + * we get back out of this interrupt. + */ + write_mmcr0(cpuhw, cpuhw->mmcr[0]); + + if (nmi) + nmi_exit(); + else + irq_exit(); +} + +void hw_perf_event_setup(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + if (!ppmu) + return; + memset(cpuhw, 0, sizeof(*cpuhw)); + cpuhw->mmcr[0] = MMCR0_FC; +} + +int register_power_pmu(struct power_pmu *pmu) +{ + if (ppmu) + return -EBUSY; /* something's already registered */ + + ppmu = pmu; + pr_info("%s performance monitor hardware support registered\n", + pmu->name); + +#ifdef MSR_HV + /* + * Use FCHV to ignore kernel events if MSR.HV is set. + */ + if (mfmsr() & MSR_HV) + freeze_events_kernel = MMCR0_FCHV; +#endif /* CONFIG_PPC64 */ + + return 0; +} diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 3c90a3d9173..2a361cdda63 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 31918af3e35..0f4c1c73a6a 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 867f6f66396..c351b3a57fb 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. 
*/ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index fa21890531d..ca399ba5034 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 018d094d92f..28a4daacdc0 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 75dccb71a04..479574413a9 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 465e498bcb3..df45a7449a6 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include @@ -527,25 +527,25 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32) -DEFINE_PER_CPU(u8, perf_counter_pending); +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) +DEFINE_PER_CPU(u8, perf_event_pending); -void set_perf_counter_pending(void) +void set_perf_event_pending(void) { - get_cpu_var(perf_counter_pending) = 1; + get_cpu_var(perf_event_pending) = 1; set_dec(1); - put_cpu_var(perf_counter_pending); + put_cpu_var(perf_event_pending); } -#define test_perf_counter_pending() __get_cpu_var(perf_counter_pending) -#define clear_perf_counter_pending() __get_cpu_var(perf_counter_pending) = 0 +#define test_perf_event_pending() __get_cpu_var(perf_event_pending) +#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 -#else /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ +#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ -#define test_perf_counter_pending() 0 -#define clear_perf_counter_pending() +#define test_perf_event_pending() 0 +#define clear_perf_event_pending() -#endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ +#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ /* * For iSeries shared processors, we have to let the hypervisor @@ -573,9 +573,9 @@ void timer_interrupt(struct pt_regs * regs) set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 - if (test_perf_counter_pending()) { - clear_perf_counter_pending(); - perf_counter_do_pending(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); } if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 830bef0a113..e7dae82c128 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include @@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* When running in the kernel we expect faults to occur only to * addresses in user space. 
All other faults represent errors in the @@ -312,7 +312,7 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { @@ -323,7 +323,7 @@ good_area: #endif } else { current->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } up_read(&mm->mmap_sem); diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9efc8bda01b..e382cae678b 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -280,9 +280,9 @@ config PPC_HAVE_PMU_SUPPORT config PPC_PERF_CTRS def_bool y - depends on PERF_COUNTERS && PPC_HAVE_PMU_SUPPORT + depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT help - This enables the powerpc-specific perf_counter back-end. + This enables the powerpc-specific perf_event back-end. config SMP depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1c866efd217..43c0acad716 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -94,7 +94,7 @@ config S390 select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config SCHED_OMIT_FRAME_POINTER bool diff --git a/arch/s390/include/asm/perf_counter.h b/arch/s390/include/asm/perf_counter.h deleted file mode 100644 index 7015188c2cc..00000000000 --- a/arch/s390/include/asm/perf_counter.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Performance counter support - s390 specific definitions. - * - * Copyright 2009 Martin Schwidefsky, IBM Corporation. - */ - -static inline void set_perf_counter_pending(void) {} -static inline void clear_perf_counter_pending(void) {} - -#define PERF_COUNTER_INDEX_OFFSET 0 diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h new file mode 100644 index 00000000000..3840cbe7763 --- /dev/null +++ b/arch/s390/include/asm/perf_event.h @@ -0,0 +1,10 @@ +/* + * Performance event support - s390 specific definitions. + * + * Copyright 2009 Martin Schwidefsky, IBM Corporation. 
+ */ + +static inline void set_perf_event_pending(void) {} +static inline void clear_perf_event_pending(void) {} + +#define PERF_EVENT_INDEX_OFFSET 0 diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index c80602d7c88..cb5232df151 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -268,7 +268,7 @@ #define __NR_preadv 328 #define __NR_pwritev 329 #define __NR_rt_tgsigqueueinfo 330 -#define __NR_perf_counter_open 331 +#define __NR_perf_event_open 331 #define NR_syscalls 332 /* diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 88a83366819..624790042d4 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1832,11 +1832,11 @@ compat_sys_rt_tgsigqueueinfo_wrapper: llgtr %r5,%r5 # struct compat_siginfo * jg compat_sys_rt_tgsigqueueinfo_wrapper # branch to system call - .globl sys_perf_counter_open_wrapper -sys_perf_counter_open_wrapper: - llgtr %r2,%r2 # const struct perf_counter_attr * + .globl sys_perf_event_open_wrapper +sys_perf_event_open_wrapper: + llgtr %r2,%r2 # const struct perf_event_attr * lgfr %r3,%r3 # pid_t lgfr %r4,%r4 # int lgfr %r5,%r5 # int llgfr %r6,%r6 # unsigned long - jg sys_perf_counter_open # branch to system call + jg sys_perf_event_open # branch to system call diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index ad1acd20038..0b5083681e7 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -339,4 +339,4 @@ SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper) SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper) SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */ -SYSCALL(sys_perf_counter_open,sys_perf_counter_open,sys_perf_counter_open_wrapper) +SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1abbadd497e..6d507462967 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -10,7 +10,7 @@ * Copyright (C) 1995 Linus Torvalds */ -#include +#include #include #include #include @@ -306,7 +306,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write) * interrupts again and then search the VMAs */ local_irq_enable(); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); down_read(&mm->mmap_sem); si_code = SEGV_MAPERR; @@ -366,11 +366,11 @@ good_area: } if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } up_read(&mm->mmap_sem); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 4df3570fe51..b940424f8cc 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -16,7 +16,7 @@ config SUPERH select HAVE_IOREMAP_PROT if MMU select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA diff --git a/arch/sh/include/asm/perf_counter.h b/arch/sh/include/asm/perf_counter.h deleted file mode 100644 index d8e6bb9c0cc..00000000000 --- a/arch/sh/include/asm/perf_counter.h +++ /dev/null @@ -1,9 +0,0 @@ 
-#ifndef __ASM_SH_PERF_COUNTER_H -#define __ASM_SH_PERF_COUNTER_H - -/* SH only supports software counters through this interface. */ -static inline void set_perf_counter_pending(void) {} - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#endif /* __ASM_SH_PERF_COUNTER_H */ diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h new file mode 100644 index 00000000000..11a302297ab --- /dev/null +++ b/arch/sh/include/asm/perf_event.h @@ -0,0 +1,9 @@ +#ifndef __ASM_SH_PERF_EVENT_H +#define __ASM_SH_PERF_EVENT_H + +/* SH only supports software events through this interface. */ +static inline void set_perf_event_pending(void) {} + +#define PERF_EVENT_INDEX_OFFSET 0 + +#endif /* __ASM_SH_PERF_EVENT_H */ diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h index 925dd40d9d5..f3fd1b9eb6b 100644 --- a/arch/sh/include/asm/unistd_32.h +++ b/arch/sh/include/asm/unistd_32.h @@ -344,7 +344,7 @@ #define __NR_preadv 333 #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 -#define __NR_perf_counter_open 336 +#define __NR_perf_event_open 336 #define NR_syscalls 337 diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h index 2b84bc916bc..343ce8f073e 100644 --- a/arch/sh/include/asm/unistd_64.h +++ b/arch/sh/include/asm/unistd_64.h @@ -384,7 +384,7 @@ #define __NR_preadv 361 #define __NR_pwritev 362 #define __NR_rt_tgsigqueueinfo 363 -#define __NR_perf_counter_open 364 +#define __NR_perf_event_open 364 #ifdef __KERNEL__ diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S index 16ba225ede8..19fd11dd987 100644 --- a/arch/sh/kernel/syscalls_32.S +++ b/arch/sh/kernel/syscalls_32.S @@ -352,4 +352,4 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev .long sys_rt_tgsigqueueinfo /* 335 */ - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S index af6fb7410c2..5bfde6c7749 100644 --- a/arch/sh/kernel/syscalls_64.S +++ b/arch/sh/kernel/syscalls_64.S @@ -390,4 +390,4 @@ sys_call_table: .long sys_preadv .long sys_pwritev .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c index 781b413ff82..47530104e0a 100644 --- a/arch/sh/mm/fault_32.c +++ b/arch/sh/mm/fault_32.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -157,7 +157,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if ((regs->sr & SR_IMASK) != SR_IMASK) local_irq_enable(); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -208,11 +208,11 @@ survive: } if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c index 2dcc48528f7..de0b0e88182 100644 --- a/arch/sh/mm/tlbflush_64.c +++ b/arch/sh/mm/tlbflush_64.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -116,7 +116,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, /* Not an IO address, so reenable interrupts */ 
local_irq_enable(); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt or have no user @@ -201,11 +201,11 @@ survive: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 86b82348b97..97fca4695e0 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,7 +25,7 @@ config SPARC select ARCH_WANT_OPTIONAL_GPIOLIB select RTC_CLASS select RTC_DRV_M48T59 - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG @@ -47,7 +47,7 @@ config SPARC64 select RTC_DRV_BQ4802 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config ARCH_DEFCONFIG string diff --git a/arch/sparc/include/asm/perf_counter.h b/arch/sparc/include/asm/perf_counter.h deleted file mode 100644 index 5d7a8ca0e49..00000000000 --- a/arch/sparc/include/asm/perf_counter.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __ASM_SPARC_PERF_COUNTER_H -#define __ASM_SPARC_PERF_COUNTER_H - -extern void set_perf_counter_pending(void); - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#ifdef CONFIG_PERF_COUNTERS -extern void init_hw_perf_counters(void); -#else -static inline void init_hw_perf_counters(void) { } -#endif - -#endif diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h new file mode 100644 index 00000000000..7e2669894ce --- /dev/null +++ b/arch/sparc/include/asm/perf_event.h @@ -0,0 +1,14 @@ +#ifndef __ASM_SPARC_PERF_EVENT_H +#define __ASM_SPARC_PERF_EVENT_H + +extern void set_perf_event_pending(void); + +#define PERF_EVENT_INDEX_OFFSET 0 + +#ifdef CONFIG_PERF_EVENTS +extern void init_hw_perf_events(void); +#else +static inline void init_hw_perf_events(void) { } +#endif + +#endif diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index 706df669f3b..42f2316c3ea 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -395,7 +395,7 @@ #define __NR_preadv 324 #define __NR_pwritev 325 #define __NR_rt_tgsigqueueinfo 326 -#define __NR_perf_counter_open 327 +#define __NR_perf_event_open 327 #define NR_SYSCALLS 328 diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 247cc620cee..3a048fad7ee 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -104,5 +104,5 @@ obj-$(CONFIG_AUDIT) += audit.o audit--$(CONFIG_AUDIT) := compat_audit.o obj-$(CONFIG_COMPAT) += $(audit--y) -pc--$(CONFIG_PERF_COUNTERS) := perf_counter.o +pc--$(CONFIG_PERF_EVENTS) := perf_event.o obj-$(CONFIG_SPARC64) += $(pc--y) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 378eb53e077..b129611590a 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include @@ -265,7 +265,7 @@ int __init nmi_init(void) } } if (!err) - init_hw_perf_counters(); + init_hw_perf_events(); return err; } diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 68ff0010707..2d94e7a03af 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include @@ 
-15,7 +15,7 @@ /* This code is shared between various users of the performance * counters. Users will be oprofile, pseudo-NMI watchdog, and the - * perf_counter support layer. + * perf_event support layer. */ #define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) @@ -42,14 +42,14 @@ void deferred_pcr_work_irq(int irq, struct pt_regs *regs) old_regs = set_irq_regs(regs); irq_enter(); -#ifdef CONFIG_PERF_COUNTERS - perf_counter_do_pending(); +#ifdef CONFIG_PERF_EVENTS + perf_event_do_pending(); #endif irq_exit(); set_irq_regs(old_regs); } -void set_perf_counter_pending(void) +void set_perf_event_pending(void) { set_softint(1 << PIL_DEFERRED_PCR_WORK); } diff --git a/arch/sparc/kernel/perf_counter.c b/arch/sparc/kernel/perf_counter.c deleted file mode 100644 index b1265ce8a05..00000000000 --- a/arch/sparc/kernel/perf_counter.c +++ /dev/null @@ -1,556 +0,0 @@ -/* Performance counter support for sparc64. - * - * Copyright (C) 2009 David S. Miller - * - * This code is based almost entirely upon the x86 perf counter - * code, which is: - * - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* Sparc64 chips have two performance counters, 32-bits each, with - * overflow interrupts generated on transition from 0xffffffff to 0. - * The counters are accessed in one go using a 64-bit register. - * - * Both counters are controlled using a single control register. The - * only way to stop all sampling is to clear all of the context (user, - * supervisor, hypervisor) sampling enable bits. But these bits apply - * to both counters, thus the two counters can't be enabled/disabled - * individually. - * - * The control register has two event fields, one for each of the two - * counters. It's thus nearly impossible to have one counter going - * while keeping the other one stopped. Therefore it is possible to - * get overflow interrupts for counters not currently "in use" and - * that condition must be checked in the overflow interrupt handler. - * - * So we use a hack, in that we program inactive counters with the - * "sw_count0" and "sw_count1" events. These count how many times - * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an - * unusual way to encode a NOP and therefore will not trigger in - * normal code. 
- */ - -#define MAX_HWCOUNTERS 2 -#define MAX_PERIOD ((1UL << 32) - 1) - -#define PIC_UPPER_INDEX 0 -#define PIC_LOWER_INDEX 1 - -struct cpu_hw_counters { - struct perf_counter *counters[MAX_HWCOUNTERS]; - unsigned long used_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)]; - unsigned long active_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)]; - int enabled; -}; -DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, }; - -struct perf_event_map { - u16 encoding; - u8 pic_mask; -#define PIC_NONE 0x00 -#define PIC_UPPER 0x01 -#define PIC_LOWER 0x02 -}; - -struct sparc_pmu { - const struct perf_event_map *(*event_map)(int); - int max_events; - int upper_shift; - int lower_shift; - int event_mask; - int hv_bit; - int irq_bit; - int upper_nop; - int lower_nop; -}; - -static const struct perf_event_map ultra3i_perfmon_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, - [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, -}; - -static const struct perf_event_map *ultra3i_event_map(int event) -{ - return &ultra3i_perfmon_event_map[event]; -} - -static const struct sparc_pmu ultra3i_pmu = { - .event_map = ultra3i_event_map, - .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), - .upper_shift = 11, - .lower_shift = 4, - .event_mask = 0x3f, - .upper_nop = 0x1c, - .lower_nop = 0x14, -}; - -static const struct perf_event_map niagara2_perfmon_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER }, -}; - -static const struct perf_event_map *niagara2_event_map(int event) -{ - return &niagara2_perfmon_event_map[event]; -} - -static const struct sparc_pmu niagara2_pmu = { - .event_map = niagara2_event_map, - .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), - .upper_shift = 19, - .lower_shift = 6, - .event_mask = 0xfff, - .hv_bit = 0x8, - .irq_bit = 0x03, - .upper_nop = 0x220, - .lower_nop = 0x220, -}; - -static const struct sparc_pmu *sparc_pmu __read_mostly; - -static u64 event_encoding(u64 event, int idx) -{ - if (idx == PIC_UPPER_INDEX) - event <<= sparc_pmu->upper_shift; - else - event <<= sparc_pmu->lower_shift; - return event; -} - -static u64 mask_for_index(int idx) -{ - return event_encoding(sparc_pmu->event_mask, idx); -} - -static u64 nop_for_index(int idx) -{ - return event_encoding(idx == PIC_UPPER_INDEX ? 
- sparc_pmu->upper_nop : - sparc_pmu->lower_nop, idx); -} - -static inline void sparc_pmu_enable_counter(struct hw_perf_counter *hwc, - int idx) -{ - u64 val, mask = mask_for_index(idx); - - val = pcr_ops->read(); - pcr_ops->write((val & ~mask) | hwc->config); -} - -static inline void sparc_pmu_disable_counter(struct hw_perf_counter *hwc, - int idx) -{ - u64 mask = mask_for_index(idx); - u64 nop = nop_for_index(idx); - u64 val = pcr_ops->read(); - - pcr_ops->write((val & ~mask) | nop); -} - -void hw_perf_enable(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - int i; - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - val = pcr_ops->read(); - - for (i = 0; i < MAX_HWCOUNTERS; i++) { - struct perf_counter *cp = cpuc->counters[i]; - struct hw_perf_counter *hwc; - - if (!cp) - continue; - hwc = &cp->hw; - val |= hwc->config_base; - } - - pcr_ops->write(val); -} - -void hw_perf_disable(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - - val = pcr_ops->read(); - val &= ~(PCR_UTRACE | PCR_STRACE | - sparc_pmu->hv_bit | sparc_pmu->irq_bit); - pcr_ops->write(val); -} - -static u32 read_pmc(int idx) -{ - u64 val; - - read_pic(val); - if (idx == PIC_UPPER_INDEX) - val >>= 32; - - return val & 0xffffffff; -} - -static void write_pmc(int idx, u64 val) -{ - u64 shift, mask, pic; - - shift = 0; - if (idx == PIC_UPPER_INDEX) - shift = 32; - - mask = ((u64) 0xffffffff) << shift; - val <<= shift; - - read_pic(pic); - pic &= ~mask; - pic |= val; - write_pic(pic); -} - -static int sparc_perf_counter_set_period(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int ret = 0; - - if (unlikely(left <= -period)) { - left = period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - if (left > MAX_PERIOD) - left = MAX_PERIOD; - - atomic64_set(&hwc->prev_count, (u64)-left); - - write_pmc(idx, (u64)(-left) & 0xffffffff); - - perf_counter_update_userpage(counter); - - return ret; -} - -static int sparc_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - - if (test_and_set_bit(idx, cpuc->used_mask)) - return -EAGAIN; - - sparc_pmu_disable_counter(hwc, idx); - - cpuc->counters[idx] = counter; - set_bit(idx, cpuc->active_mask); - - sparc_perf_counter_set_period(counter, hwc, idx); - sparc_pmu_enable_counter(hwc, idx); - perf_counter_update_userpage(counter); - return 0; -} - -static u64 sparc_perf_counter_update(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - int shift = 64 - 32; - u64 prev_raw_count, new_raw_count; - s64 delta; - -again: - prev_raw_count = atomic64_read(&hwc->prev_count); - new_raw_count = read_pmc(idx); - - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - delta = (new_raw_count << shift) - (prev_raw_count << shift); - delta >>= shift; - - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static void sparc_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = 
&__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - - clear_bit(idx, cpuc->active_mask); - sparc_pmu_disable_counter(hwc, idx); - - barrier(); - - sparc_perf_counter_update(counter, hwc, idx); - cpuc->counters[idx] = NULL; - clear_bit(idx, cpuc->used_mask); - - perf_counter_update_userpage(counter); -} - -static void sparc_pmu_read(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - sparc_perf_counter_update(counter, hwc, hwc->idx); -} - -static void sparc_pmu_unthrottle(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - sparc_pmu_enable_counter(hwc, hwc->idx); -} - -static atomic_t active_counters = ATOMIC_INIT(0); -static DEFINE_MUTEX(pmc_grab_mutex); - -void perf_counter_grab_pmc(void) -{ - if (atomic_inc_not_zero(&active_counters)) - return; - - mutex_lock(&pmc_grab_mutex); - if (atomic_read(&active_counters) == 0) { - if (atomic_read(&nmi_active) > 0) { - on_each_cpu(stop_nmi_watchdog, NULL, 1); - BUG_ON(atomic_read(&nmi_active) != 0); - } - atomic_inc(&active_counters); - } - mutex_unlock(&pmc_grab_mutex); -} - -void perf_counter_release_pmc(void) -{ - if (atomic_dec_and_mutex_lock(&active_counters, &pmc_grab_mutex)) { - if (atomic_read(&nmi_active) == 0) - on_each_cpu(start_nmi_watchdog, NULL, 1); - mutex_unlock(&pmc_grab_mutex); - } -} - -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - perf_counter_release_pmc(); -} - -static int __hw_perf_counter_init(struct perf_counter *counter) -{ - struct perf_counter_attr *attr = &counter->attr; - struct hw_perf_counter *hwc = &counter->hw; - const struct perf_event_map *pmap; - u64 enc; - - if (atomic_read(&nmi_active) < 0) - return -ENODEV; - - if (attr->type != PERF_TYPE_HARDWARE) - return -EOPNOTSUPP; - - if (attr->config >= sparc_pmu->max_events) - return -EINVAL; - - perf_counter_grab_pmc(); - counter->destroy = hw_perf_counter_destroy; - - /* We save the enable bits in the config_base. So to - * turn off sampling just write 'config', and to enable - * things write 'config | config_base'. 
- */ - hwc->config_base = sparc_pmu->irq_bit; - if (!attr->exclude_user) - hwc->config_base |= PCR_UTRACE; - if (!attr->exclude_kernel) - hwc->config_base |= PCR_STRACE; - if (!attr->exclude_hv) - hwc->config_base |= sparc_pmu->hv_bit; - - if (!hwc->sample_period) { - hwc->sample_period = MAX_PERIOD; - hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); - } - - pmap = sparc_pmu->event_map(attr->config); - - enc = pmap->encoding; - if (pmap->pic_mask & PIC_UPPER) { - hwc->idx = PIC_UPPER_INDEX; - enc <<= sparc_pmu->upper_shift; - } else { - hwc->idx = PIC_LOWER_INDEX; - enc <<= sparc_pmu->lower_shift; - } - - hwc->config |= enc; - return 0; -} - -static const struct pmu pmu = { - .enable = sparc_pmu_enable, - .disable = sparc_pmu_disable, - .read = sparc_pmu_read, - .unthrottle = sparc_pmu_unthrottle, -}; - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - int err = __hw_perf_counter_init(counter); - - if (err) - return ERR_PTR(err); - return &pmu; -} - -void perf_counter_print_debug(void) -{ - unsigned long flags; - u64 pcr, pic; - int cpu; - - if (!sparc_pmu) - return; - - local_irq_save(flags); - - cpu = smp_processor_id(); - - pcr = pcr_ops->read(); - read_pic(pic); - - pr_info("\n"); - pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", - cpu, pcr, pic); - - local_irq_restore(flags); -} - -static int __kprobes perf_counter_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) -{ - struct die_args *args = __args; - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct pt_regs *regs; - int idx; - - if (!atomic_read(&active_counters)) - return NOTIFY_DONE; - - switch (cmd) { - case DIE_NMI: - break; - - default: - return NOTIFY_DONE; - } - - regs = args->regs; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - for (idx = 0; idx < MAX_HWCOUNTERS; idx++) { - struct perf_counter *counter = cpuc->counters[idx]; - struct hw_perf_counter *hwc; - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - hwc = &counter->hw; - val = sparc_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << 31)) - continue; - - data.period = counter->hw.last_period; - if (!sparc_perf_counter_set_period(counter, hwc, idx)) - continue; - - if (perf_counter_overflow(counter, 1, &data, regs)) - sparc_pmu_disable_counter(hwc, idx); - } - - return NOTIFY_STOP; -} - -static __read_mostly struct notifier_block perf_counter_nmi_notifier = { - .notifier_call = perf_counter_nmi_handler, -}; - -static bool __init supported_pmu(void) -{ - if (!strcmp(sparc_pmu_type, "ultra3i")) { - sparc_pmu = &ultra3i_pmu; - return true; - } - if (!strcmp(sparc_pmu_type, "niagara2")) { - sparc_pmu = &niagara2_pmu; - return true; - } - return false; -} - -void __init init_hw_perf_counters(void) -{ - pr_info("Performance counters: "); - - if (!supported_pmu()) { - pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); - return; - } - - pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); - - /* All sparc64 PMUs currently have 2 counters. But this simple - * driver only supports one active counter at a time. - */ - perf_max_counters = 1; - - register_die_notifier(&perf_counter_nmi_notifier); -} diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c new file mode 100644 index 00000000000..2d6a1b10c81 --- /dev/null +++ b/arch/sparc/kernel/perf_event.c @@ -0,0 +1,556 @@ +/* Performance event support for sparc64. + * + * Copyright (C) 2009 David S. 
Miller + * + * This code is based almost entirely upon the x86 perf event + * code, which is: + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Sparc64 chips have two performance counters, 32-bits each, with + * overflow interrupts generated on transition from 0xffffffff to 0. + * The counters are accessed in one go using a 64-bit register. + * + * Both counters are controlled using a single control register. The + * only way to stop all sampling is to clear all of the context (user, + * supervisor, hypervisor) sampling enable bits. But these bits apply + * to both counters, thus the two counters can't be enabled/disabled + * individually. + * + * The control register has two event fields, one for each of the two + * counters. It's thus nearly impossible to have one counter going + * while keeping the other one stopped. Therefore it is possible to + * get overflow interrupts for counters not currently "in use" and + * that condition must be checked in the overflow interrupt handler. + * + * So we use a hack, in that we program inactive counters with the + * "sw_count0" and "sw_count1" events. These count how many times + * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an + * unusual way to encode a NOP and therefore will not trigger in + * normal code. + */ + +#define MAX_HWEVENTS 2 +#define MAX_PERIOD ((1UL << 32) - 1) + +#define PIC_UPPER_INDEX 0 +#define PIC_LOWER_INDEX 1 + +struct cpu_hw_events { + struct perf_event *events[MAX_HWEVENTS]; + unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + int enabled; +}; +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; + +struct perf_event_map { + u16 encoding; + u8 pic_mask; +#define PIC_NONE 0x00 +#define PIC_UPPER 0x01 +#define PIC_LOWER 0x02 +}; + +struct sparc_pmu { + const struct perf_event_map *(*event_map)(int); + int max_events; + int upper_shift; + int lower_shift; + int event_mask; + int hv_bit; + int irq_bit; + int upper_nop; + int lower_nop; +}; + +static const struct perf_event_map ultra3i_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, +}; + +static const struct perf_event_map *ultra3i_event_map(int event_id) +{ + return &ultra3i_perfmon_event_map[event_id]; +} + +static const struct sparc_pmu ultra3i_pmu = { + .event_map = ultra3i_event_map, + .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), + .upper_shift = 11, + .lower_shift = 4, + .event_mask = 0x3f, + .upper_nop = 0x1c, + .lower_nop = 0x14, +}; + +static const struct perf_event_map niagara2_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER }, +}; + 
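(Editorial aside, not part of the patch.) The per-chip descriptors above reduce event programming to shifts and masks: an event encoding is placed into the upper or lower field of the single control register, selected by upper_shift/lower_shift and bounded by event_mask. A minimal user-space sketch of that placement, using the ultra3i values from the table above (the helper name and the test harness are ours, not kernel code):

#include <stdio.h>
#include <stdint.h>

#define UPPER_SHIFT	11	/* ultra3i_pmu.upper_shift */
#define LOWER_SHIFT	4	/* ultra3i_pmu.lower_shift */
#define EVENT_MASK	0x3f	/* ultra3i_pmu.event_mask */

/* Mirror of event_encoding(): place an encoding into one counter's field.
 * (We mask here for safety; the kernel applies the mask separately via
 * mask_for_index().) */
static uint64_t place_event(uint64_t enc, int upper)
{
	return (enc & EVENT_MASK) << (upper ? UPPER_SHIFT : LOWER_SHIFT);
}

int main(void)
{
	/* 0x0009 is the ultra3i cache-reference encoding above; PIC_LOWER
	 * vs PIC_UPPER in the map decides which field it lands in. */
	printf("lower field: %#llx\n", (unsigned long long)place_event(0x9, 0));
	printf("upper field: %#llx\n", (unsigned long long)place_event(0x9, 1));
	return 0;
}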
+static const struct perf_event_map *niagara2_event_map(int event_id) +{ + return &niagara2_perfmon_event_map[event_id]; +} + +static const struct sparc_pmu niagara2_pmu = { + .event_map = niagara2_event_map, + .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), + .upper_shift = 19, + .lower_shift = 6, + .event_mask = 0xfff, + .hv_bit = 0x8, + .irq_bit = 0x03, + .upper_nop = 0x220, + .lower_nop = 0x220, +}; + +static const struct sparc_pmu *sparc_pmu __read_mostly; + +static u64 event_encoding(u64 event_id, int idx) +{ + if (idx == PIC_UPPER_INDEX) + event_id <<= sparc_pmu->upper_shift; + else + event_id <<= sparc_pmu->lower_shift; + return event_id; +} + +static u64 mask_for_index(int idx) +{ + return event_encoding(sparc_pmu->event_mask, idx); +} + +static u64 nop_for_index(int idx) +{ + return event_encoding(idx == PIC_UPPER_INDEX ? + sparc_pmu->upper_nop : + sparc_pmu->lower_nop, idx); +} + +static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, + int idx) +{ + u64 val, mask = mask_for_index(idx); + + val = pcr_ops->read(); + pcr_ops->write((val & ~mask) | hwc->config); +} + +static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + u64 mask = mask_for_index(idx); + u64 nop = nop_for_index(idx); + u64 val = pcr_ops->read(); + + pcr_ops->write((val & ~mask) | nop); +} + +void hw_perf_enable(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + int i; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + val = pcr_ops->read(); + + for (i = 0; i < MAX_HWEVENTS; i++) { + struct perf_event *cp = cpuc->events[i]; + struct hw_perf_event *hwc; + + if (!cp) + continue; + hwc = &cp->hw; + val |= hwc->config_base; + } + + pcr_ops->write(val); +} + +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + + val = pcr_ops->read(); + val &= ~(PCR_UTRACE | PCR_STRACE | + sparc_pmu->hv_bit | sparc_pmu->irq_bit); + pcr_ops->write(val); +} + +static u32 read_pmc(int idx) +{ + u64 val; + + read_pic(val); + if (idx == PIC_UPPER_INDEX) + val >>= 32; + + return val & 0xffffffff; +} + +static void write_pmc(int idx, u64 val) +{ + u64 shift, mask, pic; + + shift = 0; + if (idx == PIC_UPPER_INDEX) + shift = 32; + + mask = ((u64) 0xffffffff) << shift; + val <<= shift; + + read_pic(pic); + pic &= ~mask; + pic |= val; + write_pic(pic); +} + +static int sparc_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + s64 left = atomic64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + if (left > MAX_PERIOD) + left = MAX_PERIOD; + + atomic64_set(&hwc->prev_count, (u64)-left); + + write_pmc(idx, (u64)(-left) & 0xffffffff); + + perf_event_update_userpage(event); + + return ret; +} + +static int sparc_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (test_and_set_bit(idx, cpuc->used_mask)) + return -EAGAIN; + + sparc_pmu_disable_event(hwc, idx); + + cpuc->events[idx] = event; + set_bit(idx, cpuc->active_mask); + + sparc_perf_event_set_period(event, hwc, idx); + 
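/*
 * Editorial aside, not part of the patch: sparc_perf_event_set_period()
 * above arms the counter by writing the two's complement of the remaining
 * period, (u64)(-left) & 0xffffffff. Since these PICs raise their overflow
 * interrupt on the 0xffffffff -> 0 wrap, starting the count at -left makes
 * the interrupt fire after exactly 'left' events; e.g. left = 1000 writes
 * 0xfffffc18, which wraps to zero after 1000 increments.
 */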
sparc_pmu_enable_event(hwc, idx); + perf_event_update_userpage(event); + return 0; +} + +static u64 sparc_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + int shift = 64 - 32; + u64 prev_raw_count, new_raw_count; + s64 delta; + +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + new_raw_count = read_pmc(idx); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + atomic64_add(delta, &event->count); + atomic64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static void sparc_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + clear_bit(idx, cpuc->active_mask); + sparc_pmu_disable_event(hwc, idx); + + barrier(); + + sparc_perf_event_update(event, hwc, idx); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +static void sparc_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + sparc_perf_event_update(event, hwc, hwc->idx); +} + +static void sparc_pmu_unthrottle(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + sparc_pmu_enable_event(hwc, hwc->idx); +} + +static atomic_t active_events = ATOMIC_INIT(0); +static DEFINE_MUTEX(pmc_grab_mutex); + +void perf_event_grab_pmc(void) +{ + if (atomic_inc_not_zero(&active_events)) + return; + + mutex_lock(&pmc_grab_mutex); + if (atomic_read(&active_events) == 0) { + if (atomic_read(&nmi_active) > 0) { + on_each_cpu(stop_nmi_watchdog, NULL, 1); + BUG_ON(atomic_read(&nmi_active) != 0); + } + atomic_inc(&active_events); + } + mutex_unlock(&pmc_grab_mutex); +} + +void perf_event_release_pmc(void) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) { + if (atomic_read(&nmi_active) == 0) + on_each_cpu(start_nmi_watchdog, NULL, 1); + mutex_unlock(&pmc_grab_mutex); + } +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + perf_event_release_pmc(); +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + const struct perf_event_map *pmap; + u64 enc; + + if (atomic_read(&nmi_active) < 0) + return -ENODEV; + + if (attr->type != PERF_TYPE_HARDWARE) + return -EOPNOTSUPP; + + if (attr->config >= sparc_pmu->max_events) + return -EINVAL; + + perf_event_grab_pmc(); + event->destroy = hw_perf_event_destroy; + + /* We save the enable bits in the config_base. So to + * turn off sampling just write 'config', and to enable + * things write 'config | config_base'. 
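 *
 * (Editorial aside, not part of the patch: concretely, with the niagara2
 * descriptor above and no attr->exclude_* bits set, the code below yields
 *
 *	hwc->config_base = 0x03 /* irq_bit */ | PCR_UTRACE | PCR_STRACE;
 *
 * so hw_perf_enable() ORing config_base into the PCR switches user and
 * supervisor sampling on, while a write of 'config' alone leaves the event
 * selected but counting nothing.)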
+ */ + hwc->config_base = sparc_pmu->irq_bit; + if (!attr->exclude_user) + hwc->config_base |= PCR_UTRACE; + if (!attr->exclude_kernel) + hwc->config_base |= PCR_STRACE; + if (!attr->exclude_hv) + hwc->config_base |= sparc_pmu->hv_bit; + + if (!hwc->sample_period) { + hwc->sample_period = MAX_PERIOD; + hwc->last_period = hwc->sample_period; + atomic64_set(&hwc->period_left, hwc->sample_period); + } + + pmap = sparc_pmu->event_map(attr->config); + + enc = pmap->encoding; + if (pmap->pic_mask & PIC_UPPER) { + hwc->idx = PIC_UPPER_INDEX; + enc <<= sparc_pmu->upper_shift; + } else { + hwc->idx = PIC_LOWER_INDEX; + enc <<= sparc_pmu->lower_shift; + } + + hwc->config |= enc; + return 0; +} + +static const struct pmu pmu = { + .enable = sparc_pmu_enable, + .disable = sparc_pmu_disable, + .read = sparc_pmu_read, + .unthrottle = sparc_pmu_unthrottle, +}; + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + int err = __hw_perf_event_init(event); + + if (err) + return ERR_PTR(err); + return &pmu; +} + +void perf_event_print_debug(void) +{ + unsigned long flags; + u64 pcr, pic; + int cpu; + + if (!sparc_pmu) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + pcr = pcr_ops->read(); + read_pic(pic); + + pr_info("\n"); + pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", + cpu, pcr, pic); + + local_irq_restore(flags); +} + +static int __kprobes perf_event_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *__args) +{ + struct die_args *args = __args; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + if (!atomic_read(&active_events)) + return NOTIFY_DONE; + + switch (cmd) { + case DIE_NMI: + break; + + default: + return NOTIFY_DONE; + } + + regs = args->regs; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx < MAX_HWEVENTS; idx++) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + hwc = &event->hw; + val = sparc_perf_event_update(event, hwc, idx); + if (val & (1ULL << 31)) + continue; + + data.period = event->hw.last_period; + if (!sparc_perf_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 1, &data, regs)) + sparc_pmu_disable_event(hwc, idx); + } + + return NOTIFY_STOP; +} + +static __read_mostly struct notifier_block perf_event_nmi_notifier = { + .notifier_call = perf_event_nmi_handler, +}; + +static bool __init supported_pmu(void) +{ + if (!strcmp(sparc_pmu_type, "ultra3i")) { + sparc_pmu = &ultra3i_pmu; + return true; + } + if (!strcmp(sparc_pmu_type, "niagara2")) { + sparc_pmu = &niagara2_pmu; + return true; + } + return false; +} + +void __init init_hw_perf_events(void) +{ + pr_info("Performance events: "); + + if (!supported_pmu()) { + pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); + return; + } + + pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); + + /* All sparc64 PMUs currently have 2 events. But this simple + * driver only supports one active event at a time. 
+ */ + perf_max_events = 1; + + register_die_notifier(&perf_event_nmi_notifier); +} diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 04181577cb6..0f1658d3749 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -82,5 +82,5 @@ sys_call_table: /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv -/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_counter_open +/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 91b06b7f7ac..009825f6e73 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -83,7 +83,7 @@ sys_call_table32: /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv - .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_counter_open + .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open #endif /* CONFIG_COMPAT */ @@ -158,4 +158,4 @@ sys_call_table: /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv - .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_counter_open + .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 51c59015b28..e4ff5d1280c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,7 +24,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE select HAVE_OPROFILE - select HAVE_PERF_COUNTERS if (!M386 && !M486) + select HAVE_PERF_EVENTS if (!M386 && !M486) select HAVE_IOREMAP_PROT select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index ba331bfd111..74619c4f9fd 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -831,5 +831,5 @@ ia32_sys_call_table: .quad compat_sys_preadv .quad compat_sys_pwritev .quad compat_sys_rt_tgsigqueueinfo /* 335 */ - .quad sys_perf_counter_open + .quad sys_perf_event_open ia32_syscall_end: diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 5e3f2044f0d..f5693c81a1d 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -49,7 +49,7 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) #endif diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h deleted file mode 100644 index e7b7c938ae2..00000000000 --- a/arch/x86/include/asm/perf_counter.h +++ /dev/null @@ -1,108 +0,0 @@ -#ifndef _ASM_X86_PERF_COUNTER_H -#define _ASM_X86_PERF_COUNTER_H - -/* - * Performance counter hw details: - */ - -#define X86_PMC_MAX_GENERIC 8 -#define 
X86_PMC_MAX_FIXED 3 - -#define X86_PMC_IDX_GENERIC 0 -#define X86_PMC_IDX_FIXED 32 -#define X86_PMC_IDX_MAX 64 - -#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 -#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 - -#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 -#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 - -#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) -#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) -#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) -#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) - -/* - * Includes eventsel and unit mask as well: - */ -#define ARCH_PERFMON_EVENT_MASK 0xffff - -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ - (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) - -#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 - -/* - * Intel "Architectural Performance Monitoring" CPUID - * detection/enumeration details: - */ -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - -union cpuid10_edx { - struct { - unsigned int num_counters_fixed:4; - unsigned int reserved:28; - } split; - unsigned int full; -}; - - -/* - * Fixed-purpose performance counters: - */ - -/* - * All 3 fixed-mode PMCs are configured via this single MSR: - */ -#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d - -/* - * The counts are available in three separate MSRs: - */ - -/* Instr_Retired.Any: */ -#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 -#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) - -/* CPU_CLK_Unhalted.Core: */ -#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a -#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) - -/* CPU_CLK_Unhalted.Ref: */ -#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b -#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) - -/* - * We model BTS tracing as another fixed-mode PMC. - * - * We choose a value in the middle of the fixed counter range, since lower - * values are used by actual fixed counters and higher values are used - * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. 
- */ -#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) - - -#ifdef CONFIG_PERF_COUNTERS -extern void init_hw_perf_counters(void); -extern void perf_counters_lapic_init(void); - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#else -static inline void init_hw_perf_counters(void) { } -static inline void perf_counters_lapic_init(void) { } -#endif - -#endif /* _ASM_X86_PERF_COUNTER_H */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h new file mode 100644 index 00000000000..ad7ce3fd506 --- /dev/null +++ b/arch/x86/include/asm/perf_event.h @@ -0,0 +1,108 @@ +#ifndef _ASM_X86_PERF_EVENT_H +#define _ASM_X86_PERF_EVENT_H + +/* + * Performance event hw details: + */ + +#define X86_PMC_MAX_GENERIC 8 +#define X86_PMC_MAX_FIXED 3 + +#define X86_PMC_IDX_GENERIC 0 +#define X86_PMC_IDX_FIXED 32 +#define X86_PMC_IDX_MAX 64 + +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 + +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 + +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) + +/* + * Includes eventsel and unit mask as well: + */ +#define ARCH_PERFMON_EVENT_MASK 0xffff + +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ + (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 + +/* + * Intel "Architectural Performance Monitoring" CPUID + * detection/enumeration details: + */ +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_events:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +union cpuid10_edx { + struct { + unsigned int num_events_fixed:4; + unsigned int reserved:28; + } split; + unsigned int full; +}; + + +/* + * Fixed-purpose performance events: + */ + +/* + * All 3 fixed-mode PMCs are configured via this single MSR: + */ +#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d + +/* + * The counts are available in three separate MSRs: + */ + +/* Instr_Retired.Any: */ +#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 +#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) + +/* CPU_CLK_Unhalted.Core: */ +#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a +#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) + +/* CPU_CLK_Unhalted.Ref: */ +#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b +#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) + +/* + * We model BTS tracing as another fixed-mode PMC. + * + * We choose a value in the middle of the fixed event range, since lower + * values are used by actual fixed events and higher values are used + * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. 
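 *
 * (Editorial aside, not part of the patch: with X86_PMC_IDX_FIXED = 32,
 * the three architectural fixed counters occupy indices 32-34, while the
 * "other overflow conditions" live at the very top of PERF_GLOBAL_STATUS,
 * e.g. OvfBuffer at bit 62 and CondChgd at bit 63 on these parts, so
 * index 32 + 16 = 48 collides with neither range.)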
+ */ +#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) + + +#ifdef CONFIG_PERF_EVENTS +extern void init_hw_perf_events(void); +extern void perf_events_lapic_init(void); + +#define PERF_EVENT_INDEX_OFFSET 0 + +#else +static inline void init_hw_perf_events(void) { } +static inline void perf_events_lapic_init(void) { } +#endif + +#endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 8deaada61bc..6fb3c209a7e 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -341,7 +341,7 @@ #define __NR_preadv 333 #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 -#define __NR_perf_counter_open 336 +#define __NR_perf_event_open 336 #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index b9f3c60de5f..8d3ad0adbc6 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -659,8 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv) __SYSCALL(__NR_pwritev, sys_pwritev) #define __NR_rt_tgsigqueueinfo 297 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) -#define __NR_perf_counter_open 298 -__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) +#define __NR_perf_event_open 298 +__SYSCALL(__NR_perf_event_open, sys_perf_event_open) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a34601f5298..754174d09de 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -14,7 +14,7 @@ * Mikael Pettersson : PM converted to driver model. */ -#include +#include #include #include #include @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include #include @@ -1189,7 +1189,7 @@ void __cpuinit setup_local_APIC(void) apic_write(APIC_ESR, 0); } #endif - perf_counters_lapic_init(); + perf_events_lapic_init(); preempt_disable(); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 8dd30638fe4..68537e957a9 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -27,7 +27,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2fea97eccf7..cc25c2b4a56 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include @@ -869,7 +869,7 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - init_hw_perf_counters(); + init_hw_perf_events(); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c deleted file mode 100644 index b1f115696c8..00000000000 --- a/arch/x86/kernel/cpu/perf_counter.c +++ /dev/null @@ -1,2298 +0,0 @@ -/* - * Performance counter x86 architecture code - * - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - * Copyright (C) 2009 Intel Corporation, - * - * For licencing details see kernel-base/COPYING - */ - -#include -#include -#include -#include 
-#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static u64 perf_counter_mask __read_mostly; - -/* The maximal number of PEBS counters: */ -#define MAX_PEBS_COUNTERS 4 - -/* The size of a BTS record in bytes: */ -#define BTS_RECORD_SIZE 24 - -/* The size of a per-cpu BTS buffer in bytes: */ -#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) - -/* The BTS overflow threshold in bytes from the end of the buffer: */ -#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) - - -/* - * Bits in the debugctlmsr controlling branch tracing. - */ -#define X86_DEBUGCTL_TR (1 << 6) -#define X86_DEBUGCTL_BTS (1 << 7) -#define X86_DEBUGCTL_BTINT (1 << 8) -#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) -#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) - -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. - */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; -}; - -struct cpu_hw_counters { - struct perf_counter *counters[X86_PMC_IDX_MAX]; - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long interrupts; - int enabled; - struct debug_store *ds; -}; - -/* - * struct x86_pmu - generic x86 pmu - */ -struct x86_pmu { - const char *name; - int version; - int (*handle_irq)(struct pt_regs *); - void (*disable_all)(void); - void (*enable_all)(void); - void (*enable)(struct hw_perf_counter *, int); - void (*disable)(struct hw_perf_counter *, int); - unsigned eventsel; - unsigned perfctr; - u64 (*event_map)(int); - u64 (*raw_event)(u64); - int max_events; - int num_counters; - int num_counters_fixed; - int counter_bits; - u64 counter_mask; - int apic; - u64 max_period; - u64 intel_ctrl; - void (*enable_bts)(u64 config); - void (*disable_bts)(void); -}; - -static struct x86_pmu x86_pmu __read_mostly; - -static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { - .enabled = 1, -}; - -/* - * Not sure about some of these - */ -static const u64 p6_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, -}; - -static u64 p6_pmu_event_map(int hw_event) -{ - return p6_perfmon_event_map[hw_event]; -} - -/* - * Counter setting that is specified not to count anything. - * We use this to effectively disable a counter. - * - * L2_RQSTS with 0 MESI unit mask. - */ -#define P6_NOP_COUNTER 0x0000002EULL - -static u64 p6_pmu_raw_event(u64 hw_event) -{ -#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL -#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL -#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL -#define P6_EVNTSEL_INV_MASK 0x00800000ULL -#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL - -#define P6_EVNTSEL_MASK \ - (P6_EVNTSEL_EVENT_MASK | \ - P6_EVNTSEL_UNIT_MASK | \ - P6_EVNTSEL_EDGE_MASK | \ - P6_EVNTSEL_INV_MASK | \ - P6_EVNTSEL_COUNTER_MASK) - - return hw_event & P6_EVNTSEL_MASK; -} - - -/* - * Intel PerfMon v3. Used on Core2 and later. 
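 *
 * (Editorial aside, not part of the patch: each entry packs the unit mask
 * in bits 15-8 above the event select in bits 7-0, matching
 * ARCH_PERFMON_EVENT_MASK; e.g. 0x412e below is the architectural "LLC
 * Misses" event 0x2e qualified with umask 0x41.)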
- */ -static const u64 intel_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, -}; - -static u64 intel_pmu_event_map(int hw_event) -{ - return intel_perfmon_event_map[hw_event]; -} - -/* - * Generalized hw caching related hw_event table, filled - * in on a per model basis. A value of 0 means - * 'not supported', -1 means 'hw_event makes no sense on - * this CPU', any other value means the raw hw_event - * ID. - */ - -#define C(x) PERF_COUNT_HW_CACHE_##x - -static u64 __read_mostly hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - -static const u64 nehalem_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ - [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ - [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ - [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static const u64 core2_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ - [ C(RESULT_MISS) 
] = 0x0140, /* L1D_CACHE_LD.I_STATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ - [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static const u64 atom_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ 
C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static u64 intel_pmu_raw_event(u64 hw_event) -{ -#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL -#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL -#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL -#define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL - -#define CORE_EVNTSEL_MASK \ - (CORE_EVNTSEL_EVENT_MASK | \ - CORE_EVNTSEL_UNIT_MASK | \ - CORE_EVNTSEL_EDGE_MASK | \ - CORE_EVNTSEL_INV_MASK | \ - CORE_EVNTSEL_COUNTER_MASK) - - return hw_event & CORE_EVNTSEL_MASK; -} - -static const u64 amd_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ - [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ - [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ - [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ - [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. 
*/ - [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -/* - * AMD Performance Monitor K7 and later. - */ -static const u64 amd_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, -}; - -static u64 amd_pmu_event_map(int hw_event) -{ - return amd_perfmon_event_map[hw_event]; -} - -static u64 amd_pmu_raw_event(u64 hw_event) -{ -#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL -#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL -#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL -#define K7_EVNTSEL_INV_MASK 0x000800000ULL -#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL - -#define K7_EVNTSEL_MASK \ - (K7_EVNTSEL_EVENT_MASK | \ - K7_EVNTSEL_UNIT_MASK | \ - K7_EVNTSEL_EDGE_MASK | \ - K7_EVNTSEL_INV_MASK | \ - K7_EVNTSEL_COUNTER_MASK) - - return hw_event & K7_EVNTSEL_MASK; -} - -/* - * Propagate counter elapsed time into the generic counter. - * Can only be executed on the CPU where the counter is active. - * Returns the delta events processed. - */ -static u64 -x86_perf_counter_update(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - int shift = 64 - x86_pmu.counter_bits; - u64 prev_raw_count, new_raw_count; - s64 delta; - - if (idx == X86_PMC_IDX_FIXED_BTS) - return 0; - - /* - * Careful: an NMI might modify the previous counter value. - * - * Our tactic to handle this is to first atomically read and - * exchange a new raw count - then add that new-prev delta - * count to the generic counter atomically: - */ -again: - prev_raw_count = atomic64_read(&hwc->prev_count); - rdmsrl(hwc->counter_base + idx, new_raw_count); - - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - /* - * Now we have the new raw value and have updated the prev - * timestamp already. We can now calculate the elapsed delta - * (counter-)time and add that to the generic counter. - * - * Careful, not all hw sign-extends above the physical width - * of the count. 
- */ - delta = (new_raw_count << shift) - (prev_raw_count << shift); - delta >>= shift; - - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static atomic_t active_counters; -static DEFINE_MUTEX(pmc_reserve_mutex); - -static bool reserve_pmc_hardware(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - int i; - - if (nmi_watchdog == NMI_LOCAL_APIC) - disable_lapic_nmi_watchdog(); - - for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) - goto perfctr_fail; - } - - for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) - goto eventsel_fail; - } -#endif - - return true; - -#ifdef CONFIG_X86_LOCAL_APIC -eventsel_fail: - for (i--; i >= 0; i--) - release_evntsel_nmi(x86_pmu.eventsel + i); - - i = x86_pmu.num_counters; - -perfctr_fail: - for (i--; i >= 0; i--) - release_perfctr_nmi(x86_pmu.perfctr + i); - - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); - - return false; -#endif -} - -static void release_pmc_hardware(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - int i; - - for (i = 0; i < x86_pmu.num_counters; i++) { - release_perfctr_nmi(x86_pmu.perfctr + i); - release_evntsel_nmi(x86_pmu.eventsel + i); - } - - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); -#endif -} - -static inline bool bts_available(void) -{ - return x86_pmu.enable_bts != NULL; -} - -static inline void init_debug_store_on_cpu(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; - - if (!ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, - (u32)((u64)(unsigned long)ds), - (u32)((u64)(unsigned long)ds >> 32)); -} - -static inline void fini_debug_store_on_cpu(int cpu) -{ - if (!per_cpu(cpu_hw_counters, cpu).ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); -} - -static void release_bts_hardware(void) -{ - int cpu; - - if (!bts_available()) - return; - - get_online_cpus(); - - for_each_online_cpu(cpu) - fini_debug_store_on_cpu(cpu); - - for_each_possible_cpu(cpu) { - struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; - - if (!ds) - continue; - - per_cpu(cpu_hw_counters, cpu).ds = NULL; - - kfree((void *)(unsigned long)ds->bts_buffer_base); - kfree(ds); - } - - put_online_cpus(); -} - -static int reserve_bts_hardware(void) -{ - int cpu, err = 0; - - if (!bts_available()) - return 0; - - get_online_cpus(); - - for_each_possible_cpu(cpu) { - struct debug_store *ds; - void *buffer; - - err = -ENOMEM; - buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); - if (unlikely(!buffer)) - break; - - ds = kzalloc(sizeof(*ds), GFP_KERNEL); - if (unlikely(!ds)) { - kfree(buffer); - break; - } - - ds->bts_buffer_base = (u64)(unsigned long)buffer; - ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = - ds->bts_buffer_base + BTS_BUFFER_SIZE; - ds->bts_interrupt_threshold = - ds->bts_absolute_maximum - BTS_OVFL_TH; - - per_cpu(cpu_hw_counters, cpu).ds = ds; - err = 0; - } - - if (err) - release_bts_hardware(); - else { - for_each_online_cpu(cpu) - init_debug_store_on_cpu(cpu); - } - - put_online_cpus(); - - return err; -} - -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { - release_pmc_hardware(); - release_bts_hardware(); - mutex_unlock(&pmc_reserve_mutex); - } -} - -static inline int x86_pmu_initialized(void) -{ - return x86_pmu.handle_irq != NULL; -} - -static inline int -set_ext_hw_attr(struct hw_perf_counter *hwc, struct 
perf_counter_attr *attr) -{ - unsigned int cache_type, cache_op, cache_result; - u64 config, val; - - config = attr->config; - - cache_type = (config >> 0) & 0xff; - if (cache_type >= PERF_COUNT_HW_CACHE_MAX) - return -EINVAL; - - cache_op = (config >> 8) & 0xff; - if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) - return -EINVAL; - - cache_result = (config >> 16) & 0xff; - if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - val = hw_cache_event_ids[cache_type][cache_op][cache_result]; - - if (val == 0) - return -ENOENT; - - if (val == -1) - return -EINVAL; - - hwc->config |= val; - - return 0; -} - -static void intel_pmu_enable_bts(u64 config) -{ - unsigned long debugctlmsr; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr |= X86_DEBUGCTL_TR; - debugctlmsr |= X86_DEBUGCTL_BTS; - debugctlmsr |= X86_DEBUGCTL_BTINT; - - if (!(config & ARCH_PERFMON_EVENTSEL_OS)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; - - if (!(config & ARCH_PERFMON_EVENTSEL_USR)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; - - update_debugctlmsr(debugctlmsr); -} - -static void intel_pmu_disable_bts(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - unsigned long debugctlmsr; - - if (!cpuc->ds) - return; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr &= - ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | - X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); - - update_debugctlmsr(debugctlmsr); -} - -/* - * Setup the hardware configuration for a given attr_type - */ -static int __hw_perf_counter_init(struct perf_counter *counter) -{ - struct perf_counter_attr *attr = &counter->attr; - struct hw_perf_counter *hwc = &counter->hw; - u64 config; - int err; - - if (!x86_pmu_initialized()) - return -ENODEV; - - err = 0; - if (!atomic_inc_not_zero(&active_counters)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_counters) == 0) { - if (!reserve_pmc_hardware()) - err = -EBUSY; - else - err = reserve_bts_hardware(); - } - if (!err) - atomic_inc(&active_counters); - mutex_unlock(&pmc_reserve_mutex); - } - if (err) - return err; - - counter->destroy = hw_perf_counter_destroy; - - /* - * Generate PMC IRQs: - * (keep 'enabled' bit clear for now) - */ - hwc->config = ARCH_PERFMON_EVENTSEL_INT; - - /* - * Count user and OS events unless requested not to. - */ - if (!attr->exclude_user) - hwc->config |= ARCH_PERFMON_EVENTSEL_USR; - if (!attr->exclude_kernel) - hwc->config |= ARCH_PERFMON_EVENTSEL_OS; - - if (!hwc->sample_period) { - hwc->sample_period = x86_pmu.max_period; - hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); - } else { - /* - * If we have a PMU initialized but no APIC - * interrupts, we cannot sample hardware - * counters (user-space has to fall back and - * sample via a hrtimer based software counter): - */ - if (!x86_pmu.apic) - return -EOPNOTSUPP; - } - - /* - * Raw hw_event type provide the config in the hw_event structure - */ - if (attr->type == PERF_TYPE_RAW) { - hwc->config |= x86_pmu.raw_event(attr->config); - return 0; - } - - if (attr->type == PERF_TYPE_HW_CACHE) - return set_ext_hw_attr(hwc, attr); - - if (attr->config >= x86_pmu.max_events) - return -EINVAL; - - /* - * The generic map: - */ - config = x86_pmu.event_map(attr->config); - - if (config == 0) - return -ENOENT; - - if (config == -1LL) - return -EINVAL; - - /* - * Branch tracing: - */ - if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && - (hwc->sample_period == 1)) { - /* BTS is not supported by this architecture. 
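/*
 * Note the attribute shape that selects branch tracing here: a hardware
 * branch-instructions event with a sample period of exactly 1, and
 * kernel-mode counting excluded. A minimal sketch of such an attribute
 * (hypothetical usage, not part of this file):
 */
static struct perf_counter_attr bts_branch_attr(void)
{
	struct perf_counter_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
		.sample_period	= 1,	/* period == 1 routes to BTS */
		.exclude_kernel	= 1,	/* OS-mode BTS is rejected below */
	};
	return attr;
}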
*/ - if (!bts_available()) - return -EOPNOTSUPP; - - /* BTS is currently only allowed for user-mode. */ - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) - return -EOPNOTSUPP; - } - - hwc->config |= config; - - return 0; -} - -static void p6_pmu_disable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - - /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); -} - -static void intel_pmu_disable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) - intel_pmu_disable_bts(); -} - -static void amd_pmu_disable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - int idx; - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - /* - * ensure we write the disable before we start disabling the - * counters proper, so that amd_pmu_enable_counter() does the - * right thing. - */ - barrier(); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - rdmsrl(MSR_K7_EVNTSEL0 + idx, val); - if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) - continue; - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); - } -} - -void hw_perf_disable(void) -{ - if (!x86_pmu_initialized()) - return; - return x86_pmu.disable_all(); -} - -static void p6_pmu_enable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - unsigned long val; - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); -} - -static void intel_pmu_enable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); - - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { - struct perf_counter *counter = - cpuc->counters[X86_PMC_IDX_FIXED_BTS]; - - if (WARN_ON_ONCE(!counter)) - return; - - intel_pmu_enable_bts(counter->hw.config); - } -} - -static void amd_pmu_enable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - int idx; - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct perf_counter *counter = cpuc->counters[idx]; - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - val = counter->hw.config; - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); - } -} - -void hw_perf_enable(void) -{ - if (!x86_pmu_initialized()) - return; - x86_pmu.enable_all(); -} - -static inline u64 intel_pmu_get_status(void) -{ - u64 status; - - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - - return status; -} - -static inline void intel_pmu_ack_status(u64 ack) -{ - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); -} - -static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - (void)checking_wrmsrl(hwc->config_base + idx, - hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); -} - -static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - 
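/*
 * The disable_all/enable_all paths above share one idiom: flip the
 * per-cpu cpuc->enabled flag first, then issue a compiler barrier so
 * the per-counter enable/disable helpers (which consult the flag, as
 * p6_pmu_disable_counter below does) never observe a stale value. A
 * condensed sketch of that publish step, assuming the same per-cpu
 * structure:
 */
static void pmu_publish_disabled(struct cpu_hw_counters *cpuc)
{
	cpuc->enabled = 0;
	barrier();	/* flag settles before any EVNTSEL MSR is touched */
}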
(void)checking_wrmsrl(hwc->config_base + idx, hwc->config); -} - -static inline void -intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) -{ - int idx = __idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, mask; - - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - (void)checking_wrmsrl(hwc->config_base, ctrl_val); -} - -static inline void -p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val = P6_NOP_COUNTER; - - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - - (void)checking_wrmsrl(hwc->config_base + idx, val); -} - -static inline void -intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { - intel_pmu_disable_bts(); - return; - } - - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_disable_fixed(hwc, idx); - return; - } - - x86_pmu_disable_counter(hwc, idx); -} - -static inline void -amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - x86_pmu_disable_counter(hwc, idx); -} - -static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); - -/* - * Set the next IRQ period, based on the hwc->period_left value. - * To be called with the counter disabled in hw: - */ -static int -x86_perf_counter_set_period(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int err, ret = 0; - - if (idx == X86_PMC_IDX_FIXED_BTS) - return 0; - - /* - * If we are way outside a reasonable range then just skip forward: - */ - if (unlikely(left <= -period)) { - left = period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - /* - * Quirk: certain CPUs don't like it if just 1 hw_event is left: - */ - if (unlikely(left < 2)) - left = 2; - - if (left > x86_pmu.max_period) - left = x86_pmu.max_period; - - per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; - - /* - * The hw counter starts counting from this counter offset, - * mark it to be able to extract future deltas: - */ - atomic64_set(&hwc->prev_count, (u64)-left); - - err = checking_wrmsrl(hwc->counter_base + idx, - (u64)(-left) & x86_pmu.counter_mask); - - perf_counter_update_userpage(counter); - - return ret; -} - -static inline void -intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) -{ - int idx = __idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, bits, mask; - int err; - - /* - * Enable IRQ generation (0x8), - * and enable ring-3 counting (0x2) and ring-0 counting (0x1) - * if requested: - */ - bits = 0x8ULL; - if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) - bits |= 0x2; - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) - bits |= 0x1; - bits <<= (idx * 4); - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - ctrl_val |= bits; - err = checking_wrmsrl(hwc->config_base, ctrl_val); -} - -static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - - val = hwc->config; - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - - (void)checking_wrmsrl(hwc->config_base + idx, val); -} - - -static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
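/*
 * intel_pmu_enable_fixed() above packs each fixed counter's controls
 * into a 4-bit field of MSR_ARCH_PERFMON_FIXED_CTR_CTRL: bit 0 counts
 * ring 0, bit 1 counts ring 3, bit 3 raises a PMI. A worked example of
 * the field value for fixed counter 1, user-mode counting with PMI:
 */
static u64 fixed_ctrl_field_example(void)
{
	u64 bits = 0x8ULL | 0x2ULL;	/* PMI + ring-3 counting == 0xa */

	return bits << (1 * 4);		/* counter 1's field: 0xa0 */
}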
- if (!__get_cpu_var(cpu_hw_counters).enabled) - return; - - intel_pmu_enable_bts(hwc->config); - return; - } - - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_enable_fixed(hwc, idx); - return; - } - - x86_pmu_enable_counter(hwc, idx); -} - -static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - if (cpuc->enabled) - x86_pmu_enable_counter(hwc, idx); -} - -static int -fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) -{ - unsigned int hw_event; - - hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; - - if (unlikely((hw_event == - x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && - (hwc->sample_period == 1))) - return X86_PMC_IDX_FIXED_BTS; - - if (!x86_pmu.num_counters_fixed) - return -1; - - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) - return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) - return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) - return X86_PMC_IDX_FIXED_BUS_CYCLES; - - return -1; -} - -/* - * Find a PMC slot for the freshly enabled / scheduled in counter: - */ -static int x86_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx; - - idx = fixed_mode_idx(counter, hwc); - if (idx == X86_PMC_IDX_FIXED_BTS) { - /* BTS is already occupied. */ - if (test_and_set_bit(idx, cpuc->used_mask)) - return -EAGAIN; - - hwc->config_base = 0; - hwc->counter_base = 0; - hwc->idx = idx; - } else if (idx >= 0) { - /* - * Try to get the fixed counter, if that is already taken - * then try to get a generic counter: - */ - if (test_and_set_bit(idx, cpuc->used_mask)) - goto try_generic; - - hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - /* - * We set it so that counter_base + idx in wrmsr/rdmsr maps to - * MSR_ARCH_PERFMON_FIXED_CTR0 ... 
CTR2: - */ - hwc->counter_base = - MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; - hwc->idx = idx; - } else { - idx = hwc->idx; - /* Try to get the previous generic counter again */ - if (test_and_set_bit(idx, cpuc->used_mask)) { -try_generic: - idx = find_first_zero_bit(cpuc->used_mask, - x86_pmu.num_counters); - if (idx == x86_pmu.num_counters) - return -EAGAIN; - - set_bit(idx, cpuc->used_mask); - hwc->idx = idx; - } - hwc->config_base = x86_pmu.eventsel; - hwc->counter_base = x86_pmu.perfctr; - } - - perf_counters_lapic_init(); - - x86_pmu.disable(hwc, idx); - - cpuc->counters[idx] = counter; - set_bit(idx, cpuc->active_mask); - - x86_perf_counter_set_period(counter, hwc, idx); - x86_pmu.enable(hwc, idx); - - perf_counter_update_userpage(counter); - - return 0; -} - -static void x86_pmu_unthrottle(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - - if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || - cpuc->counters[hwc->idx] != counter)) - return; - - x86_pmu.enable(hwc, hwc->idx); -} - -void perf_counter_print_debug(void) -{ - u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; - struct cpu_hw_counters *cpuc; - unsigned long flags; - int cpu, idx; - - if (!x86_pmu.num_counters) - return; - - local_irq_save(flags); - - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); - - if (x86_pmu.version >= 2) { - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); - - pr_info("\n"); - pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); - pr_info("CPU#%d: status: %016llx\n", cpu, status); - pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); - pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); - } - pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); - rdmsrl(x86_pmu.perfctr + idx, pmc_count); - - prev_left = per_cpu(pmc_prev_left[idx], cpu); - - pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", - cpu, idx, pmc_ctrl); - pr_info("CPU#%d: gen-PMC%d count: %016llx\n", - cpu, idx, pmc_count); - pr_info("CPU#%d: gen-PMC%d left: %016llx\n", - cpu, idx, prev_left); - } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); - - pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", - cpu, idx, pmc_count); - } - local_irq_restore(flags); -} - -static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc) -{ - struct debug_store *ds = cpuc->ds; - struct bts_record { - u64 from; - u64 to; - u64 flags; - }; - struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; - struct bts_record *at, *top; - struct perf_output_handle handle; - struct perf_event_header header; - struct perf_sample_data data; - struct pt_regs regs; - - if (!counter) - return; - - if (!ds) - return; - - at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; - top = (struct bts_record *)(unsigned long)ds->bts_index; - - if (top <= at) - return; - - ds->bts_index = ds->bts_buffer_base; - - - data.period = counter->hw.last_period; - data.addr = 0; - regs.ip = 0; - - /* - * Prepare a generic sample, i.e. fill in the invariant fields. - * We will overwrite the from and to address before we output - * the sample. 
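/*
 * The records drained above are fixed 24-byte triples laid down by the
 * hardware between bts_buffer_base and bts_index. The struct below
 * restates the layout from intel_pmu_drain_bts_buffer(), plus a small
 * helper (a sketch, not in the original) counting pending records:
 */
struct bts_record_layout {
	u64 from;	/* branch source address */
	u64 to;		/* branch target address */
	u64 flags;	/* prediction/status bits */
};

static unsigned long bts_records_pending(struct debug_store *ds)
{
	return (unsigned long)(ds->bts_index - ds->bts_buffer_base) /
		sizeof(struct bts_record_layout);
}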
- */ - perf_prepare_sample(&header, &data, counter, &regs); - - if (perf_output_begin(&handle, counter, - header.size * (top - at), 1, 1)) - return; - - for (; at < top; at++) { - data.ip = at->from; - data.addr = at->to; - - perf_output_sample(&handle, &header, &data, counter); - } - - perf_output_end(&handle); - - /* There's new data available. */ - counter->hw.interrupts++; - counter->pending_kill = POLL_IN; -} - -static void x86_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - - /* - * Must be done before we disable, otherwise the nmi handler - * could reenable again: - */ - clear_bit(idx, cpuc->active_mask); - x86_pmu.disable(hwc, idx); - - /* - * Make sure the cleared pointer becomes visible before we - * (potentially) free the counter: - */ - barrier(); - - /* - * Drain the remaining delta count out of a counter - * that we are disabling: - */ - x86_perf_counter_update(counter, hwc, idx); - - /* Drain the remaining BTS records. */ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) - intel_pmu_drain_bts_buffer(cpuc); - - cpuc->counters[idx] = NULL; - clear_bit(idx, cpuc->used_mask); - - perf_counter_update_userpage(counter); -} - -/* - * Save and restart an expired counter. Called by NMI contexts, - * so it has to be careful about preempting normal counter ops: - */ -static int intel_pmu_save_and_restart(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - int ret; - - x86_perf_counter_update(counter, hwc, idx); - ret = x86_perf_counter_set_period(counter, hwc, idx); - - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - intel_pmu_enable_counter(hwc, idx); - - return ret; -} - -static void intel_pmu_reset(void) -{ - struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; - unsigned long flags; - int idx; - - if (!x86_pmu.num_counters) - return; - - local_irq_save(flags); - - printk("clearing PMU state on CPU#%d\n", smp_processor_id()); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); - checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); - } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { - checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); - } - if (ds) - ds->bts_index = ds->bts_buffer_base; - - local_irq_restore(flags); -} - -static int p6_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct perf_counter *counter; - struct hw_perf_counter *hwc; - int idx, handled = 0; - u64 val; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) - continue; - - counter = cpuc->counters[idx]; - hwc = &counter->hw; - - val = x86_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << (x86_pmu.counter_bits - 1))) - continue; - - /* - * counter overflow - */ - handled = 1; - data.period = counter->hw.last_period; - - if (!x86_perf_counter_set_period(counter, hwc, idx)) - continue; - - if (perf_counter_overflow(counter, 1, &data, regs)) - p6_pmu_disable_counter(hwc, idx); - } - - if (handled) - inc_irq_stat(apic_perf_irqs); - - return handled; -} - -/* - * This handler is triggered by the local APIC, so the APIC IRQ handling - * rules apply: - */ -static int intel_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - int bit, loops; - u64 ack,
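/*
 * The overflow test in the handlers here relies on counters being
 * armed at -left: while counting up toward zero the top implemented
 * bit stays set, and it clears once the counter wraps. A worked
 * example assuming 48 implemented bits and a period of 1000 -- the
 * armed value (u64)(-1000) & ((1ULL << 48) - 1) has bit 47 set, and
 * after 1000 increments it wraps to 0:
 */
static int counter_overflowed_48bit(u64 val)
{
	return !(val & (1ULL << 47));	/* top bit clear => overflowed */
}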
status; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - - perf_disable(); - intel_pmu_drain_bts_buffer(cpuc); - status = intel_pmu_get_status(); - if (!status) { - perf_enable(); - return 0; - } - - loops = 0; -again: - if (++loops > 100) { - WARN_ONCE(1, "perfcounters: irq loop stuck!\n"); - perf_counter_print_debug(); - intel_pmu_reset(); - perf_enable(); - return 1; - } - - inc_irq_stat(apic_perf_irqs); - ack = status; - for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { - struct perf_counter *counter = cpuc->counters[bit]; - - clear_bit(bit, (unsigned long *) &status); - if (!test_bit(bit, cpuc->active_mask)) - continue; - - if (!intel_pmu_save_and_restart(counter)) - continue; - - data.period = counter->hw.last_period; - - if (perf_counter_overflow(counter, 1, &data, regs)) - intel_pmu_disable_counter(&counter->hw, bit); - } - - intel_pmu_ack_status(ack); - - /* - * Repeat if there is more work to be done: - */ - status = intel_pmu_get_status(); - if (status) - goto again; - - perf_enable(); - - return 1; -} - -static int amd_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct perf_counter *counter; - struct hw_perf_counter *hwc; - int idx, handled = 0; - u64 val; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) - continue; - - counter = cpuc->counters[idx]; - hwc = &counter->hw; - - val = x86_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << (x86_pmu.counter_bits - 1))) - continue; - - /* - * counter overflow - */ - handled = 1; - data.period = counter->hw.last_period; - - if (!x86_perf_counter_set_period(counter, hwc, idx)) - continue; - - if (perf_counter_overflow(counter, 1, &data, regs)) - amd_pmu_disable_counter(hwc, idx); - } - - if (handled) - inc_irq_stat(apic_perf_irqs); - - return handled; -} - -void smp_perf_pending_interrupt(struct pt_regs *regs) -{ - irq_enter(); - ack_APIC_irq(); - inc_irq_stat(apic_pending_irqs); - perf_counter_do_pending(); - irq_exit(); -} - -void set_perf_counter_pending(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - apic->send_IPI_self(LOCAL_PENDING_VECTOR); -#endif -} - -void perf_counters_lapic_init(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - if (!x86_pmu.apic || !x86_pmu_initialized()) - return; - - /* - * Always use NMI for PMU - */ - apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif -} - -static int __kprobes -perf_counter_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) -{ - struct die_args *args = __args; - struct pt_regs *regs; - - if (!atomic_read(&active_counters)) - return NOTIFY_DONE; - - switch (cmd) { - case DIE_NMI: - case DIE_NMI_IPI: - break; - - default: - return NOTIFY_DONE; - } - - regs = args->regs; - -#ifdef CONFIG_X86_LOCAL_APIC - apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif - /* - * Can't rely on the handled return value to say it was our NMI, two - * counters could trigger 'simultaneously' raising two back-to-back NMIs. - * - * If the first NMI handles both, the latter will be empty and daze - * the CPU. 
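/*
 * intel_pmu_handle_irq() above keeps re-reading GLOBAL_STATUS until it
 * is clean, bailing out after 100 rounds. A condensed sketch of that
 * ack-loop shape, assuming the status helpers defined earlier (the
 * per-counter servicing is elided):
 */
static int drain_overflow_status(void)
{
	u64 status = intel_pmu_get_status();
	int loops = 0;

	while (status) {
		if (++loops > 100)
			return -1;	/* stuck; the real code resets the PMU */
		intel_pmu_ack_status(status);	 /* clear the bits we saw */
		status = intel_pmu_get_status(); /* pick up fresh overflows */
	}
	return 0;
}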
- */ - x86_pmu.handle_irq(regs); - - return NOTIFY_STOP; -} - -static __read_mostly struct notifier_block perf_counter_nmi_notifier = { - .notifier_call = perf_counter_nmi_handler, - .next = NULL, - .priority = 1 -}; - -static struct x86_pmu p6_pmu = { - .name = "p6", - .handle_irq = p6_pmu_handle_irq, - .disable_all = p6_pmu_disable_all, - .enable_all = p6_pmu_enable_all, - .enable = p6_pmu_enable_counter, - .disable = p6_pmu_disable_counter, - .eventsel = MSR_P6_EVNTSEL0, - .perfctr = MSR_P6_PERFCTR0, - .event_map = p6_pmu_event_map, - .raw_event = p6_pmu_raw_event, - .max_events = ARRAY_SIZE(p6_perfmon_event_map), - .apic = 1, - .max_period = (1ULL << 31) - 1, - .version = 0, - .num_counters = 2, - /* - * Counters have 40 bits implemented. However they are designed such - * that bits [32-39] are sign extensions of bit 31. As such the - * effective width of a counter for P6-like PMU is 32 bits only. - * - * See IA-32 Intel Architecture Software developer manual Vol 3B - */ - .counter_bits = 32, - .counter_mask = (1ULL << 32) - 1, -}; - -static struct x86_pmu intel_pmu = { - .name = "Intel", - .handle_irq = intel_pmu_handle_irq, - .disable_all = intel_pmu_disable_all, - .enable_all = intel_pmu_enable_all, - .enable = intel_pmu_enable_counter, - .disable = intel_pmu_disable_counter, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .raw_event = intel_pmu_raw_event, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic counter period: - */ - .max_period = (1ULL << 31) - 1, - .enable_bts = intel_pmu_enable_bts, - .disable_bts = intel_pmu_disable_bts, -}; - -static struct x86_pmu amd_pmu = { - .name = "AMD", - .handle_irq = amd_pmu_handle_irq, - .disable_all = amd_pmu_disable_all, - .enable_all = amd_pmu_enable_all, - .enable = amd_pmu_enable_counter, - .disable = amd_pmu_disable_counter, - .eventsel = MSR_K7_EVNTSEL0, - .perfctr = MSR_K7_PERFCTR0, - .event_map = amd_pmu_event_map, - .raw_event = amd_pmu_raw_event, - .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = 4, - .counter_bits = 48, - .counter_mask = (1ULL << 48) - 1, - .apic = 1, - /* use highest bit to detect overflow */ - .max_period = (1ULL << 47) - 1, -}; - -static int p6_pmu_init(void) -{ - switch (boot_cpu_data.x86_model) { - case 1: - case 3: /* Pentium Pro */ - case 5: - case 6: /* Pentium II */ - case 7: - case 8: - case 11: /* Pentium III */ - break; - case 9: - case 13: - /* Pentium M */ - break; - default: - pr_cont("unsupported p6 CPU model %d ", - boot_cpu_data.x86_model); - return -ENODEV; - } - - x86_pmu = p6_pmu; - - if (!cpu_has_apic) { - pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); - pr_info("no hardware sampling interrupt available.\n"); - x86_pmu.apic = 0; - } - - return 0; -} - -static int intel_pmu_init(void) -{ - union cpuid10_edx edx; - union cpuid10_eax eax; - unsigned int unused; - unsigned int ebx; - int version; - - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - /* check for P6 processor family */ - if (boot_cpu_data.x86 == 6) { - return p6_pmu_init(); - } else { - return -ENODEV; - } - } - - /* - * Check whether the Architectural PerfMon supports - * Branch Misses Retired hw_event or not. 
- */ - cpuid(10, &eax.full, &ebx, &unused, &edx.full); - if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) - return -ENODEV; - - version = eax.split.version_id; - if (version < 2) - return -ENODEV; - - x86_pmu = intel_pmu; - x86_pmu.version = version; - x86_pmu.num_counters = eax.split.num_counters; - x86_pmu.counter_bits = eax.split.bit_width; - x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; - - /* - * Quirk: v2 perfmon does not report fixed-purpose counters, so - * assume at least 3 counters: - */ - x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); - - /* - * Install the hw-cache-events table: - */ - switch (boot_cpu_data.x86_model) { - case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 29: /* six-core 45 nm xeon "Dunnington" */ - memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Core2 events, "); - break; - default: - case 26: - memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Nehalem/Corei7 events, "); - break; - case 28: - memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Atom events, "); - break; - } - return 0; -} - -static int amd_pmu_init(void) -{ - /* Performance-monitoring supported from K7 and later: */ - if (boot_cpu_data.x86 < 6) - return -ENODEV; - - x86_pmu = amd_pmu; - - /* Events are common for all AMDs */ - memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - return 0; -} - -void __init init_hw_perf_counters(void) -{ - int err; - - pr_info("Performance Counters: "); - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - err = intel_pmu_init(); - break; - case X86_VENDOR_AMD: - err = amd_pmu_init(); - break; - default: - return; - } - if (err != 0) { - pr_cont("no PMU driver, software counters only.\n"); - return; - } - - pr_cont("%s PMU driver.\n", x86_pmu.name); - - if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { - WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", - x86_pmu.num_counters, X86_PMC_MAX_GENERIC); - x86_pmu.num_counters = X86_PMC_MAX_GENERIC; - } - perf_counter_mask = (1 << x86_pmu.num_counters) - 1; - perf_max_counters = x86_pmu.num_counters; - - if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { - WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", - x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); - x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; - } - - perf_counter_mask |= - ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; - x86_pmu.intel_ctrl = perf_counter_mask; - - perf_counters_lapic_init(); - register_die_notifier(&perf_counter_nmi_notifier); - - pr_info("... version: %d\n", x86_pmu.version); - pr_info("... bit width: %d\n", x86_pmu.counter_bits); - pr_info("... generic counters: %d\n", x86_pmu.num_counters); - pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); - pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed); - pr_info("... 
counter mask: %016Lx\n", perf_counter_mask); -} - -static inline void x86_pmu_read(struct perf_counter *counter) -{ - x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); -} - -static const struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, -}; - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - int err; - - err = __hw_perf_counter_init(counter); - if (err) { - if (counter->destroy) - counter->destroy(counter); - return ERR_PTR(err); - } - - return &pmu; -} - -/* - * callchain support - */ - -static inline -void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); -static DEFINE_PER_CPU(int, in_nmi_frame); - - -static void -backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - /* Ignore warnings */ -} - -static void backtrace_warning(void *data, char *msg) -{ - /* Ignore warnings */ -} - -static int backtrace_stack(void *data, char *name) -{ - per_cpu(in_nmi_frame, smp_processor_id()) = - x86_is_stack_id(NMI_STACK, name); - - return 0; -} - -static void backtrace_address(void *data, unsigned long addr, int reliable) -{ - struct perf_callchain_entry *entry = data; - - if (per_cpu(in_nmi_frame, smp_processor_id())) - return; - - if (reliable) - callchain_store(entry, addr); -} - -static const struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, - .stack = backtrace_stack, - .address = backtrace_address, -}; - -#include "../dumpstack.h" - -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->ip); - - dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); -} - -/* - * best effort, GUP based copy_from_user() that assumes IRQ or NMI context - */ -static unsigned long -copy_from_user_nmi(void *to, const void __user *from, unsigned long n) -{ - unsigned long offset, addr = (unsigned long)from; - int type = in_nmi() ? 
KM_NMI : KM_IRQ0; - unsigned long size, len = 0; - struct page *page; - void *map; - int ret; - - do { - ret = __get_user_pages_fast(addr, 1, 0, &page); - if (!ret) - break; - - offset = addr & (PAGE_SIZE - 1); - size = min(PAGE_SIZE - offset, n - len); - - map = kmap_atomic(page, type); - memcpy(to, map+offset, size); - kunmap_atomic(map, type); - put_page(page); - - len += size; - to += size; - addr += size; - - } while (len < n); - - return len; -} - -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) -{ - unsigned long bytes; - - bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); - - return bytes == sizeof(*frame); -} - -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - struct stack_frame frame; - const void __user *fp; - - if (!user_mode(regs)) - regs = task_pt_regs(current); - - fp = (void __user *)regs->bp; - - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->ip); - - while (entry->nr < PERF_MAX_STACK_DEPTH) { - frame.next_frame = NULL; - frame.return_address = 0; - - if (!copy_stack_frame(fp, &frame)) - break; - - if ((unsigned long)fp < regs->sp) - break; - - callchain_store(entry, frame.return_address); - fp = frame.next_frame; - } -} - -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!current || current->pid == 0) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - if (in_nmi()) - entry = &__get_cpu_var(pmc_nmi_entry); - else - entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} - -void hw_perf_counter_setup_online(int cpu) -{ - init_debug_store_on_cpu(cpu); -} diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c new file mode 100644 index 00000000000..0d03629fb1a --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event.c @@ -0,0 +1,2298 @@ +/* + * Performance events x86 architecture code + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2009 Intel Corporation, + * + * For licencing details see kernel-base/COPYING + */ + +#include <linux/perf_event.h> +#include <linux/capability.h> +#include <linux/notifier.h> +#include <linux/hardirq.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/kdebug.h> +#include <linux/sched.h> +#include <linux/uaccess.h> +#include <linux/highmem.h> +#include <linux/cpu.h> + +#include <asm/apic.h> +#include <asm/stacktrace.h> +#include <asm/nmi.h> + +static u64 perf_event_mask __read_mostly; + +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS 4 + +/* The size of a BTS record in bytes: */ +#define BTS_RECORD_SIZE 24 + +/* The size of a per-cpu BTS buffer in bytes: */ +#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) + +/* The BTS overflow threshold in bytes from the end of the buffer: */ +#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) + + +/* + * Bits in the debugctlmsr controlling branch tracing. + */ +#define X86_DEBUGCTL_TR (1 << 6) +#define X86_DEBUGCTL_BTS (1 << 7) +#define X86_DEBUGCTL_BTINT (1 << 8) +#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) +#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) + +/* + * A debug store configuration.
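/*
 * With a BTS_RECORD_SIZE of 24 bytes, the buffer sized above holds 2048
 * records and the interrupt threshold sits 128 records short of the
 * end. A sketch of the same arithmetic reserve_bts_hardware() performs
 * per cpu (helper name is illustrative):
 */
static void bts_init_thresholds(struct debug_store *ds, u64 base)
{
	ds->bts_buffer_base	 = base;
	ds->bts_index		 = base;
	ds->bts_absolute_maximum = base + BTS_BUFFER_SIZE;	/* 49152 bytes */
	ds->bts_interrupt_threshold =
		ds->bts_absolute_maximum - BTS_OVFL_TH;		/* 3072 bytes headroom */
}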
+ * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_event_reset[MAX_PEBS_EVENTS]; +}; + +struct cpu_hw_events { + struct perf_event *events[X86_PMC_IDX_MAX]; + unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long interrupts; + int enabled; + struct debug_store *ds; +}; + +/* + * struct x86_pmu - generic x86 pmu + */ +struct x86_pmu { + const char *name; + int version; + int (*handle_irq)(struct pt_regs *); + void (*disable_all)(void); + void (*enable_all)(void); + void (*enable)(struct hw_perf_event *, int); + void (*disable)(struct hw_perf_event *, int); + unsigned eventsel; + unsigned perfctr; + u64 (*event_map)(int); + u64 (*raw_event)(u64); + int max_events; + int num_events; + int num_events_fixed; + int event_bits; + u64 event_mask; + int apic; + u64 max_period; + u64 intel_ctrl; + void (*enable_bts)(u64 config); + void (*disable_bts)(void); +}; + +static struct x86_pmu x86_pmu __read_mostly; + +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .enabled = 1, +}; + +/* + * Not sure about some of these + */ +static const u64 p6_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, +}; + +static u64 p6_pmu_event_map(int hw_event) +{ + return p6_perfmon_event_map[hw_event]; +} + +/* + * Event setting that is specified not to count anything. + * We use this to effectively disable a counter. + * + * L2_RQSTS with 0 MESI unit mask. + */ +#define P6_NOP_EVENT 0x0000002EULL + +static u64 p6_pmu_raw_event(u64 hw_event) +{ +#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL +#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL +#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL +#define P6_EVNTSEL_INV_MASK 0x00800000ULL +#define P6_EVNTSEL_REG_MASK 0xFF000000ULL + +#define P6_EVNTSEL_MASK \ + (P6_EVNTSEL_EVENT_MASK | \ + P6_EVNTSEL_UNIT_MASK | \ + P6_EVNTSEL_EDGE_MASK | \ + P6_EVNTSEL_INV_MASK | \ + P6_EVNTSEL_REG_MASK) + + return hw_event & P6_EVNTSEL_MASK; +} + + +/* + * Intel PerfMon v3. Used on Core2 and later. + */ +static const u64 intel_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, +}; + +static u64 intel_pmu_event_map(int hw_event) +{ + return intel_perfmon_event_map[hw_event]; +} + +/* + * Generalized hw caching related hw_event table, filled + * in on a per model basis. A value of 0 means + * 'not supported', -1 means 'hw_event makes no sense on + * this CPU', any other value means the raw hw_event + * ID. 
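/*
 * The generalized table below is indexed by a (cache, op, result)
 * triple that user space packs into attr->config one byte per level;
 * set_ext_hw_attr() later unpacks it the same way. A sketch of the
 * encoding side (helper name is illustrative):
 */
static u64 hw_cache_config(unsigned int type, unsigned int op,
			   unsigned int result)
{
	/* byte 0: cache, byte 1: operation, byte 2: access result */
	return (u64)type | ((u64)op << 8) | ((u64)result << 16);
}

/* e.g. L1D read misses: hw_cache_config(C(L1D), C(OP_READ), C(RESULT_MISS)) */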
+ */ + +#define C(x) PERF_COUNT_HW_CACHE_##x + +static u64 __read_mostly hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + +static const u64 nehalem_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ + [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ + [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ + [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ + [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ + [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static const u64 core2_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ + [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ + [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 
0x4f29, /* L2_LD.MESI */ + [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ + [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static const u64 atom_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ + [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ + [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, 
+ }, + }, +}; + +static u64 intel_pmu_raw_event(u64 hw_event) +{ +#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL +#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL +#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL +#define CORE_EVNTSEL_INV_MASK 0x00800000ULL +#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL + +#define CORE_EVNTSEL_MASK \ + (CORE_EVNTSEL_EVENT_MASK | \ + CORE_EVNTSEL_UNIT_MASK | \ + CORE_EVNTSEL_EDGE_MASK | \ + CORE_EVNTSEL_INV_MASK | \ + CORE_EVNTSEL_REG_MASK) + + return hw_event & CORE_EVNTSEL_MASK; +} + +static const u64 amd_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ + [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ + [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ + [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ + [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ + [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +/* + * AMD Performance Monitor K7 and later.
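/*
 * The raw_event callbacks above only sanitize what user space passes
 * in: a PERF_TYPE_RAW config is already in EVNTSEL layout, and the
 * masks keep the event-select, unit-mask, edge, invert and reg fields
 * while dropping everything else. A sketch of composing one (event and
 * umask values are placeholders):
 */
static u64 make_raw_config(u8 event_select, u8 unit_mask)
{
	/* bits 7:0 event select, bits 15:8 unit mask */
	return (u64)event_select | ((u64)unit_mask << 8);
}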
+ */ +static const u64 amd_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, +}; + +static u64 amd_pmu_event_map(int hw_event) +{ + return amd_perfmon_event_map[hw_event]; +} + +static u64 amd_pmu_raw_event(u64 hw_event) +{ +#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL +#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL +#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL +#define K7_EVNTSEL_INV_MASK 0x000800000ULL +#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL + +#define K7_EVNTSEL_MASK \ + (K7_EVNTSEL_EVENT_MASK | \ + K7_EVNTSEL_UNIT_MASK | \ + K7_EVNTSEL_EDGE_MASK | \ + K7_EVNTSEL_INV_MASK | \ + K7_EVNTSEL_REG_MASK) + + return hw_event & K7_EVNTSEL_MASK; +} + +/* + * Propagate event elapsed time into the generic event. + * Can only be executed on the CPU where the event is active. + * Returns the delta events processed. + */ +static u64 +x86_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + int shift = 64 - x86_pmu.event_bits; + u64 prev_raw_count, new_raw_count; + s64 delta; + + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + + /* + * Careful: an NMI might modify the previous event value. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic event atomically: + */ +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + rdmsrl(hwc->event_base + idx, new_raw_count); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. 
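/*
 * The delta computation in x86_perf_event_update() widens a counter
 * narrower than 64 bits by shifting both snapshots up to bit 63 and
 * arithmetic-shifting the difference back down. A worked example for
 * 40 implemented bits (shift = 24): a wrap from 0xffffffffff to 0
 * yields +1 rather than a huge negative delta:
 */
static s64 delta_40bit(u64 prev_raw, u64 new_raw)
{
	const int shift = 64 - 40;
	s64 delta = (new_raw << shift) - (prev_raw << shift);

	return delta >> shift;	/* sign-extends across the wrap */
}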
+ */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + atomic64_add(delta, &event->count); + atomic64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static atomic_t active_events; +static DEFINE_MUTEX(pmc_reserve_mutex); + +static bool reserve_pmc_hardware(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int i; + + if (nmi_watchdog == NMI_LOCAL_APIC) + disable_lapic_nmi_watchdog(); + + for (i = 0; i < x86_pmu.num_events; i++) { + if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) + goto perfctr_fail; + } + + for (i = 0; i < x86_pmu.num_events; i++) { + if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) + goto eventsel_fail; + } +#endif + + return true; + +#ifdef CONFIG_X86_LOCAL_APIC +eventsel_fail: + for (i--; i >= 0; i--) + release_evntsel_nmi(x86_pmu.eventsel + i); + + i = x86_pmu.num_events; + +perfctr_fail: + for (i--; i >= 0; i--) + release_perfctr_nmi(x86_pmu.perfctr + i); + + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); + + return false; +#endif +} + +static void release_pmc_hardware(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int i; + + for (i = 0; i < x86_pmu.num_events; i++) { + release_perfctr_nmi(x86_pmu.perfctr + i); + release_evntsel_nmi(x86_pmu.eventsel + i); + } + + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); +#endif +} + +static inline bool bts_available(void) +{ + return x86_pmu.enable_bts != NULL; +} + +static inline void init_debug_store_on_cpu(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, + (u32)((u64)(unsigned long)ds), + (u32)((u64)(unsigned long)ds >> 32)); +} + +static inline void fini_debug_store_on_cpu(int cpu) +{ + if (!per_cpu(cpu_hw_events, cpu).ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); +} + +static void release_bts_hardware(void) +{ + int cpu; + + if (!bts_available()) + return; + + get_online_cpus(); + + for_each_online_cpu(cpu) + fini_debug_store_on_cpu(cpu); + + for_each_possible_cpu(cpu) { + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + continue; + + per_cpu(cpu_hw_events, cpu).ds = NULL; + + kfree((void *)(unsigned long)ds->bts_buffer_base); + kfree(ds); + } + + put_online_cpus(); +} + +static int reserve_bts_hardware(void) +{ + int cpu, err = 0; + + if (!bts_available()) + return 0; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + struct debug_store *ds; + void *buffer; + + err = -ENOMEM; + buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + ds = kzalloc(sizeof(*ds), GFP_KERNEL); + if (unlikely(!ds)) { + kfree(buffer); + break; + } + + ds->bts_buffer_base = (u64)(unsigned long)buffer; + ds->bts_index = ds->bts_buffer_base; + ds->bts_absolute_maximum = + ds->bts_buffer_base + BTS_BUFFER_SIZE; + ds->bts_interrupt_threshold = + ds->bts_absolute_maximum - BTS_OVFL_TH; + + per_cpu(cpu_hw_events, cpu).ds = ds; + err = 0; + } + + if (err) + release_bts_hardware(); + else { + for_each_online_cpu(cpu) + init_debug_store_on_cpu(cpu); + } + + put_online_cpus(); + + return err; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { + release_pmc_hardware(); + release_bts_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static inline int x86_pmu_initialized(void) +{ + return x86_pmu.handle_irq != NULL; +} + +static inline int +set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) +{ + unsigned int 
cache_type, cache_op, cache_result; + u64 config, val; + + config = attr->config; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + val = hw_cache_event_ids[cache_type][cache_op][cache_result]; + + if (val == 0) + return -ENOENT; + + if (val == -1) + return -EINVAL; + + hwc->config |= val; + + return 0; +} + +static void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr |= X86_DEBUGCTL_TR; + debugctlmsr |= X86_DEBUGCTL_BTS; + debugctlmsr |= X86_DEBUGCTL_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_bts(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | + X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + +/* + * Set up the hardware configuration for a given attr_type + */ +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + u64 config; + int err; + + if (!x86_pmu_initialized()) + return -ENODEV; + + err = 0; + if (!atomic_inc_not_zero(&active_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&active_events) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + err = reserve_bts_hardware(); + } + if (!err) + atomic_inc(&active_events); + mutex_unlock(&pmc_reserve_mutex); + } + if (err) + return err; + + event->destroy = hw_perf_event_destroy; + + /* + * Generate PMC IRQs: + * (keep 'enabled' bit clear for now) + */ + hwc->config = ARCH_PERFMON_EVENTSEL_INT; + + /* + * Count user and OS events unless requested not to. + */ + if (!attr->exclude_user) + hwc->config |= ARCH_PERFMON_EVENTSEL_USR; + if (!attr->exclude_kernel) + hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + + if (!hwc->sample_period) { + hwc->sample_period = x86_pmu.max_period; + hwc->last_period = hwc->sample_period; + atomic64_set(&hwc->period_left, hwc->sample_period); + } else { + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * events (user-space has to fall back and + * sample via a hrtimer based software event): + */ + if (!x86_pmu.apic) + return -EOPNOTSUPP; + } + + /* + * A raw hw_event type provides the config in the hw_event structure + */ + if (attr->type == PERF_TYPE_RAW) { + hwc->config |= x86_pmu.raw_event(attr->config); + return 0; + } + + if (attr->type == PERF_TYPE_HW_CACHE) + return set_ext_hw_attr(hwc, attr); + + if (attr->config >= x86_pmu.max_events) + return -EINVAL; + + /* + * The generic map: + */ + config = x86_pmu.event_map(attr->config); + + if (config == 0) + return -ENOENT; + + if (config == -1LL) + return -EINVAL; + + /* + * Branch tracing: + */ + if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && + (hwc->sample_period == 1)) { + /* BTS is not supported by this architecture. 
*/ + if (!bts_available()) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + return -EOPNOTSUPP; + } + + hwc->config |= config; + + return 0; +} + +static void p6_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + +static void intel_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + intel_pmu_disable_bts(); +} + +static void amd_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + /* + * ensure we write the disable before we start disabling the + * events proper, so that amd_pmu_enable_event() does the + * right thing. + */ + barrier(); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + rdmsrl(MSR_K7_EVNTSEL0 + idx, val); + if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) + continue; + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + } +} + +void hw_perf_disable(void) +{ + if (!x86_pmu_initialized()) + return; + return x86_pmu.disable_all(); +} + +static void p6_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + unsigned long val; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + +static void intel_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { + struct perf_event *event = + cpuc->events[X86_PMC_IDX_FIXED_BTS]; + + if (WARN_ON_ONCE(!event)) + return; + + intel_pmu_enable_bts(event->hw.config); + } +} + +static void amd_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + struct perf_event *event = cpuc->events[idx]; + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + val = event->hw.config; + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + } +} + +void hw_perf_enable(void) +{ + if (!x86_pmu_initialized()) + return; + x86_pmu.enable_all(); +} + +static inline u64 intel_pmu_get_status(void) +{ + u64 status; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + + return status; +} + +static inline void intel_pmu_ack_status(u64 ack) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); +} + +static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + (void)checking_wrmsrl(hwc->config_base + idx, + hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); +} + +static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); +} + +static 
inline void +intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) +{ + int idx = __idx - X86_PMC_IDX_FIXED; + u64 ctrl_val, mask; + + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + (void)checking_wrmsrl(hwc->config_base, ctrl_val); +} + +static inline void +p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val = P6_NOP_EVENT; + + if (cpuc->enabled) + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + + (void)checking_wrmsrl(hwc->config_base + idx, val); +} + +static inline void +intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + intel_pmu_disable_bts(); + return; + } + + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + intel_pmu_disable_fixed(hwc, idx); + return; + } + + x86_pmu_disable_event(hwc, idx); +} + +static inline void +amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + x86_pmu_disable_event(hwc, idx); +} + +static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); + +/* + * Set the next IRQ period, based on the hwc->period_left value. + * To be called with the event disabled in hw: + */ +static int +x86_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + s64 left = atomic64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int err, ret = 0; + + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + + /* + * If we are way outside a reasonable range then just skip forward: + */ + if (unlikely(left <= -period)) { + left = period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + /* + * Quirk: certain CPUs don't like it if just 1 hw_event is left: + */ + if (unlikely(left < 2)) + left = 2; + + if (left > x86_pmu.max_period) + left = x86_pmu.max_period; + + per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; + + /* + * The hw event starts counting from this event offset, + * mark it to be able to extract future deltas: + */ + atomic64_set(&hwc->prev_count, (u64)-left); + + err = checking_wrmsrl(hwc->event_base + idx, + (u64)(-left) & x86_pmu.event_mask); + + perf_event_update_userpage(event); + + return ret; +} + +static inline void +intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) +{ + int idx = __idx - X86_PMC_IDX_FIXED; + u64 ctrl_val, bits, mask; + int err; + + /* + * Enable IRQ generation (0x8), + * and enable ring-3 counting (0x2) and ring-0 counting (0x1) + * if requested: + */ + bits = 0x8ULL; + if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) + bits |= 0x2; + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + bits |= 0x1; + bits <<= (idx * 4); + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + ctrl_val |= bits; + err = checking_wrmsrl(hwc->config_base, ctrl_val); +} + +static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + + val = hwc->config; + if (cpuc->enabled) + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + + (void)checking_wrmsrl(hwc->config_base + idx, val); +} + + +static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + if (!__get_cpu_var(cpu_hw_events).enabled) + return; + + intel_pmu_enable_bts(hwc->config); + return; + } + + if
(unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + intel_pmu_enable_fixed(hwc, idx); + return; + } + + x86_pmu_enable_event(hwc, idx); +} + +static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->enabled) + x86_pmu_enable_event(hwc, idx); +} + +static int +fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) +{ + unsigned int hw_event; + + hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; + + if (unlikely((hw_event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (hwc->sample_period == 1))) + return X86_PMC_IDX_FIXED_BTS; + + if (!x86_pmu.num_events_fixed) + return -1; + + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) + return X86_PMC_IDX_FIXED_INSTRUCTIONS; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) + return X86_PMC_IDX_FIXED_CPU_CYCLES; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) + return X86_PMC_IDX_FIXED_BUS_CYCLES; + + return -1; +} + +/* + * Find a PMC slot for the freshly enabled / scheduled in event: + */ +static int x86_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = fixed_mode_idx(event, hwc); + if (idx == X86_PMC_IDX_FIXED_BTS) { + /* BTS is already occupied. */ + if (test_and_set_bit(idx, cpuc->used_mask)) + return -EAGAIN; + + hwc->config_base = 0; + hwc->event_base = 0; + hwc->idx = idx; + } else if (idx >= 0) { + /* + * Try to get the fixed event, if that is already taken + * then try to get a generic event: + */ + if (test_and_set_bit(idx, cpuc->used_mask)) + goto try_generic; + + hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; + /* + * We set it so that event_base + idx in wrmsr/rdmsr maps to + * MSR_ARCH_PERFMON_FIXED_CTR0 ... 
CTR2: + */ + hwc->event_base = + MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; + hwc->idx = idx; + } else { + idx = hwc->idx; + /* Try to get the previous generic event again */ + if (test_and_set_bit(idx, cpuc->used_mask)) { +try_generic: + idx = find_first_zero_bit(cpuc->used_mask, + x86_pmu.num_events); + if (idx == x86_pmu.num_events) + return -EAGAIN; + + set_bit(idx, cpuc->used_mask); + hwc->idx = idx; + } + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; + } + + perf_events_lapic_init(); + + x86_pmu.disable(hwc, idx); + + cpuc->events[idx] = event; + set_bit(idx, cpuc->active_mask); + + x86_perf_event_set_period(event, hwc, idx); + x86_pmu.enable(hwc, idx); + + perf_event_update_userpage(event); + + return 0; +} + +static void x86_pmu_unthrottle(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || + cpuc->events[hwc->idx] != event)) + return; + + x86_pmu.enable(hwc, hwc->idx); +} + +void perf_event_print_debug(void) +{ + u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + struct cpu_hw_events *cpuc; + unsigned long flags; + int cpu, idx; + + if (!x86_pmu.num_events) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + cpuc = &per_cpu(cpu_hw_events, cpu); + + if (x86_pmu.version >= 2) { + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); + + pr_info("\n"); + pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); + pr_info("CPU#%d: status: %016llx\n", cpu, status); + pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); + pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); + } + pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); + rdmsrl(x86_pmu.perfctr + idx, pmc_count); + + prev_left = per_cpu(pmc_prev_left[idx], cpu); + + pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", + cpu, idx, pmc_ctrl); + pr_info("CPU#%d: gen-PMC%d count: %016llx\n", + cpu, idx, pmc_count); + pr_info("CPU#%d: gen-PMC%d left: %016llx\n", + cpu, idx, prev_left); + } + for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); + + pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", + cpu, idx, pmc_count); + } + local_irq_restore(flags); +} + +static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) +{ + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; + struct bts_record *at, *top; + struct perf_output_handle handle; + struct perf_event_header header; + struct perf_sample_data data; + struct pt_regs regs; + + if (!event) + return; + + if (!ds) + return; + + at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; + + if (top <= at) + return; + + ds->bts_index = ds->bts_buffer_base; + + + data.period = event->hw.last_period; + data.addr = 0; + regs.ip = 0; + + /* + * Prepare a generic sample, i.e. fill in the invariant fields. + * We will overwrite the from and to address before we output + * the sample. 
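+ * + * Space for all records is reserved up front in a single + * perf_output_begin() call (header.size * (top - at) bytes); each + * bts_record then becomes one sample with ip = from and addr = to.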
+ */ + perf_prepare_sample(&header, &data, event, &regs); + + if (perf_output_begin(&handle, event, + header.size * (top - at), 1, 1)) + return; + + for (; at < top; at++) { + data.ip = at->from; + data.addr = at->to; + + perf_output_sample(&handle, &header, &data, event); + } + + perf_output_end(&handle); + + /* There's new data available. */ + event->hw.interrupts++; + event->pending_kill = POLL_IN; +} + +static void x86_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * Must be done before we disable, otherwise the nmi handler + * could reenable again: + */ + clear_bit(idx, cpuc->active_mask); + x86_pmu.disable(hwc, idx); + + /* + * Make sure the cleared pointer becomes visible before we + * (potentially) free the event: + */ + barrier(); + + /* + * Drain the remaining delta count out of an event + * that we are disabling: + */ + x86_perf_event_update(event, hwc, idx); + + /* Drain the remaining BTS records. */ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) + intel_pmu_drain_bts_buffer(cpuc); + + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +/* + * Save and restart an expired event. Called by NMI contexts, + * so it has to be careful about preempting normal event ops: + */ +static int intel_pmu_save_and_restart(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + int ret; + + x86_perf_event_update(event, hwc, idx); + ret = x86_perf_event_set_period(event, hwc, idx); + + if (event->state == PERF_EVENT_STATE_ACTIVE) + intel_pmu_enable_event(hwc, idx); + + return ret; +} + +static void intel_pmu_reset(void) +{ + struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; + unsigned long flags; + int idx; + + if (!x86_pmu.num_events) + return; + + local_irq_save(flags); + + printk("clearing PMU state on CPU#%d\n", smp_processor_id()); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); + checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); + } + for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { + checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); + } + if (ds) + ds->bts_index = ds->bts_buffer_base; + + local_irq_restore(flags); +} + +static int p6_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx, handled = 0; + u64 val; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + event = cpuc->events[idx]; + hwc = &event->hw; + + val = x86_perf_event_update(event, hwc, idx); + if (val & (1ULL << (x86_pmu.event_bits - 1))) + continue; + + /* + * event overflow + */ + handled = 1; + data.period = event->hw.last_period; + + if (!x86_perf_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 1, &data, regs)) + p6_pmu_disable_event(hwc, idx); + } + + if (handled) + inc_irq_stat(apic_perf_irqs); + + return handled; +} + +/* + * This handler is triggered by the local APIC, so the APIC IRQ handling + * rules apply: + */ +static int intel_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + int bit, loops; + u64 ack, status; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + + perf_disable(); +
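/* Flush pending BTS records before reading the overflow status. */ +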
intel_pmu_drain_bts_buffer(cpuc); + status = intel_pmu_get_status(); + if (!status) { + perf_enable(); + return 0; + } + + loops = 0; +again: + if (++loops > 100) { + WARN_ONCE(1, "perfevents: irq loop stuck!\n"); + perf_event_print_debug(); + intel_pmu_reset(); + perf_enable(); + return 1; + } + + inc_irq_stat(apic_perf_irqs); + ack = status; + for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { + struct perf_event *event = cpuc->events[bit]; + + clear_bit(bit, (unsigned long *) &status); + if (!test_bit(bit, cpuc->active_mask)) + continue; + + if (!intel_pmu_save_and_restart(event)) + continue; + + data.period = event->hw.last_period; + + if (perf_event_overflow(event, 1, &data, regs)) + intel_pmu_disable_event(&event->hw, bit); + } + + intel_pmu_ack_status(ack); + + /* + * Repeat if there is more work to be done: + */ + status = intel_pmu_get_status(); + if (status) + goto again; + + perf_enable(); + + return 1; +} + +static int amd_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx, handled = 0; + u64 val; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + event = cpuc->events[idx]; + hwc = &event->hw; + + val = x86_perf_event_update(event, hwc, idx); + if (val & (1ULL << (x86_pmu.event_bits - 1))) + continue; + + /* + * event overflow + */ + handled = 1; + data.period = event->hw.last_period; + + if (!x86_perf_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 1, &data, regs)) + amd_pmu_disable_event(hwc, idx); + } + + if (handled) + inc_irq_stat(apic_perf_irqs); + + return handled; +} + +void smp_perf_pending_interrupt(struct pt_regs *regs) +{ + irq_enter(); + ack_APIC_irq(); + inc_irq_stat(apic_pending_irqs); + perf_event_do_pending(); + irq_exit(); +} + +void set_perf_event_pending(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + apic->send_IPI_self(LOCAL_PENDING_VECTOR); +#endif +} + +void perf_events_lapic_init(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + if (!x86_pmu.apic || !x86_pmu_initialized()) + return; + + /* + * Always use NMI for PMU + */ + apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif +} + +static int __kprobes +perf_event_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *__args) +{ + struct die_args *args = __args; + struct pt_regs *regs; + + if (!atomic_read(&active_events)) + return NOTIFY_DONE; + + switch (cmd) { + case DIE_NMI: + case DIE_NMI_IPI: + break; + + default: + return NOTIFY_DONE; + } + + regs = args->regs; + +#ifdef CONFIG_X86_LOCAL_APIC + apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif + /* + * Can't rely on the handled return value to say it was our NMI, two + * events could trigger 'simultaneously' raising two back-to-back NMIs. + * + * If the first NMI handles both, the latter will be empty and daze + * the CPU. 
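+ * + * For example: events A and B overflow back-to-back and raise two NMIs; + * the first NMI invocation services both A and B, so the second finds no + * work left. Returning NOTIFY_STOP unconditionally keeps that empty NMI + * from being treated as an unknown/spurious one.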
+ */ + x86_pmu.handle_irq(regs); + + return NOTIFY_STOP; +} + +static __read_mostly struct notifier_block perf_event_nmi_notifier = { + .notifier_call = perf_event_nmi_handler, + .next = NULL, + .priority = 1 +}; + +static struct x86_pmu p6_pmu = { + .name = "p6", + .handle_irq = p6_pmu_handle_irq, + .disable_all = p6_pmu_disable_all, + .enable_all = p6_pmu_enable_all, + .enable = p6_pmu_enable_event, + .disable = p6_pmu_disable_event, + .eventsel = MSR_P6_EVNTSEL0, + .perfctr = MSR_P6_PERFCTR0, + .event_map = p6_pmu_event_map, + .raw_event = p6_pmu_raw_event, + .max_events = ARRAY_SIZE(p6_perfmon_event_map), + .apic = 1, + .max_period = (1ULL << 31) - 1, + .version = 0, + .num_events = 2, + /* + * Events have 40 bits implemented. However they are designed such + * that bits [32-39] are sign extensions of bit 31. As such the + * effective width of an event for P6-like PMUs is 32 bits only. + * + * See the IA-32 Intel Architecture Software Developer's Manual, Vol 3B + */ + .event_bits = 32, + .event_mask = (1ULL << 32) - 1, +}; + +static struct x86_pmu intel_pmu = { + .name = "Intel", + .handle_irq = intel_pmu_handle_irq, + .disable_all = intel_pmu_disable_all, + .enable_all = intel_pmu_enable_all, + .enable = intel_pmu_enable_event, + .disable = intel_pmu_disable_event, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .raw_event = intel_pmu_raw_event, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32 bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL << 31) - 1, + .enable_bts = intel_pmu_enable_bts, + .disable_bts = intel_pmu_disable_bts, +}; + +static struct x86_pmu amd_pmu = { + .name = "AMD", + .handle_irq = amd_pmu_handle_irq, + .disable_all = amd_pmu_disable_all, + .enable_all = amd_pmu_enable_all, + .enable = amd_pmu_enable_event, + .disable = amd_pmu_disable_event, + .eventsel = MSR_K7_EVNTSEL0, + .perfctr = MSR_K7_PERFCTR0, + .event_map = amd_pmu_event_map, + .raw_event = amd_pmu_raw_event, + .max_events = ARRAY_SIZE(amd_perfmon_event_map), + .num_events = 4, + .event_bits = 48, + .event_mask = (1ULL << 48) - 1, + .apic = 1, + /* use highest bit to detect overflow */ + .max_period = (1ULL << 47) - 1, +}; + +static int p6_pmu_init(void) +{ + switch (boot_cpu_data.x86_model) { + case 1: + case 3: /* Pentium Pro */ + case 5: + case 6: /* Pentium II */ + case 7: + case 8: + case 11: /* Pentium III */ + break; + case 9: + case 13: + /* Pentium M */ + break; + default: + pr_cont("unsupported p6 CPU model %d ", + boot_cpu_data.x86_model); + return -ENODEV; + } + + x86_pmu = p6_pmu; + + if (!cpu_has_apic) { + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); + pr_info("no hardware sampling interrupt available.\n"); + x86_pmu.apic = 0; + } + + return 0; +} + +static int intel_pmu_init(void) +{ + union cpuid10_edx edx; + union cpuid10_eax eax; + unsigned int unused; + unsigned int ebx; + int version; + + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + /* check for P6 processor family */ + if (boot_cpu_data.x86 == 6) { + return p6_pmu_init(); + } else { + return -ENODEV; + } + } + + /* + * Check whether the Architectural PerfMon supports + * Branch Misses Retired hw_event or not. 
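+ * + * CPUID leaf 0xa reports in eax: the version_id (bits 0-7), num_events + * (bits 8-15), bit_width (bits 16-23) and mask_length (bits 24-31); an + * architectural event is only enumerated at all if its index lies below + * mask_length, which is what the check below relies on.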
+ */ + cpuid(10, &eax.full, &ebx, &unused, &edx.full); + if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) + return -ENODEV; + + version = eax.split.version_id; + if (version < 2) + return -ENODEV; + + x86_pmu = intel_pmu; + x86_pmu.version = version; + x86_pmu.num_events = eax.split.num_events; + x86_pmu.event_bits = eax.split.bit_width; + x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; + + /* + * Quirk: v2 perfmon does not report fixed-purpose events, so + * assume at least 3 events: + */ + x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); + + /* + * Install the hw-cache-events table: + */ + switch (boot_cpu_data.x86_model) { + case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ + case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ + case 29: /* six-core 45 nm xeon "Dunnington" */ + memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + pr_cont("Core2 events, "); + break; + default: + case 26: + memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + pr_cont("Nehalem/Corei7 events, "); + break; + case 28: + memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + pr_cont("Atom events, "); + break; + } + return 0; +} + +static int amd_pmu_init(void) +{ + /* Performance-monitoring supported from K7 and later: */ + if (boot_cpu_data.x86 < 6) + return -ENODEV; + + x86_pmu = amd_pmu; + + /* Events are common for all AMDs */ + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + return 0; +} + +void __init init_hw_perf_events(void) +{ + int err; + + pr_info("Performance Events: "); + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + err = intel_pmu_init(); + break; + case X86_VENDOR_AMD: + err = amd_pmu_init(); + break; + default: + return; + } + if (err != 0) { + pr_cont("no PMU driver, software events only.\n"); + return; + } + + pr_cont("%s PMU driver.\n", x86_pmu.name); + + if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { + WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", + x86_pmu.num_events, X86_PMC_MAX_GENERIC); + x86_pmu.num_events = X86_PMC_MAX_GENERIC; + } + perf_event_mask = (1 << x86_pmu.num_events) - 1; + perf_max_events = x86_pmu.num_events; + + if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { + WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", + x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); + x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; + } + + perf_event_mask |= + ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; + x86_pmu.intel_ctrl = perf_event_mask; + + perf_events_lapic_init(); + register_die_notifier(&perf_event_nmi_notifier); + + pr_info("... version: %d\n", x86_pmu.version); + pr_info("... bit width: %d\n", x86_pmu.event_bits); + pr_info("... generic events: %d\n", x86_pmu.num_events); + pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); + pr_info("... max period: %016Lx\n", x86_pmu.max_period); + pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); + pr_info("... 
event mask: %016Lx\n", perf_event_mask); +} + +static inline void x86_pmu_read(struct perf_event *event) +{ + x86_perf_event_update(event, &event->hw, event->hw.idx); +} + +static const struct pmu pmu = { + .enable = x86_pmu_enable, + .disable = x86_pmu_disable, + .read = x86_pmu_read, + .unthrottle = x86_pmu_unthrottle, +}; + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + int err; + + err = __hw_perf_event_init(event); + if (err) { + if (event->destroy) + event->destroy(event); + return ERR_PTR(err); + } + + return &pmu; +} + +/* + * callchain support + */ + +static inline +void callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + +static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); +static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); +static DEFINE_PER_CPU(int, in_nmi_frame); + + +static void +backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + /* Ignore warnings */ +} + +static void backtrace_warning(void *data, char *msg) +{ + /* Ignore warnings */ +} + +static int backtrace_stack(void *data, char *name) +{ + per_cpu(in_nmi_frame, smp_processor_id()) = + x86_is_stack_id(NMI_STACK, name); + + return 0; +} + +static void backtrace_address(void *data, unsigned long addr, int reliable) +{ + struct perf_callchain_entry *entry = data; + + if (per_cpu(in_nmi_frame, smp_processor_id())) + return; + + if (reliable) + callchain_store(entry, addr); +} + +static const struct stacktrace_ops backtrace_ops = { + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, +}; + +#include "../dumpstack.h" + +static void +perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + callchain_store(entry, PERF_CONTEXT_KERNEL); + callchain_store(entry, regs->ip); + + dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); +} + +/* + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context + */ +static unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n) +{ + unsigned long offset, addr = (unsigned long)from; + int type = in_nmi() ? 
KM_NMI : KM_IRQ0; + unsigned long size, len = 0; + struct page *page; + void *map; + int ret; + + do { + ret = __get_user_pages_fast(addr, 1, 0, &page); + if (!ret) + break; + + offset = addr & (PAGE_SIZE - 1); + size = min(PAGE_SIZE - offset, n - len); + + map = kmap_atomic(page, type); + memcpy(to, map+offset, size); + kunmap_atomic(map, type); + put_page(page); + + len += size; + to += size; + addr += size; + + } while (len < n); + + return len; +} + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + unsigned long bytes; + + bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); + + return bytes == sizeof(*frame); +} + +static void +perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + struct stack_frame frame; + const void __user *fp; + + if (!user_mode(regs)) + regs = task_pt_regs(current); + + fp = (void __user *)regs->bp; + + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, regs->ip); + + while (entry->nr < PERF_MAX_STACK_DEPTH) { + frame.next_frame = NULL; + frame.return_address = 0; + + if (!copy_stack_frame(fp, &frame)) + break; + + if ((unsigned long)fp < regs->sp) + break; + + callchain_store(entry, frame.return_address); + fp = frame.next_frame; + } +} + +static void +perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + int is_user; + + if (!regs) + return; + + is_user = user_mode(regs); + + if (!current || current->pid == 0) + return; + + if (is_user && current->state != TASK_RUNNING) + return; + + if (!is_user) + perf_callchain_kernel(regs, entry); + + if (current->mm) + perf_callchain_user(regs, entry); +} + +struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry; + + if (in_nmi()) + entry = &__get_cpu_var(pmc_nmi_entry); + else + entry = &__get_cpu_var(pmc_irq_entry); + + entry->nr = 0; + + perf_do_callchain(regs, entry); + + return entry; +} + +void hw_perf_event_setup_online(int cpu) +{ + init_debug_store_on_cpu(cpu); +} diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 392bea43b89..fab786f60ed 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -20,7 +20,7 @@ #include #include -#include +#include struct nmi_watchdog_ctlblk { unsigned int cccr_msr; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index d59fe323807..681c3fda739 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1021,7 +1021,7 @@ apicinterrupt ERROR_APIC_VECTOR \ apicinterrupt SPURIOUS_APIC_VECTOR \ spurious_interrupt smp_spurious_interrupt -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS apicinterrupt LOCAL_PENDING_VECTOR \ perf_pending_interrupt smp_perf_pending_interrupt #endif diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 300883112e3..40f30773fb2 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -208,7 +208,7 @@ static void __init apic_intr_init(void) alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); /* Performance monitoring interrupts: */ -# ifdef CONFIG_PERF_COUNTERS +# ifdef CONFIG_PERF_EVENTS alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); # endif diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d51321ddafd..0157cd26d7c 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -335,4 +335,4 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev 
.long sys_rt_tgsigqueueinfo /* 335 */ - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 775a020990a..82728f2c6d5 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -10,7 +10,7 @@ #include /* max_low_pfn */ #include /* __kprobes, ... */ #include /* kmmio_handler, ... */ -#include /* perf_swcounter_event */ +#include /* perf_sw_event */ #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ @@ -1017,7 +1017,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -1114,11 +1114,11 @@ good_area: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 4899215999d..8eb05878554 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -234,11 +234,11 @@ static void arch_perfmon_setup_counters(void) if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 15) { eax.split.version_id = 2; - eax.split.num_counters = 2; + eax.split.num_events = 2; eax.split.bit_width = 40; } - num_counters = eax.split.num_counters; + num_counters = eax.split.num_events; op_arch_perfmon_spec.num_counters = num_counters; op_arch_perfmon_spec.num_controls = num_counters; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index b83776180c7..7b8e75d1608 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -13,7 +13,7 @@ #define OP_X86_MODEL_H #include -#include +#include struct op_msr { unsigned long addr; diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 50eecfe1d72..44203ff599d 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -252,7 +252,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty) struct pt_regs *regs = get_irq_regs(); if (regs) show_regs(regs); - perf_counter_print_debug(); + perf_event_print_debug(); } static struct sysrq_key_op sysrq_showregs_op = { .handler = sysrq_handle_showregs, diff --git a/fs/exec.c b/fs/exec.c index 172ceb6edde..434dba778cc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include @@ -923,7 +923,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) task_lock(tsk); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); - perf_counter_comm(tsk); + perf_event_comm(tsk); } int flush_old_exec(struct linux_binprm * bprm) @@ -997,7 +997,7 @@ int flush_old_exec(struct linux_binprm * bprm) * security domain: */ if (!get_dumpable(current->mm)) - perf_counter_exit_task(current); + perf_event_exit_task(current); /* An exec changes our domain. 
We are no longer part of the thread group */ diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 1125e5a1ee5..d76b66acea9 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -620,8 +620,8 @@ __SYSCALL(__NR_move_pages, sys_move_pages) #define __NR_rt_tgsigqueueinfo 240 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) -#define __NR_perf_counter_open 241 -__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) +#define __NR_perf_event_open 241 +__SYSCALL(__NR_perf_event_open, sys_perf_event_open) #undef __NR_syscalls #define __NR_syscalls 242 diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9e7f2e8fc66..21a6f5d9af2 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -106,13 +106,13 @@ extern struct group_info init_groups; extern struct cred init_cred; -#ifdef CONFIG_PERF_COUNTERS -# define INIT_PERF_COUNTERS(tsk) \ - .perf_counter_mutex = \ - __MUTEX_INITIALIZER(tsk.perf_counter_mutex), \ - .perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list), +#ifdef CONFIG_PERF_EVENTS +# define INIT_PERF_EVENTS(tsk) \ + .perf_event_mutex = \ + __MUTEX_INITIALIZER(tsk.perf_event_mutex), \ + .perf_event_list = LIST_HEAD_INIT(tsk.perf_event_list), #else -# define INIT_PERF_COUNTERS(tsk) +# define INIT_PERF_EVENTS(tsk) #endif /* @@ -178,7 +178,7 @@ extern struct cred init_cred; }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ - INIT_PERF_COUNTERS(tsk) \ + INIT_PERF_EVENTS(tsk) \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h deleted file mode 100644 index f6486273267..00000000000 --- a/include/linux/perf_counter.h +++ /dev/null @@ -1,858 +0,0 @@ -/* - * Performance counters: - * - * Copyright (C) 2008-2009, Thomas Gleixner - * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra - * - * Data type definitions, declarations, prototypes. 
- * - * Started by: Thomas Gleixner and Ingo Molnar - * - * For licencing details see kernel-base/COPYING - */ -#ifndef _LINUX_PERF_COUNTER_H -#define _LINUX_PERF_COUNTER_H - -#include -#include -#include - -/* - * User-space ABI bits: - */ - -/* - * attr.type - */ -enum perf_type_id { - PERF_TYPE_HARDWARE = 0, - PERF_TYPE_SOFTWARE = 1, - PERF_TYPE_TRACEPOINT = 2, - PERF_TYPE_HW_CACHE = 3, - PERF_TYPE_RAW = 4, - - PERF_TYPE_MAX, /* non-ABI */ -}; - -/* - * Generalized performance counter event types, used by the - * attr.event_id parameter of the sys_perf_counter_open() - * syscall: - */ -enum perf_hw_id { - /* - * Common hardware events, generalized by the kernel: - */ - PERF_COUNT_HW_CPU_CYCLES = 0, - PERF_COUNT_HW_INSTRUCTIONS = 1, - PERF_COUNT_HW_CACHE_REFERENCES = 2, - PERF_COUNT_HW_CACHE_MISSES = 3, - PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_HW_BRANCH_MISSES = 5, - PERF_COUNT_HW_BUS_CYCLES = 6, - - PERF_COUNT_HW_MAX, /* non-ABI */ -}; - -/* - * Generalized hardware cache counters: - * - * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x - * { read, write, prefetch } x - * { accesses, misses } - */ -enum perf_hw_cache_id { - PERF_COUNT_HW_CACHE_L1D = 0, - PERF_COUNT_HW_CACHE_L1I = 1, - PERF_COUNT_HW_CACHE_LL = 2, - PERF_COUNT_HW_CACHE_DTLB = 3, - PERF_COUNT_HW_CACHE_ITLB = 4, - PERF_COUNT_HW_CACHE_BPU = 5, - - PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_id { - PERF_COUNT_HW_CACHE_OP_READ = 0, - PERF_COUNT_HW_CACHE_OP_WRITE = 1, - PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, - - PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_result_id { - PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, - PERF_COUNT_HW_CACHE_RESULT_MISS = 1, - - PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ -}; - -/* - * Special "software" counters provided by the kernel, even if the hardware - * does not support performance counters. These counters measure various - * physical and sw events of the kernel (and allow the profiling of them as - * well): - */ -enum perf_sw_ids { - PERF_COUNT_SW_CPU_CLOCK = 0, - PERF_COUNT_SW_TASK_CLOCK = 1, - PERF_COUNT_SW_PAGE_FAULTS = 2, - PERF_COUNT_SW_CONTEXT_SWITCHES = 3, - PERF_COUNT_SW_CPU_MIGRATIONS = 4, - PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, - PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, - - PERF_COUNT_SW_MAX, /* non-ABI */ -}; - -/* - * Bits that can be set in attr.sample_type to request information - * in the overflow packets. 
- */ -enum perf_counter_sample_format { - PERF_SAMPLE_IP = 1U << 0, - PERF_SAMPLE_TID = 1U << 1, - PERF_SAMPLE_TIME = 1U << 2, - PERF_SAMPLE_ADDR = 1U << 3, - PERF_SAMPLE_READ = 1U << 4, - PERF_SAMPLE_CALLCHAIN = 1U << 5, - PERF_SAMPLE_ID = 1U << 6, - PERF_SAMPLE_CPU = 1U << 7, - PERF_SAMPLE_PERIOD = 1U << 8, - PERF_SAMPLE_STREAM_ID = 1U << 9, - PERF_SAMPLE_RAW = 1U << 10, - - PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ -}; - -/* - * The format of the data returned by read() on a perf counter fd, - * as specified by attr.read_format: - * - * struct read_format { - * { u64 value; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 id; } && PERF_FORMAT_ID - * } && !PERF_FORMAT_GROUP - * - * { u64 nr; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 value; - * { u64 id; } && PERF_FORMAT_ID - * } cntr[nr]; - * } && PERF_FORMAT_GROUP - * }; - */ -enum perf_counter_read_format { - PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, - PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, - PERF_FORMAT_ID = 1U << 2, - PERF_FORMAT_GROUP = 1U << 3, - - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ -}; - -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ - -/* - * Hardware event to monitor via a performance monitoring counter: - */ -struct perf_counter_attr { - - /* - * Major type: hardware/software/tracepoint/etc. - */ - __u32 type; - - /* - * Size of the attr structure, for fwd/bwd compat. - */ - __u32 size; - - /* - * Type specific configuration information. - */ - __u64 config; - - union { - __u64 sample_period; - __u64 sample_freq; - }; - - __u64 sample_type; - __u64 read_format; - - __u64 disabled : 1, /* off by default */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only group on PMU */ - exclude_user : 1, /* don't count user */ - exclude_kernel : 1, /* ditto kernel */ - exclude_hv : 1, /* ditto hypervisor */ - exclude_idle : 1, /* don't count when idle */ - mmap : 1, /* include mmap data */ - comm : 1, /* include comm data */ - freq : 1, /* use freq, not period */ - inherit_stat : 1, /* per task counts */ - enable_on_exec : 1, /* next exec enables */ - task : 1, /* trace fork/exit */ - watermark : 1, /* wakeup_watermark */ - - __reserved_1 : 49; - - union { - __u32 wakeup_events; /* wakeup every n events */ - __u32 wakeup_watermark; /* bytes before wakeup */ - }; - __u32 __reserved_2; - - __u64 __reserved_3; -}; - -/* - * Ioctls that can be done on a perf counter fd: - */ -#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) -#define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) -#define PERF_COUNTER_IOC_RESET _IO ('$', 3) -#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) -#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) - -enum perf_counter_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, -}; - -/* - * Structure of the page that can be mapped via mmap - */ -struct perf_counter_mmap_page { - __u32 version; /* version number of this structure */ - __u32 compat_version; /* lowest version this is compat with */ - - /* - * Bits needed to read the hw counters in user-space. - * - * u32 seq; - * s64 count; - * - * do { - * seq = pc->lock; - * - * barrier() - * if (pc->index) { - * count = pmc_read(pc->index - 1); - * count += pc->offset; - * } else - * goto regular_read; - * - * barrier(); - * } while (pc->lock != seq); - * - * NOTE: for obvious reason this only works on self-monitoring - * processes. 
- */ - __u32 lock; /* seqlock for synchronization */ - __u32 index; /* hardware counter identifier */ - __s64 offset; /* add to hardware counter value */ - __u64 time_enabled; /* time counter active */ - __u64 time_running; /* time counter on cpu */ - - /* - * Hole for extension of the self monitor capabilities - */ - - __u64 __reserved[123]; /* align to 1k */ - - /* - * Control data for the mmap() data buffer. - * - * User-space reading the @data_head value should issue an rmb(), on - * SMP capable platforms, after reading this value -- see - * perf_counter_wakeup(). - * - * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data. In this case - * the kernel will not over-write unread data. - */ - __u64 data_head; /* head in the data section */ - __u64 data_tail; /* user-space written tail */ -}; - -#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) -#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (2 << 0) -#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) - -struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; -}; - -enum perf_event_type { - - /* - * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: - * - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * u64 addr; - * u64 len; - * u64 pgoff; - * char filename[]; - * }; - */ - PERF_EVENT_MMAP = 1, - - /* - * struct { - * struct perf_event_header header; - * u64 id; - * u64 lost; - * }; - */ - PERF_EVENT_LOST = 2, - - /* - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * char comm[]; - * }; - */ - PERF_EVENT_COMM = 3, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, ppid; - * u32 tid, ptid; - * u64 time; - * }; - */ - PERF_EVENT_EXIT = 4, - - /* - * struct { - * struct perf_event_header header; - * u64 time; - * u64 id; - * u64 stream_id; - * }; - */ - PERF_EVENT_THROTTLE = 5, - PERF_EVENT_UNTHROTTLE = 6, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, ppid; - * u32 tid, ptid; - * { u64 time; } && PERF_SAMPLE_TIME - * }; - */ - PERF_EVENT_FORK = 7, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, tid; - * - * struct read_format values; - * }; - */ - PERF_EVENT_READ = 8, - - /* - * struct { - * struct perf_event_header header; - * - * { u64 ip; } && PERF_SAMPLE_IP - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 addr; } && PERF_SAMPLE_ADDR - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU - * { u64 period; } && PERF_SAMPLE_PERIOD - * - * { struct read_format values; } && PERF_SAMPLE_READ - * - * { u64 nr, - * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN - * - * # - * # The RAW record below is opaque data wrt the ABI - * # - * # That is, the ABI doesn't make any promises wrt to - * # the stability of its content, it may vary depending - * # on event, hardware, kernel version and phase of - * # the moon. - * # - * # In other words, PERF_SAMPLE_RAW contents are not an ABI. 
- * # - * - * { u32 size; - * char data[size];}&& PERF_SAMPLE_RAW - * }; - */ - PERF_EVENT_SAMPLE = 9, - - PERF_EVENT_MAX, /* non-ABI */ -}; - -enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, - - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, - - PERF_CONTEXT_MAX = (__u64)-4095, -}; - -#define PERF_FLAG_FD_NO_GROUP (1U << 0) -#define PERF_FLAG_FD_OUTPUT (1U << 1) - -#ifdef __KERNEL__ -/* - * Kernel-internal data types and definitions: - */ - -#ifdef CONFIG_PERF_COUNTERS -# include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define PERF_MAX_STACK_DEPTH 255 - -struct perf_callchain_entry { - __u64 nr; - __u64 ip[PERF_MAX_STACK_DEPTH]; -}; - -struct perf_raw_record { - u32 size; - void *data; -}; - -struct task_struct; - -/** - * struct hw_perf_counter - performance counter hardware details: - */ -struct hw_perf_counter { -#ifdef CONFIG_PERF_COUNTERS - union { - struct { /* hardware */ - u64 config; - unsigned long config_base; - unsigned long counter_base; - int idx; - }; - union { /* software */ - atomic64_t count; - struct hrtimer hrtimer; - }; - }; - atomic64_t prev_count; - u64 sample_period; - u64 last_period; - atomic64_t period_left; - u64 interrupts; - - u64 freq_count; - u64 freq_interrupts; - u64 freq_stamp; -#endif -}; - -struct perf_counter; - -/** - * struct pmu - generic performance monitoring unit - */ -struct pmu { - int (*enable) (struct perf_counter *counter); - void (*disable) (struct perf_counter *counter); - void (*read) (struct perf_counter *counter); - void (*unthrottle) (struct perf_counter *counter); -}; - -/** - * enum perf_counter_active_state - the states of a counter - */ -enum perf_counter_active_state { - PERF_COUNTER_STATE_ERROR = -2, - PERF_COUNTER_STATE_OFF = -1, - PERF_COUNTER_STATE_INACTIVE = 0, - PERF_COUNTER_STATE_ACTIVE = 1, -}; - -struct file; - -struct perf_mmap_data { - struct rcu_head rcu_head; - int nr_pages; /* nr of data pages */ - int writable; /* are we writable */ - int nr_locked; /* nr pages mlocked */ - - atomic_t poll; /* POLL_ for wakeups */ - atomic_t events; /* event limit */ - - atomic_long_t head; /* write position */ - atomic_long_t done_head; /* completed head */ - - atomic_t lock; /* concurrent writes */ - atomic_t wakeup; /* needs a wakeup */ - atomic_t lost; /* nr records lost */ - - long watermark; /* wakeup watermark */ - - struct perf_counter_mmap_page *user_page; - void *data_pages[0]; -}; - -struct perf_pending_entry { - struct perf_pending_entry *next; - void (*func)(struct perf_pending_entry *); -}; - -/** - * struct perf_counter - performance counter kernel representation: - */ -struct perf_counter { -#ifdef CONFIG_PERF_COUNTERS - struct list_head group_entry; - struct list_head event_entry; - struct list_head sibling_list; - int nr_siblings; - struct perf_counter *group_leader; - struct perf_counter *output; - const struct pmu *pmu; - - enum perf_counter_active_state state; - atomic64_t count; - - /* - * These are the total time in nanoseconds that the counter - * has been enabled (i.e. eligible to run, and the task has - * been scheduled in, if this is a per-task counter) - * and running (scheduled onto the CPU), respectively. - * - * They are computed from tstamp_enabled, tstamp_running and - * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. 
- */ - u64 total_time_enabled; - u64 total_time_running; - - /* - * These are timestamps used for computing total_time_enabled - * and total_time_running when the counter is in INACTIVE or - * ACTIVE state, measured in nanoseconds from an arbitrary point - * in time. - * tstamp_enabled: the notional time when the counter was enabled - * tstamp_running: the notional time when the counter was scheduled on - * tstamp_stopped: in INACTIVE state, the notional time when the - * counter was scheduled off. - */ - u64 tstamp_enabled; - u64 tstamp_running; - u64 tstamp_stopped; - - struct perf_counter_attr attr; - struct hw_perf_counter hw; - - struct perf_counter_context *ctx; - struct file *filp; - - /* - * These accumulate total time (in nanoseconds) that children - * counters have been enabled and running, respectively. - */ - atomic64_t child_total_time_enabled; - atomic64_t child_total_time_running; - - /* - * Protect attach/detach and child_list: - */ - struct mutex child_mutex; - struct list_head child_list; - struct perf_counter *parent; - - int oncpu; - int cpu; - - struct list_head owner_entry; - struct task_struct *owner; - - /* mmap bits */ - struct mutex mmap_mutex; - atomic_t mmap_count; - struct perf_mmap_data *data; - - /* poll related */ - wait_queue_head_t waitq; - struct fasync_struct *fasync; - - /* delayed work for NMIs and such */ - int pending_wakeup; - int pending_kill; - int pending_disable; - struct perf_pending_entry pending; - - atomic_t event_limit; - - void (*destroy)(struct perf_counter *); - struct rcu_head rcu_head; - - struct pid_namespace *ns; - u64 id; -#endif -}; - -/** - * struct perf_counter_context - counter context structure - * - * Used as a container for task counters and CPU counters as well: - */ -struct perf_counter_context { - /* - * Protect the states of the counters in the list, - * nr_active, and the list: - */ - spinlock_t lock; - /* - * Protect the list of counters. Locking either mutex or lock - * is sufficient to ensure the list doesn't change; to change - * the list you need to lock both the mutex and the spinlock. - */ - struct mutex mutex; - - struct list_head group_list; - struct list_head event_list; - int nr_counters; - int nr_active; - int is_active; - int nr_stat; - atomic_t refcount; - struct task_struct *task; - - /* - * Context clock, runs when context enabled. - */ - u64 time; - u64 timestamp; - - /* - * These fields let us detect when two contexts have both - * been cloned (inherited) from a common ancestor. 
- */ - struct perf_counter_context *parent_ctx; - u64 parent_gen; - u64 generation; - int pin_count; - struct rcu_head rcu_head; -}; - -/** - * struct perf_counter_cpu_context - per cpu counter context structure - */ -struct perf_cpu_context { - struct perf_counter_context ctx; - struct perf_counter_context *task_ctx; - int active_oncpu; - int max_pertask; - int exclusive; - - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; -}; - -struct perf_output_handle { - struct perf_counter *counter; - struct perf_mmap_data *data; - unsigned long head; - unsigned long offset; - int nmi; - int sample; - int locked; - unsigned long flags; -}; - -#ifdef CONFIG_PERF_COUNTERS - -/* - * Set by architecture code: - */ -extern int perf_max_counters; - -extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); - -extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); -extern void perf_counter_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu); -extern void perf_counter_task_tick(struct task_struct *task, int cpu); -extern int perf_counter_init_task(struct task_struct *child); -extern void perf_counter_exit_task(struct task_struct *child); -extern void perf_counter_free_task(struct task_struct *task); -extern void set_perf_counter_pending(void); -extern void perf_counter_do_pending(void); -extern void perf_counter_print_debug(void); -extern void __perf_disable(void); -extern bool __perf_enable(void); -extern void perf_disable(void); -extern void perf_enable(void); -extern int perf_counter_task_disable(void); -extern int perf_counter_task_enable(void); -extern int hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu); -extern void perf_counter_update_userpage(struct perf_counter *counter); - -struct perf_sample_data { - u64 type; - - u64 ip; - struct { - u32 pid; - u32 tid; - } tid_entry; - u64 time; - u64 addr; - u64 id; - u64 stream_id; - struct { - u32 cpu; - u32 reserved; - } cpu_entry; - u64 period; - struct perf_callchain_entry *callchain; - struct perf_raw_record *raw; -}; - -extern void perf_output_sample(struct perf_output_handle *handle, - struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter); -extern void perf_prepare_sample(struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter, - struct pt_regs *regs); - -extern int perf_counter_overflow(struct perf_counter *counter, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs); - -/* - * Return 1 for a software counter, 0 for a hardware counter - */ -static inline int is_software_counter(struct perf_counter *counter) -{ - return (counter->attr.type != PERF_TYPE_RAW) && - (counter->attr.type != PERF_TYPE_HARDWARE) && - (counter->attr.type != PERF_TYPE_HW_CACHE); -} - -extern atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; - -extern void __perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); - -static inline void -perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) -{ - if (atomic_read(&perf_swcounter_enabled[event])) - __perf_swcounter_event(event, nr, nmi, regs, addr); -} - -extern void __perf_counter_mmap(struct vm_area_struct *vma); - -static inline void perf_counter_mmap(struct vm_area_struct *vma) -{ - if (vma->vm_flags & VM_EXEC) - __perf_counter_mmap(vma); -} - -extern void perf_counter_comm(struct task_struct *tsk); 
-extern void perf_counter_fork(struct task_struct *tsk); - -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); - -extern int sysctl_perf_counter_paranoid; -extern int sysctl_perf_counter_mlock; -extern int sysctl_perf_counter_sample_rate; - -extern void perf_counter_init(void); -extern void perf_tpcounter_event(int event_id, u64 addr, u64 count, - void *record, int entry_size); - -#ifndef perf_misc_flags -#define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ - PERF_EVENT_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) -#endif - -extern int perf_output_begin(struct perf_output_handle *handle, - struct perf_counter *counter, unsigned int size, - int nmi, int sample); -extern void perf_output_end(struct perf_output_handle *handle); -extern void perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len); -#else -static inline void -perf_counter_task_sched_in(struct task_struct *task, int cpu) { } -static inline void -perf_counter_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) { } -static inline void -perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline int perf_counter_init_task(struct task_struct *child) { return 0; } -static inline void perf_counter_exit_task(struct task_struct *child) { } -static inline void perf_counter_free_task(struct task_struct *task) { } -static inline void perf_counter_do_pending(void) { } -static inline void perf_counter_print_debug(void) { } -static inline void perf_disable(void) { } -static inline void perf_enable(void) { } -static inline int perf_counter_task_disable(void) { return -EINVAL; } -static inline int perf_counter_task_enable(void) { return -EINVAL; } - -static inline void -perf_swcounter_event(u32 event, u64 nr, int nmi, - struct pt_regs *regs, u64 addr) { } - -static inline void perf_counter_mmap(struct vm_area_struct *vma) { } -static inline void perf_counter_comm(struct task_struct *tsk) { } -static inline void perf_counter_fork(struct task_struct *tsk) { } -static inline void perf_counter_init(void) { } - -#endif - -#define perf_output_put(handle, x) \ - perf_output_copy((handle), &(x), sizeof(x)) - -#endif /* __KERNEL__ */ -#endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h new file mode 100644 index 00000000000..ae9d9ed6df2 --- /dev/null +++ b/include/linux/perf_event.h @@ -0,0 +1,858 @@ +/* + * Performance events: + * + * Copyright (C) 2008-2009, Thomas Gleixner + * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra + * + * Data type definitions, declarations, prototypes. 
+ *
+ * Started by: Thomas Gleixner and Ingo Molnar
+ *
+ * For licensing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_EVENT_H
+#define _LINUX_PERF_EVENT_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/byteorder.h>
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * attr.type
+ */
+enum perf_type_id {
+ PERF_TYPE_HARDWARE = 0,
+ PERF_TYPE_SOFTWARE = 1,
+ PERF_TYPE_TRACEPOINT = 2,
+ PERF_TYPE_HW_CACHE = 3,
+ PERF_TYPE_RAW = 4,
+
+ PERF_TYPE_MAX, /* non-ABI */
+};
+
+/*
+ * Generalized performance event event_id types, used by the
+ * attr.event_id parameter of the sys_perf_event_open()
+ * syscall:
+ */
+enum perf_hw_id {
+ /*
+ * Common hardware events, generalized by the kernel:
+ */
+ PERF_COUNT_HW_CPU_CYCLES = 0,
+ PERF_COUNT_HW_INSTRUCTIONS = 1,
+ PERF_COUNT_HW_CACHE_REFERENCES = 2,
+ PERF_COUNT_HW_CACHE_MISSES = 3,
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_HW_BRANCH_MISSES = 5,
+ PERF_COUNT_HW_BUS_CYCLES = 6,
+
+ PERF_COUNT_HW_MAX, /* non-ABI */
+};
+
+/*
+ * Generalized hardware cache events:
+ *
+ * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
+ * { read, write, prefetch } x
+ * { accesses, misses }
+ */
+enum perf_hw_cache_id {
+ PERF_COUNT_HW_CACHE_L1D = 0,
+ PERF_COUNT_HW_CACHE_L1I = 1,
+ PERF_COUNT_HW_CACHE_LL = 2,
+ PERF_COUNT_HW_CACHE_DTLB = 3,
+ PERF_COUNT_HW_CACHE_ITLB = 4,
+ PERF_COUNT_HW_CACHE_BPU = 5,
+
+ PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+ PERF_COUNT_HW_CACHE_OP_READ = 0,
+ PERF_COUNT_HW_CACHE_OP_WRITE = 1,
+ PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
+
+ PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_result_id {
+ PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
+ PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
+
+ PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
+};
+
+/*
+ * Special "software" events provided by the kernel, even if the hardware
+ * does not support performance events. These events measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum perf_sw_ids {
+ PERF_COUNT_SW_CPU_CLOCK = 0,
+ PERF_COUNT_SW_TASK_CLOCK = 1,
+ PERF_COUNT_SW_PAGE_FAULTS = 2,
+ PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
+ PERF_COUNT_SW_CPU_MIGRATIONS = 4,
+ PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
+ PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+
+ PERF_COUNT_SW_MAX, /* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.sample_type to request information
+ * in the overflow packets.
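+ *
+ * For example (an illustrative combination, not an exhaustive one):
+ * attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD
+ * requests the instruction pointer, the pid/tid pair and the sampling
+ * period in every PERF_RECORD_SAMPLE record.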
+ */ +enum perf_event_sample_format { + PERF_SAMPLE_IP = 1U << 0, + PERF_SAMPLE_TID = 1U << 1, + PERF_SAMPLE_TIME = 1U << 2, + PERF_SAMPLE_ADDR = 1U << 3, + PERF_SAMPLE_READ = 1U << 4, + PERF_SAMPLE_CALLCHAIN = 1U << 5, + PERF_SAMPLE_ID = 1U << 6, + PERF_SAMPLE_CPU = 1U << 7, + PERF_SAMPLE_PERIOD = 1U << 8, + PERF_SAMPLE_STREAM_ID = 1U << 9, + PERF_SAMPLE_RAW = 1U << 10, + + PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ +}; + +/* + * The format of the data returned by read() on a perf event fd, + * as specified by attr.read_format: + * + * struct read_format { + * { u64 value; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 id; } && PERF_FORMAT_ID + * } && !PERF_FORMAT_GROUP + * + * { u64 nr; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 value; + * { u64 id; } && PERF_FORMAT_ID + * } cntr[nr]; + * } && PERF_FORMAT_GROUP + * }; + */ +enum perf_event_read_format { + PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, + PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, + PERF_FORMAT_ID = 1U << 2, + PERF_FORMAT_GROUP = 1U << 3, + + PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ +}; + +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ + +/* + * Hardware event_id to monitor via a performance monitoring event: + */ +struct perf_event_attr { + + /* + * Major type: hardware/software/tracepoint/etc. + */ + __u32 type; + + /* + * Size of the attr structure, for fwd/bwd compat. + */ + __u32 size; + + /* + * Type specific configuration information. + */ + __u64 config; + + union { + __u64 sample_period; + __u64 sample_freq; + }; + + __u64 sample_type; + __u64 read_format; + + __u64 disabled : 1, /* off by default */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ + mmap : 1, /* include mmap data */ + comm : 1, /* include comm data */ + freq : 1, /* use freq, not period */ + inherit_stat : 1, /* per task counts */ + enable_on_exec : 1, /* next exec enables */ + task : 1, /* trace fork/exit */ + watermark : 1, /* wakeup_watermark */ + + __reserved_1 : 49; + + union { + __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_watermark; /* bytes before wakeup */ + }; + __u32 __reserved_2; + + __u64 __reserved_3; +}; + +/* + * Ioctls that can be done on a perf event fd: + */ +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) + +enum perf_event_ioc_flags { + PERF_IOC_FLAG_GROUP = 1U << 0, +}; + +/* + * Structure of the page that can be mapped via mmap + */ +struct perf_event_mmap_page { + __u32 version; /* version number of this structure */ + __u32 compat_version; /* lowest version this is compat with */ + + /* + * Bits needed to read the hw events in user-space. + * + * u32 seq; + * s64 count; + * + * do { + * seq = pc->lock; + * + * barrier() + * if (pc->index) { + * count = pmc_read(pc->index - 1); + * count += pc->offset; + * } else + * goto regular_read; + * + * barrier(); + * } while (pc->lock != seq); + * + * NOTE: for obvious reason this only works on self-monitoring + * processes. 
+ */ + __u32 lock; /* seqlock for synchronization */ + __u32 index; /* hardware event identifier */ + __s64 offset; /* add to hardware event value */ + __u64 time_enabled; /* time event active */ + __u64 time_running; /* time event on cpu */ + + /* + * Hole for extension of the self monitor capabilities + */ + + __u64 __reserved[123]; /* align to 1k */ + + /* + * Control data for the mmap() data buffer. + * + * User-space reading the @data_head value should issue an rmb(), on + * SMP capable platforms, after reading this value -- see + * perf_event_wakeup(). + * + * When the mapping is PROT_WRITE the @data_tail value should be + * written by userspace to reflect the last read data. In this case + * the kernel will not over-write unread data. + */ + __u64 data_head; /* head in the data section */ + __u64 data_tail; /* user-space written tail */ +}; + +#define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0) +#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_RECORD_MISC_KERNEL (1 << 0) +#define PERF_RECORD_MISC_USER (2 << 0) +#define PERF_RECORD_MISC_HYPERVISOR (3 << 0) + +struct perf_event_header { + __u32 type; + __u16 misc; + __u16 size; +}; + +enum perf_event_type { + + /* + * The MMAP events record the PROT_EXEC mappings so that we can + * correlate userspace IPs to code. They have the following structure: + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; + * }; + */ + PERF_RECORD_MMAP = 1, + + /* + * struct { + * struct perf_event_header header; + * u64 id; + * u64 lost; + * }; + */ + PERF_RECORD_LOST = 2, + + /* + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * char comm[]; + * }; + */ + PERF_RECORD_COMM = 3, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * u64 time; + * }; + */ + PERF_RECORD_EXIT = 4, + + /* + * struct { + * struct perf_event_header header; + * u64 time; + * u64 id; + * u64 stream_id; + * }; + */ + PERF_RECORD_THROTTLE = 5, + PERF_RECORD_UNTHROTTLE = 6, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * { u64 time; } && PERF_SAMPLE_TIME + * }; + */ + PERF_RECORD_FORK = 7, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, tid; + * + * struct read_format values; + * }; + */ + PERF_RECORD_READ = 8, + + /* + * struct { + * struct perf_event_header header; + * + * { u64 ip; } && PERF_SAMPLE_IP + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 addr; } && PERF_SAMPLE_ADDR + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 period; } && PERF_SAMPLE_PERIOD + * + * { struct read_format values; } && PERF_SAMPLE_READ + * + * { u64 nr, + * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN + * + * # + * # The RAW record below is opaque data wrt the ABI + * # + * # That is, the ABI doesn't make any promises wrt to + * # the stability of its content, it may vary depending + * # on event_id, hardware, kernel version and phase of + * # the moon. + * # + * # In other words, PERF_SAMPLE_RAW contents are not an ABI. 
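+ * # (For example, for a tracepoint event the raw data is the
+ * # ftrace record itself, so its layout follows the tracepoint
+ * # definition rather than this header.)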
+ * #
+ *
+ * { u32 size;
+ * char data[size];}&& PERF_SAMPLE_RAW
+ * };
+ */
+ PERF_RECORD_SAMPLE = 9,
+
+ PERF_RECORD_MAX, /* non-ABI */
+};
+
+enum perf_callchain_context {
+ PERF_CONTEXT_HV = (__u64)-32,
+ PERF_CONTEXT_KERNEL = (__u64)-128,
+ PERF_CONTEXT_USER = (__u64)-512,
+
+ PERF_CONTEXT_GUEST = (__u64)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+ PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+
+ PERF_CONTEXT_MAX = (__u64)-4095,
+};
+
+#define PERF_FLAG_FD_NO_GROUP (1U << 0)
+#define PERF_FLAG_FD_OUTPUT (1U << 1)
+
+#ifdef __KERNEL__
+/*
+ * Kernel-internal data types and definitions:
+ */
+
+#ifdef CONFIG_PERF_EVENTS
+# include <asm/perf_event.h>
+#endif
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <linux/hrtimer.h>
+#include <linux/fs.h>
+#include <linux/pid_namespace.h>
+#include <asm/atomic.h>
+
+#define PERF_MAX_STACK_DEPTH 255
+
+struct perf_callchain_entry {
+ __u64 nr;
+ __u64 ip[PERF_MAX_STACK_DEPTH];
+};
+
+struct perf_raw_record {
+ u32 size;
+ void *data;
+};
+
+struct task_struct;
+
+/**
+ * struct hw_perf_event - performance event hardware details:
+ */
+struct hw_perf_event {
+#ifdef CONFIG_PERF_EVENTS
+ union {
+ struct { /* hardware */
+ u64 config;
+ unsigned long config_base;
+ unsigned long event_base;
+ int idx;
+ };
+ union { /* software */
+ atomic64_t count;
+ struct hrtimer hrtimer;
+ };
+ };
+ atomic64_t prev_count;
+ u64 sample_period;
+ u64 last_period;
+ atomic64_t period_left;
+ u64 interrupts;
+
+ u64 freq_count;
+ u64 freq_interrupts;
+ u64 freq_stamp;
+#endif
+};
+
+struct perf_event;
+
+/**
+ * struct pmu - generic performance monitoring unit
+ */
+struct pmu {
+ int (*enable) (struct perf_event *event);
+ void (*disable) (struct perf_event *event);
+ void (*read) (struct perf_event *event);
+ void (*unthrottle) (struct perf_event *event);
+};
+
+/**
+ * enum perf_event_active_state - the states of an event
+ */
+enum perf_event_active_state {
+ PERF_EVENT_STATE_ERROR = -2,
+ PERF_EVENT_STATE_OFF = -1,
+ PERF_EVENT_STATE_INACTIVE = 0,
+ PERF_EVENT_STATE_ACTIVE = 1,
+};
+
+struct file;
+
+struct perf_mmap_data {
+ struct rcu_head rcu_head;
+ int nr_pages; /* nr of data pages */
+ int writable; /* are we writable */
+ int nr_locked; /* nr pages mlocked */
+
+ atomic_t poll; /* POLL_ for wakeups */
+ atomic_t events; /* event_id limit */
+
+ atomic_long_t head; /* write position */
+ atomic_long_t done_head; /* completed head */
+
+ atomic_t lock; /* concurrent writes */
+ atomic_t wakeup; /* needs a wakeup */
+ atomic_t lost; /* nr records lost */
+
+ long watermark; /* wakeup watermark */
+
+ struct perf_event_mmap_page *user_page;
+ void *data_pages[0];
+};
+
+struct perf_pending_entry {
+ struct perf_pending_entry *next;
+ void (*func)(struct perf_pending_entry *);
+};
+
+/**
+ * struct perf_event - performance event kernel representation:
+ */
+struct perf_event {
+#ifdef CONFIG_PERF_EVENTS
+ struct list_head group_entry;
+ struct list_head event_entry;
+ struct list_head sibling_list;
+ int nr_siblings;
+ struct perf_event *group_leader;
+ struct perf_event *output;
+ const struct pmu *pmu;
+
+ enum perf_event_active_state state;
+ atomic64_t count;
+
+ /*
+ * These are the total time in nanoseconds that the event
+ * has been enabled (i.e. eligible to run, and the task has
+ * been scheduled in, if this is a per-task event)
+ * and running (scheduled onto the CPU), respectively.
+ *
+ * They are computed from tstamp_enabled, tstamp_running and
+ * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
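+ *
+ * (Illustration: while an event is INACTIVE, these reduce to
+ * total_time_enabled = ctx->time - tstamp_enabled and
+ * total_time_running = tstamp_stopped - tstamp_running.)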
+ */ + u64 total_time_enabled; + u64 total_time_running; + + /* + * These are timestamps used for computing total_time_enabled + * and total_time_running when the event is in INACTIVE or + * ACTIVE state, measured in nanoseconds from an arbitrary point + * in time. + * tstamp_enabled: the notional time when the event was enabled + * tstamp_running: the notional time when the event was scheduled on + * tstamp_stopped: in INACTIVE state, the notional time when the + * event was scheduled off. + */ + u64 tstamp_enabled; + u64 tstamp_running; + u64 tstamp_stopped; + + struct perf_event_attr attr; + struct hw_perf_event hw; + + struct perf_event_context *ctx; + struct file *filp; + + /* + * These accumulate total time (in nanoseconds) that children + * events have been enabled and running, respectively. + */ + atomic64_t child_total_time_enabled; + atomic64_t child_total_time_running; + + /* + * Protect attach/detach and child_list: + */ + struct mutex child_mutex; + struct list_head child_list; + struct perf_event *parent; + + int oncpu; + int cpu; + + struct list_head owner_entry; + struct task_struct *owner; + + /* mmap bits */ + struct mutex mmap_mutex; + atomic_t mmap_count; + struct perf_mmap_data *data; + + /* poll related */ + wait_queue_head_t waitq; + struct fasync_struct *fasync; + + /* delayed work for NMIs and such */ + int pending_wakeup; + int pending_kill; + int pending_disable; + struct perf_pending_entry pending; + + atomic_t event_limit; + + void (*destroy)(struct perf_event *); + struct rcu_head rcu_head; + + struct pid_namespace *ns; + u64 id; +#endif +}; + +/** + * struct perf_event_context - event context structure + * + * Used as a container for task events and CPU events as well: + */ +struct perf_event_context { + /* + * Protect the states of the events in the list, + * nr_active, and the list: + */ + spinlock_t lock; + /* + * Protect the list of events. Locking either mutex or lock + * is sufficient to ensure the list doesn't change; to change + * the list you need to lock both the mutex and the spinlock. + */ + struct mutex mutex; + + struct list_head group_list; + struct list_head event_list; + int nr_events; + int nr_active; + int is_active; + int nr_stat; + atomic_t refcount; + struct task_struct *task; + + /* + * Context clock, runs when context enabled. + */ + u64 time; + u64 timestamp; + + /* + * These fields let us detect when two contexts have both + * been cloned (inherited) from a common ancestor. 
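+ *
+ * (For instance, two contexts cloned from the same parent carry the
+ * same parent_ctx and parent_gen pair; context_equiv() uses this to
+ * treat them as interchangeable on context switch, as long as neither
+ * has been modified or pinned in the meantime.)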
+ */ + struct perf_event_context *parent_ctx; + u64 parent_gen; + u64 generation; + int pin_count; + struct rcu_head rcu_head; +}; + +/** + * struct perf_event_cpu_context - per cpu event context structure + */ +struct perf_cpu_context { + struct perf_event_context ctx; + struct perf_event_context *task_ctx; + int active_oncpu; + int max_pertask; + int exclusive; + + /* + * Recursion avoidance: + * + * task, softirq, irq, nmi context + */ + int recursion[4]; +}; + +struct perf_output_handle { + struct perf_event *event; + struct perf_mmap_data *data; + unsigned long head; + unsigned long offset; + int nmi; + int sample; + int locked; + unsigned long flags; +}; + +#ifdef CONFIG_PERF_EVENTS + +/* + * Set by architecture code: + */ +extern int perf_max_events; + +extern const struct pmu *hw_perf_event_init(struct perf_event *event); + +extern void perf_event_task_sched_in(struct task_struct *task, int cpu); +extern void perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu); +extern void perf_event_task_tick(struct task_struct *task, int cpu); +extern int perf_event_init_task(struct task_struct *child); +extern void perf_event_exit_task(struct task_struct *child); +extern void perf_event_free_task(struct task_struct *task); +extern void set_perf_event_pending(void); +extern void perf_event_do_pending(void); +extern void perf_event_print_debug(void); +extern void __perf_disable(void); +extern bool __perf_enable(void); +extern void perf_disable(void); +extern void perf_enable(void); +extern int perf_event_task_disable(void); +extern int perf_event_task_enable(void); +extern int hw_perf_group_sched_in(struct perf_event *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, int cpu); +extern void perf_event_update_userpage(struct perf_event *event); + +struct perf_sample_data { + u64 type; + + u64 ip; + struct { + u32 pid; + u32 tid; + } tid_entry; + u64 time; + u64 addr; + u64 id; + u64 stream_id; + struct { + u32 cpu; + u32 reserved; + } cpu_entry; + u64 period; + struct perf_callchain_entry *callchain; + struct perf_raw_record *raw; +}; + +extern void perf_output_sample(struct perf_output_handle *handle, + struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event); +extern void perf_prepare_sample(struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event, + struct pt_regs *regs); + +extern int perf_event_overflow(struct perf_event *event, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs); + +/* + * Return 1 for a software event, 0 for a hardware event + */ +static inline int is_software_event(struct perf_event *event) +{ + return (event->attr.type != PERF_TYPE_RAW) && + (event->attr.type != PERF_TYPE_HARDWARE) && + (event->attr.type != PERF_TYPE_HW_CACHE); +} + +extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; + +extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); + +static inline void +perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) +{ + if (atomic_read(&perf_swevent_enabled[event_id])) + __perf_sw_event(event_id, nr, nmi, regs, addr); +} + +extern void __perf_event_mmap(struct vm_area_struct *vma); + +static inline void perf_event_mmap(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_EXEC) + __perf_event_mmap(vma); +} + +extern void perf_event_comm(struct task_struct *tsk); +extern void perf_event_fork(struct task_struct *tsk); + +extern struct perf_callchain_entry 
*perf_callchain(struct pt_regs *regs); + +extern int sysctl_perf_event_paranoid; +extern int sysctl_perf_event_mlock; +extern int sysctl_perf_event_sample_rate; + +extern void perf_event_init(void); +extern void perf_tp_event(int event_id, u64 addr, u64 count, + void *record, int entry_size); + +#ifndef perf_misc_flags +#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ + PERF_RECORD_MISC_KERNEL) +#define perf_instruction_pointer(regs) instruction_pointer(regs) +#endif + +extern int perf_output_begin(struct perf_output_handle *handle, + struct perf_event *event, unsigned int size, + int nmi, int sample); +extern void perf_output_end(struct perf_output_handle *handle); +extern void perf_output_copy(struct perf_output_handle *handle, + const void *buf, unsigned int len); +#else +static inline void +perf_event_task_sched_in(struct task_struct *task, int cpu) { } +static inline void +perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu) { } +static inline void +perf_event_task_tick(struct task_struct *task, int cpu) { } +static inline int perf_event_init_task(struct task_struct *child) { return 0; } +static inline void perf_event_exit_task(struct task_struct *child) { } +static inline void perf_event_free_task(struct task_struct *task) { } +static inline void perf_event_do_pending(void) { } +static inline void perf_event_print_debug(void) { } +static inline void perf_disable(void) { } +static inline void perf_enable(void) { } +static inline int perf_event_task_disable(void) { return -EINVAL; } +static inline int perf_event_task_enable(void) { return -EINVAL; } + +static inline void +perf_sw_event(u32 event_id, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) { } + +static inline void perf_event_mmap(struct vm_area_struct *vma) { } +static inline void perf_event_comm(struct task_struct *tsk) { } +static inline void perf_event_fork(struct task_struct *tsk) { } +static inline void perf_event_init(void) { } + +#endif + +#define perf_output_put(handle, x) \ + perf_output_copy((handle), &(x), sizeof(x)) + +#endif /* __KERNEL__ */ +#endif /* _LINUX_PERF_EVENT_H */ diff --git a/include/linux/prctl.h b/include/linux/prctl.h index b00df4c79c6..07bff666e65 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -85,7 +85,7 @@ #define PR_SET_TIMERSLACK 29 #define PR_GET_TIMERSLACK 30 -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 +#define PR_TASK_PERF_EVENTS_DISABLE 31 +#define PR_TASK_PERF_EVENTS_ENABLE 32 #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 8af3d249170..8b265a8986d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -100,7 +100,7 @@ struct robust_list_head; struct bio; struct fs_struct; struct bts_context; -struct perf_counter_context; +struct perf_event_context; /* * List of flags we want to share for kernel threads, @@ -701,7 +701,7 @@ struct user_struct { #endif #endif -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS atomic_long_t locked_vm; #endif }; @@ -1449,10 +1449,10 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif -#ifdef CONFIG_PERF_COUNTERS - struct perf_counter_context *perf_counter_ctxp; - struct mutex perf_counter_mutex; - struct list_head perf_counter_list; +#ifdef CONFIG_PERF_EVENTS + struct perf_event_context *perf_event_ctxp; + struct mutex perf_event_mutex; + struct list_head perf_event_list; #endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; /* 
Protected by alloc_lock */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a8e37821cc6..02f19f9a76c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -55,7 +55,7 @@ struct compat_timeval; struct robust_list_head; struct getcpu_cache; struct old_linux_dirent; -struct perf_counter_attr; +struct perf_event_attr; #include #include @@ -885,7 +885,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -asmlinkage long sys_perf_counter_open( - struct perf_counter_attr __user *attr_uptr, +asmlinkage long sys_perf_event_open( + struct perf_event_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 72a3b437b82..ec91e78244f 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -378,7 +378,7 @@ static inline int ftrace_get_offsets_##call( \ #ifdef CONFIG_EVENT_PROFILE /* - * Generate the functions needed for tracepoint perf_counter support. + * Generate the functions needed for tracepoint perf_event support. * * NOTE: The insertion profile callback (ftrace_profile_) is defined later * @@ -656,7 +656,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * { * struct ftrace_data_offsets_ __maybe_unused __data_offsets; * struct ftrace_event_call *event_call = &event_; - * extern void perf_tpcounter_event(int, u64, u64, void *, int); + * extern void perf_tp_event(int, u64, u64, void *, int); * struct ftrace_raw_##call *entry; * u64 __addr = 0, __count = 1; * unsigned long irq_flags; @@ -691,7 +691,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * * <- affect our values * - * perf_tpcounter_event(event_call->id, __addr, __count, entry, + * perf_tp_event(event_call->id, __addr, __count, entry, * __entry_size); <- submit them to perf counter * } while (0); * @@ -712,7 +712,7 @@ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_event_call *event_call = &event_##call; \ - extern void perf_tpcounter_event(int, u64, u64, void *, int); \ + extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ @@ -742,7 +742,7 @@ static void ftrace_profile_##call(proto) \ \ { assign; } \ \ - perf_tpcounter_event(event_call->id, __addr, __count, entry,\ + perf_tp_event(event_call->id, __addr, __count, entry,\ __entry_size); \ } while (0); \ \ diff --git a/init/Kconfig b/init/Kconfig index 8e8b76d8a27..cfdf5c32280 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -915,17 +915,17 @@ config AIO by some high performance threaded applications. Disabling this option saves about 7k. -config HAVE_PERF_COUNTERS +config HAVE_PERF_EVENTS bool help See tools/perf/design.txt for details. menu "Performance Counters" -config PERF_COUNTERS +config PERF_EVENTS bool "Kernel Performance Counters" default y if PROFILING - depends on HAVE_PERF_COUNTERS + depends on HAVE_PERF_EVENTS select ANON_INODES help Enable kernel support for performance counter hardware. @@ -947,7 +947,7 @@ config PERF_COUNTERS config EVENT_PROFILE bool "Tracepoint profiling sources" - depends on PERF_COUNTERS && EVENT_TRACING + depends on PERF_EVENTS && EVENT_TRACING default y help Allow the use of tracepoints as software performance counters. 
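For reference, user-space reaches the renamed ABI through sys_perf_event_open().
Below is a minimal self-monitoring sketch (illustrative, not part of this patch);
it assumes glibc's syscall(2) wrapper and that __NR_perf_event_open is defined
for the build target:

	#include <linux/perf_event.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count;
		int fd;

		/* count CPU cycles for this task, user space only */
		memset(&attr, 0, sizeof(attr));
		attr.type = PERF_TYPE_HARDWARE;
		attr.size = sizeof(attr);
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.disabled = 1;
		attr.exclude_kernel = 1;

		/* pid 0 = current task, cpu -1 = any cpu, no group, no flags */
		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}

		ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
		/* ... workload under measurement ... */
		ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

		read(fd, &count, sizeof(count));	/* plain read_format: one u64 */
		printf("cycles: %lld\n", count);
		close(fd);
		return 0;
	}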
diff --git a/kernel/Makefile b/kernel/Makefile index 3d9c7e27e3f..e26a546eac4 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -96,7 +96,7 @@ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/exit.c b/kernel/exit.c index ae5d8660ddf..e47ee8a0613 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include @@ -154,8 +154,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); -#ifdef CONFIG_PERF_COUNTERS - WARN_ON_ONCE(tsk->perf_counter_ctxp); +#ifdef CONFIG_PERF_EVENTS + WARN_ON_ONCE(tsk->perf_event_ctxp); #endif trace_sched_process_free(tsk); put_task_struct(tsk); @@ -981,7 +981,7 @@ NORET_TYPE void do_exit(long code) * Flush inherited counters to the parent - before the parent * gets woken up by child-exit notifications. */ - perf_counter_exit_task(tsk); + perf_event_exit_task(tsk); exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA diff --git a/kernel/fork.c b/kernel/fork.c index bfee931ee3f..2cebfb23b0b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -61,7 +61,7 @@ #include #include #include -#include +#include #include #include @@ -1078,7 +1078,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); - retval = perf_counter_init_task(p); + retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; @@ -1253,7 +1253,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); - perf_counter_fork(p); + perf_event_fork(p); return p; bad_fork_free_pid: @@ -1280,7 +1280,7 @@ bad_fork_cleanup_semundo: bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: - perf_counter_free_task(p); + perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c deleted file mode 100644 index 62de0db8092..00000000000 --- a/kernel/perf_counter.c +++ /dev/null @@ -1,5000 +0,0 @@ -/* - * Performance counter core code - * - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - * Copyright © 2009 Paul Mackerras, IBM Corp. 
- * - * For licensing details see kernel-base/COPYING - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * Each CPU has a list of per CPU counters: - */ -DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); - -int perf_max_counters __read_mostly = 1; -static int perf_reserved_percpu __read_mostly; -static int perf_overcommit __read_mostly = 1; - -static atomic_t nr_counters __read_mostly; -static atomic_t nr_mmap_counters __read_mostly; -static atomic_t nr_comm_counters __read_mostly; -static atomic_t nr_task_counters __read_mostly; - -/* - * perf counter paranoia level: - * -1 - not paranoid at all - * 0 - disallow raw tracepoint access for unpriv - * 1 - disallow cpu counters for unpriv - * 2 - disallow kernel profiling for unpriv - */ -int sysctl_perf_counter_paranoid __read_mostly = 1; - -static inline bool perf_paranoid_tracepoint_raw(void) -{ - return sysctl_perf_counter_paranoid > -1; -} - -static inline bool perf_paranoid_cpu(void) -{ - return sysctl_perf_counter_paranoid > 0; -} - -static inline bool perf_paranoid_kernel(void) -{ - return sysctl_perf_counter_paranoid > 1; -} - -int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ - -/* - * max perf counter sample rate - */ -int sysctl_perf_counter_sample_rate __read_mostly = 100000; - -static atomic64_t perf_counter_id; - -/* - * Lock for (sysadmin-configurable) counter reservations: - */ -static DEFINE_SPINLOCK(perf_resource_lock); - -/* - * Architecture provided APIs - weak aliases: - */ -extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - return NULL; -} - -void __weak hw_perf_disable(void) { barrier(); } -void __weak hw_perf_enable(void) { barrier(); } - -void __weak hw_perf_counter_setup(int cpu) { barrier(); } -void __weak hw_perf_counter_setup_online(int cpu) { barrier(); } - -int __weak -hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu) -{ - return 0; -} - -void __weak perf_counter_print_debug(void) { } - -static DEFINE_PER_CPU(int, perf_disable_count); - -void __perf_disable(void) -{ - __get_cpu_var(perf_disable_count)++; -} - -bool __perf_enable(void) -{ - return !--__get_cpu_var(perf_disable_count); -} - -void perf_disable(void) -{ - __perf_disable(); - hw_perf_disable(); -} - -void perf_enable(void) -{ - if (__perf_enable()) - hw_perf_enable(); -} - -static void get_ctx(struct perf_counter_context *ctx) -{ - WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); -} - -static void free_ctx(struct rcu_head *head) -{ - struct perf_counter_context *ctx; - - ctx = container_of(head, struct perf_counter_context, rcu_head); - kfree(ctx); -} - -static void put_ctx(struct perf_counter_context *ctx) -{ - if (atomic_dec_and_test(&ctx->refcount)) { - if (ctx->parent_ctx) - put_ctx(ctx->parent_ctx); - if (ctx->task) - put_task_struct(ctx->task); - call_rcu(&ctx->rcu_head, free_ctx); - } -} - -static void unclone_ctx(struct perf_counter_context *ctx) -{ - if (ctx->parent_ctx) { - put_ctx(ctx->parent_ctx); - ctx->parent_ctx = NULL; - } -} - -/* - * If we inherit counters we want to return the parent counter id - * to userspace. 
- */
-static u64 primary_counter_id(struct perf_counter *counter)
-{
- u64 id = counter->id;
-
- if (counter->parent)
- id = counter->parent->id;
-
- return id;
-}
-
-/*
- * Get the perf_counter_context for a task and lock it.
- * This has to cope with the fact that until it is locked,
- * the context could get moved to another task.
- */
-static struct perf_counter_context *
-perf_lock_task_context(struct task_struct *task, unsigned long *flags)
-{
- struct perf_counter_context *ctx;
-
- rcu_read_lock();
- retry:
- ctx = rcu_dereference(task->perf_counter_ctxp);
- if (ctx) {
- /*
- * If this context is a clone of another, it might
- * get swapped for another underneath us by
- * perf_counter_task_sched_out, though the
- * rcu_read_lock() protects us from any context
- * getting freed. Lock the context and check if it
- * got swapped before we could get the lock, and retry
- * if so. If we locked the right context, then it
- * can't get swapped on us any more.
- */
- spin_lock_irqsave(&ctx->lock, *flags);
- if (ctx != rcu_dereference(task->perf_counter_ctxp)) {
- spin_unlock_irqrestore(&ctx->lock, *flags);
- goto retry;
- }
-
- if (!atomic_inc_not_zero(&ctx->refcount)) {
- spin_unlock_irqrestore(&ctx->lock, *flags);
- ctx = NULL;
- }
- }
- rcu_read_unlock();
- return ctx;
-}
-
-/*
- * Get the context for a task and increment its pin_count so it
- * can't get swapped to another task. This also increments its
- * reference count so that the context can't get freed.
- */
-static struct perf_counter_context *perf_pin_task_context(struct task_struct *task)
-{
- struct perf_counter_context *ctx;
- unsigned long flags;
-
- ctx = perf_lock_task_context(task, &flags);
- if (ctx) {
- ++ctx->pin_count;
- spin_unlock_irqrestore(&ctx->lock, flags);
- }
- return ctx;
-}
-
-static void perf_unpin_context(struct perf_counter_context *ctx)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->lock, flags);
- --ctx->pin_count;
- spin_unlock_irqrestore(&ctx->lock, flags);
- put_ctx(ctx);
-}
-
-/*
- * Add a counter to the lists for its context.
- * Must be called with ctx->mutex and ctx->lock held.
- */
-static void
-list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
-{
- struct perf_counter *group_leader = counter->group_leader;
-
- /*
- * Depending on whether it is a standalone or sibling counter,
- * add it straight to the context's counter list, or to the group
- * leader's sibling list:
- */
- if (group_leader == counter)
- list_add_tail(&counter->group_entry, &ctx->group_list);
- else {
- list_add_tail(&counter->group_entry, &group_leader->sibling_list);
- group_leader->nr_siblings++;
- }
-
- list_add_rcu(&counter->event_entry, &ctx->event_list);
- ctx->nr_counters++;
- if (counter->attr.inherit_stat)
- ctx->nr_stat++;
-}
-
-/*
- * Remove a counter from the lists for its context.
- * Must be called with ctx->mutex and ctx->lock held.
- */ -static void -list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) -{ - struct perf_counter *sibling, *tmp; - - if (list_empty(&counter->group_entry)) - return; - ctx->nr_counters--; - if (counter->attr.inherit_stat) - ctx->nr_stat--; - - list_del_init(&counter->group_entry); - list_del_rcu(&counter->event_entry); - - if (counter->group_leader != counter) - counter->group_leader->nr_siblings--; - - /* - * If this was a group counter with sibling counters then - * upgrade the siblings to singleton counters by adding them - * to the context list directly: - */ - list_for_each_entry_safe(sibling, tmp, &counter->sibling_list, group_entry) { - - list_move_tail(&sibling->group_entry, &ctx->group_list); - sibling->group_leader = sibling; - } -} - -static void -counter_sched_out(struct perf_counter *counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) -{ - if (counter->state != PERF_COUNTER_STATE_ACTIVE) - return; - - counter->state = PERF_COUNTER_STATE_INACTIVE; - if (counter->pending_disable) { - counter->pending_disable = 0; - counter->state = PERF_COUNTER_STATE_OFF; - } - counter->tstamp_stopped = ctx->time; - counter->pmu->disable(counter); - counter->oncpu = -1; - - if (!is_software_counter(counter)) - cpuctx->active_oncpu--; - ctx->nr_active--; - if (counter->attr.exclusive || !cpuctx->active_oncpu) - cpuctx->exclusive = 0; -} - -static void -group_sched_out(struct perf_counter *group_counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) -{ - struct perf_counter *counter; - - if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) - return; - - counter_sched_out(group_counter, cpuctx, ctx); - - /* - * Schedule out siblings (if any): - */ - list_for_each_entry(counter, &group_counter->sibling_list, group_entry) - counter_sched_out(counter, cpuctx, ctx); - - if (group_counter->attr.exclusive) - cpuctx->exclusive = 0; -} - -/* - * Cross CPU call to remove a performance counter - * - * We disable the counter on the hardware level first. After that we - * remove it from the context list. - */ -static void __perf_counter_remove_from_context(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; - - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. - */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; - - spin_lock(&ctx->lock); - /* - * Protect the list operation against NMI by disabling the - * counters on a global level. - */ - perf_disable(); - - counter_sched_out(counter, cpuctx, ctx); - - list_del_counter(counter, ctx); - - if (!ctx->task) { - /* - * Allow more per task counters with respect to the - * reservation: - */ - cpuctx->max_pertask = - min(perf_max_counters - ctx->nr_counters, - perf_max_counters - perf_reserved_percpu); - } - - perf_enable(); - spin_unlock(&ctx->lock); -} - - -/* - * Remove the counter from a task's (or a CPU's) list of counters. - * - * Must be called with ctx->mutex held. - * - * CPU counters are removed with a smp call. For task counters we only - * call when the task is on a CPU. - * - * If counter->ctx is a cloned context, callers must make sure that - * every task struct that counter->ctx->task could possibly point to - * remains valid. 
This is OK when called from perf_release since
- * that only calls us on the top-level context, which can't be a clone.
- * When called from perf_counter_exit_task, it's OK because the
- * context has been detached from its task.
- */
-static void perf_counter_remove_from_context(struct perf_counter *counter)
-{
- struct perf_counter_context *ctx = counter->ctx;
- struct task_struct *task = ctx->task;
-
- if (!task) {
- /*
- * Per cpu counters are removed via an smp call and
- * the removal is always successful.
- */
- smp_call_function_single(counter->cpu,
- __perf_counter_remove_from_context,
- counter, 1);
- return;
- }
-
-retry:
- task_oncpu_function_call(task, __perf_counter_remove_from_context,
- counter);
-
- spin_lock_irq(&ctx->lock);
- /*
- * If the context is active we need to retry the smp call.
- */
- if (ctx->nr_active && !list_empty(&counter->group_entry)) {
- spin_unlock_irq(&ctx->lock);
- goto retry;
- }
-
- /*
- * The lock prevents that this context is scheduled in so we
- * can remove the counter safely, if the call above did not
- * succeed.
- */
- if (!list_empty(&counter->group_entry)) {
- list_del_counter(counter, ctx);
- }
- spin_unlock_irq(&ctx->lock);
-}
-
-static inline u64 perf_clock(void)
-{
- return cpu_clock(smp_processor_id());
-}
-
-/*
- * Update the record of the current time in a context.
- */
-static void update_context_time(struct perf_counter_context *ctx)
-{
- u64 now = perf_clock();
-
- ctx->time += now - ctx->timestamp;
- ctx->timestamp = now;
-}
-
-/*
- * Update the total_time_enabled and total_time_running fields for a counter.
- */
-static void update_counter_times(struct perf_counter *counter)
-{
- struct perf_counter_context *ctx = counter->ctx;
- u64 run_end;
-
- if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
- counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE)
- return;
-
- counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
-
- if (counter->state == PERF_COUNTER_STATE_INACTIVE)
- run_end = counter->tstamp_stopped;
- else
- run_end = ctx->time;
-
- counter->total_time_running = run_end - counter->tstamp_running;
-}
-
-/*
- * Update total_time_enabled and total_time_running for all counters in a group.
- */
-static void update_group_times(struct perf_counter *leader)
-{
- struct perf_counter *counter;
-
- update_counter_times(leader);
- list_for_each_entry(counter, &leader->sibling_list, group_entry)
- update_counter_times(counter);
-}
-
-/*
- * Cross CPU call to disable a performance counter
- */
-static void __perf_counter_disable(void *info)
-{
- struct perf_counter *counter = info;
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
- struct perf_counter_context *ctx = counter->ctx;
-
- /*
- * If this is a per-task counter, need to check whether this
- * counter's task is the current task on this cpu.
- */
- if (ctx->task && cpuctx->task_ctx != ctx)
- return;
-
- spin_lock(&ctx->lock);
-
- /*
- * If the counter is on, turn it off.
- * If it is in error state, leave it in error state.
- */
- if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
- update_context_time(ctx);
- update_group_times(counter);
- if (counter == counter->group_leader)
- group_sched_out(counter, cpuctx, ctx);
- else
- counter_sched_out(counter, cpuctx, ctx);
- counter->state = PERF_COUNTER_STATE_OFF;
- }
-
- spin_unlock(&ctx->lock);
-}
-
-/*
- * Disable a counter.
- *
- * If counter->ctx is a cloned context, callers must make sure that
- * every task struct that counter->ctx->task could possibly point to
- * remains valid.
This condition is satisfied when called through
- * perf_counter_for_each_child or perf_counter_for_each because they
- * hold the top-level counter's child_mutex, so any descendant that
- * goes to exit will block in sync_child_counter.
- * When called from perf_pending_counter it's OK because counter->ctx
- * is the current context on this CPU and preemption is disabled,
- * hence we can't get into perf_counter_task_sched_out for this context.
- */
-static void perf_counter_disable(struct perf_counter *counter)
-{
- struct perf_counter_context *ctx = counter->ctx;
- struct task_struct *task = ctx->task;
-
- if (!task) {
- /*
- * Disable the counter on the cpu that it's on
- */
- smp_call_function_single(counter->cpu, __perf_counter_disable,
- counter, 1);
- return;
- }
-
- retry:
- task_oncpu_function_call(task, __perf_counter_disable, counter);
-
- spin_lock_irq(&ctx->lock);
- /*
- * If the counter is still active, we need to retry the cross-call.
- */
- if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
- spin_unlock_irq(&ctx->lock);
- goto retry;
- }
-
- /*
- * Since we have the lock this context can't be scheduled
- * in, so we can change the state safely.
- */
- if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
- update_group_times(counter);
- counter->state = PERF_COUNTER_STATE_OFF;
- }
-
- spin_unlock_irq(&ctx->lock);
-}
-
-static int
-counter_sched_in(struct perf_counter *counter,
- struct perf_cpu_context *cpuctx,
- struct perf_counter_context *ctx,
- int cpu)
-{
- if (counter->state <= PERF_COUNTER_STATE_OFF)
- return 0;
-
- counter->state = PERF_COUNTER_STATE_ACTIVE;
- counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */
- /*
- * The new state must be visible before we turn it on in the hardware:
- */
- smp_wmb();
-
- if (counter->pmu->enable(counter)) {
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->oncpu = -1;
- return -EAGAIN;
- }
-
- counter->tstamp_running += ctx->time - counter->tstamp_stopped;
-
- if (!is_software_counter(counter))
- cpuctx->active_oncpu++;
- ctx->nr_active++;
-
- if (counter->attr.exclusive)
- cpuctx->exclusive = 1;
-
- return 0;
-}
-
-static int
-group_sched_in(struct perf_counter *group_counter,
- struct perf_cpu_context *cpuctx,
- struct perf_counter_context *ctx,
- int cpu)
-{
- struct perf_counter *counter, *partial_group;
- int ret;
-
- if (group_counter->state == PERF_COUNTER_STATE_OFF)
- return 0;
-
- ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
- if (ret)
- return ret < 0 ? ret : 0;
-
- if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
- return -EAGAIN;
-
- /*
- * Schedule in siblings as one group (if any):
- */
- list_for_each_entry(counter, &group_counter->sibling_list, group_entry) {
- if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
- partial_group = counter;
- goto group_error;
- }
- }
-
- return 0;
-
-group_error:
- /*
- * Groups can be scheduled in as one unit only, so undo any
- * partial group before returning:
- */
- list_for_each_entry(counter, &group_counter->sibling_list, group_entry) {
- if (counter == partial_group)
- break;
- counter_sched_out(counter, cpuctx, ctx);
- }
- counter_sched_out(group_counter, cpuctx, ctx);
-
- return -EAGAIN;
-}
-
-/*
- * Return 1 for a group consisting entirely of software counters,
- * 0 if the group contains any hardware counters.
- */ -static int is_software_only_group(struct perf_counter *leader) -{ - struct perf_counter *counter; - - if (!is_software_counter(leader)) - return 0; - - list_for_each_entry(counter, &leader->sibling_list, group_entry) - if (!is_software_counter(counter)) - return 0; - - return 1; -} - -/* - * Work out whether we can put this counter group on the CPU now. - */ -static int group_can_go_on(struct perf_counter *counter, - struct perf_cpu_context *cpuctx, - int can_add_hw) -{ - /* - * Groups consisting entirely of software counters can always go on. - */ - if (is_software_only_group(counter)) - return 1; - /* - * If an exclusive group is already on, no other hardware - * counters can go on. - */ - if (cpuctx->exclusive) - return 0; - /* - * If this group is exclusive and there are already - * counters on the CPU, it can't go on. - */ - if (counter->attr.exclusive && cpuctx->active_oncpu) - return 0; - /* - * Otherwise, try to add it if all previous groups were able - * to go on. - */ - return can_add_hw; -} - -static void add_counter_to_ctx(struct perf_counter *counter, - struct perf_counter_context *ctx) -{ - list_add_counter(counter, ctx); - counter->tstamp_enabled = ctx->time; - counter->tstamp_running = ctx->time; - counter->tstamp_stopped = ctx->time; -} - -/* - * Cross CPU call to install and enable a performance counter - * - * Must be called with ctx->mutex held - */ -static void __perf_install_in_context(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; - struct perf_counter *leader = counter->group_leader; - int cpu = smp_processor_id(); - int err; - - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. - * Or possibly this is the right context but it isn't - * on this cpu because it had no counters. - */ - if (ctx->task && cpuctx->task_ctx != ctx) { - if (cpuctx->task_ctx || ctx->task != current) - return; - cpuctx->task_ctx = ctx; - } - - spin_lock(&ctx->lock); - ctx->is_active = 1; - update_context_time(ctx); - - /* - * Protect the list operation against NMI by disabling the - * counters on a global level. NOP for non NMI based counters. - */ - perf_disable(); - - add_counter_to_ctx(counter, ctx); - - /* - * Don't put the counter on if it is disabled or if - * it is in a group and the group isn't on. - */ - if (counter->state != PERF_COUNTER_STATE_INACTIVE || - (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)) - goto unlock; - - /* - * An exclusive counter can't go on if there are already active - * hardware counters, and no hardware counter can go on if there - * is already an exclusive counter on. - */ - if (!group_can_go_on(counter, cpuctx, 1)) - err = -EEXIST; - else - err = counter_sched_in(counter, cpuctx, ctx, cpu); - - if (err) { - /* - * This counter couldn't go on. If it is in a group - * then we have to pull the whole group off. - * If the counter group is pinned then put it in error state. 
- */
- if (leader != counter)
- group_sched_out(leader, cpuctx, ctx);
- if (leader->attr.pinned) {
- update_group_times(leader);
- leader->state = PERF_COUNTER_STATE_ERROR;
- }
- }
-
- if (!err && !ctx->task && cpuctx->max_pertask)
- cpuctx->max_pertask--;
-
- unlock:
- perf_enable();
-
- spin_unlock(&ctx->lock);
-}
-
-/*
- * Attach a performance counter to a context
- *
- * First we add the counter to the list with the hardware enable bit
- * in counter->hw_config cleared.
- *
- * If the counter is attached to a task which is on a CPU we use an smp
- * call to enable it in the task context. The task might have been
- * scheduled away, but we check this in the smp call again.
- *
- * Must be called with ctx->mutex held.
- */
-static void
-perf_install_in_context(struct perf_counter_context *ctx,
- struct perf_counter *counter,
- int cpu)
-{
- struct task_struct *task = ctx->task;
-
- if (!task) {
- /*
- * Per cpu counters are installed via an smp call and
- * the install is always successful.
- */
- smp_call_function_single(cpu, __perf_install_in_context,
- counter, 1);
- return;
- }
-
-retry:
- task_oncpu_function_call(task, __perf_install_in_context,
- counter);
-
- spin_lock_irq(&ctx->lock);
- /*
- * we need to retry the smp call.
- */
- if (ctx->is_active && list_empty(&counter->group_entry)) {
- spin_unlock_irq(&ctx->lock);
- goto retry;
- }
-
- /*
- * The lock prevents that this context is scheduled in so we
- * can add the counter safely, if the call above did not
- * succeed.
- */
- if (list_empty(&counter->group_entry))
- add_counter_to_ctx(counter, ctx);
- spin_unlock_irq(&ctx->lock);
-}
-
-/*
- * Put a counter into inactive state and update time fields.
- * Enabling the leader of a group effectively enables all
- * the group members that aren't explicitly disabled, so we
- * have to update their ->tstamp_enabled also.
- * Note: this works for group members as well as group leaders
- * since the non-leader members' sibling_lists will be empty.
- */
-static void __perf_counter_mark_enabled(struct perf_counter *counter,
- struct perf_counter_context *ctx)
-{
- struct perf_counter *sub;
-
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
- list_for_each_entry(sub, &counter->sibling_list, group_entry)
- if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
- sub->tstamp_enabled =
- ctx->time - sub->total_time_enabled;
-}
-
-/*
- * Cross CPU call to enable a performance counter
- */
-static void __perf_counter_enable(void *info)
-{
- struct perf_counter *counter = info;
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
- struct perf_counter_context *ctx = counter->ctx;
- struct perf_counter *leader = counter->group_leader;
- int err;
-
- /*
- * If this is a per-task counter, need to check whether this
- * counter's task is the current task on this cpu.
- */
- if (ctx->task && cpuctx->task_ctx != ctx) {
- if (cpuctx->task_ctx || ctx->task != current)
- return;
- cpuctx->task_ctx = ctx;
- }
-
- spin_lock(&ctx->lock);
- ctx->is_active = 1;
- update_context_time(ctx);
-
- if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
- goto unlock;
- __perf_counter_mark_enabled(counter, ctx);
-
- /*
- * If the counter is in a group and isn't the group leader,
- * then don't put it on unless the group is on.
- */ - if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE) - goto unlock; - - if (!group_can_go_on(counter, cpuctx, 1)) { - err = -EEXIST; - } else { - perf_disable(); - if (counter == leader) - err = group_sched_in(counter, cpuctx, ctx, - smp_processor_id()); - else - err = counter_sched_in(counter, cpuctx, ctx, - smp_processor_id()); - perf_enable(); - } - - if (err) { - /* - * If this counter can't go on and it's part of a - * group, then the whole group has to come off. - */ - if (leader != counter) - group_sched_out(leader, cpuctx, ctx); - if (leader->attr.pinned) { - update_group_times(leader); - leader->state = PERF_COUNTER_STATE_ERROR; - } - } - - unlock: - spin_unlock(&ctx->lock); -} - -/* - * Enable a counter. - * - * If counter->ctx is a cloned context, callers must make sure that - * every task struct that counter->ctx->task could possibly point to - * remains valid. This condition is satisfied when called through - * perf_counter_for_each_child or perf_counter_for_each as described - * for perf_counter_disable. - */ -static void perf_counter_enable(struct perf_counter *counter) -{ - struct perf_counter_context *ctx = counter->ctx; - struct task_struct *task = ctx->task; - - if (!task) { - /* - * Enable the counter on the cpu that it's on - */ - smp_call_function_single(counter->cpu, __perf_counter_enable, - counter, 1); - return; - } - - spin_lock_irq(&ctx->lock); - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) - goto out; - - /* - * If the counter is in error state, clear that first. - * That way, if we see the counter in error state below, we - * know that it has gone back into error state, as distinct - * from the task having been scheduled away before the - * cross-call arrived. - */ - if (counter->state == PERF_COUNTER_STATE_ERROR) - counter->state = PERF_COUNTER_STATE_OFF; - - retry: - spin_unlock_irq(&ctx->lock); - task_oncpu_function_call(task, __perf_counter_enable, counter); - - spin_lock_irq(&ctx->lock); - - /* - * If the context is active and the counter is still off, - * we need to retry the cross-call. - */ - if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF) - goto retry; - - /* - * Since we have the lock this context can't be scheduled - * in, so we can change the state safely. - */ - if (counter->state == PERF_COUNTER_STATE_OFF) - __perf_counter_mark_enabled(counter, ctx); - - out: - spin_unlock_irq(&ctx->lock); -} - -static int perf_counter_refresh(struct perf_counter *counter, int refresh) -{ - /* - * not supported on inherited counters - */ - if (counter->attr.inherit) - return -EINVAL; - - atomic_add(refresh, &counter->event_limit); - perf_counter_enable(counter); - - return 0; -} - -void __perf_counter_sched_out(struct perf_counter_context *ctx, - struct perf_cpu_context *cpuctx) -{ - struct perf_counter *counter; - - spin_lock(&ctx->lock); - ctx->is_active = 0; - if (likely(!ctx->nr_counters)) - goto out; - update_context_time(ctx); - - perf_disable(); - if (ctx->nr_active) { - list_for_each_entry(counter, &ctx->group_list, group_entry) { - if (counter != counter->group_leader) - counter_sched_out(counter, cpuctx, ctx); - else - group_sched_out(counter, cpuctx, ctx); - } - } - perf_enable(); - out: - spin_unlock(&ctx->lock); -} - -/* - * Test whether two contexts are equivalent, i.e. whether they - * have both been cloned from the same version of the same context - * and they both have the same number of enabled counters. 
- * If the number of enabled counters is the same, then the set - * of enabled counters should be the same, because these are both - * inherited contexts, therefore we can't access individual counters - * in them directly with an fd; we can only enable/disable all - * counters via prctl, or enable/disable all counters in a family - * via ioctl, which will have the same effect on both contexts. - */ -static int context_equiv(struct perf_counter_context *ctx1, - struct perf_counter_context *ctx2) -{ - return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx - && ctx1->parent_gen == ctx2->parent_gen - && !ctx1->pin_count && !ctx2->pin_count; -} - -static void __perf_counter_read(void *counter); - -static void __perf_counter_sync_stat(struct perf_counter *counter, - struct perf_counter *next_counter) -{ - u64 value; - - if (!counter->attr.inherit_stat) - return; - - /* - * Update the counter value, we cannot use perf_counter_read() - * because we're in the middle of a context switch and have IRQs - * disabled, which upsets smp_call_function_single(), however - * we know the counter must be on the current CPU, therefore we - * don't need to use it. - */ - switch (counter->state) { - case PERF_COUNTER_STATE_ACTIVE: - __perf_counter_read(counter); - break; - - case PERF_COUNTER_STATE_INACTIVE: - update_counter_times(counter); - break; - - default: - break; - } - - /* - * In order to keep per-task stats reliable we need to flip the counter - * values when we flip the contexts. - */ - value = atomic64_read(&next_counter->count); - value = atomic64_xchg(&counter->count, value); - atomic64_set(&next_counter->count, value); - - swap(counter->total_time_enabled, next_counter->total_time_enabled); - swap(counter->total_time_running, next_counter->total_time_running); - - /* - * Since we swizzled the values, update the user visible data too. - */ - perf_counter_update_userpage(counter); - perf_counter_update_userpage(next_counter); -} - -#define list_next_entry(pos, member) \ - list_entry(pos->member.next, typeof(*pos), member) - -static void perf_counter_sync_stat(struct perf_counter_context *ctx, - struct perf_counter_context *next_ctx) -{ - struct perf_counter *counter, *next_counter; - - if (!ctx->nr_stat) - return; - - counter = list_first_entry(&ctx->event_list, - struct perf_counter, event_entry); - - next_counter = list_first_entry(&next_ctx->event_list, - struct perf_counter, event_entry); - - while (&counter->event_entry != &ctx->event_list && - &next_counter->event_entry != &next_ctx->event_list) { - - __perf_counter_sync_stat(counter, next_counter); - - counter = list_next_entry(counter, event_entry); - next_counter = list_next_entry(next_counter, event_entry); - } -} - -/* - * Called from scheduler to remove the counters of the current task, - * with interrupts disabled. - * - * We stop each counter and update the counter value in counter->count. - * - * This does not protect us against NMI, but disable() - * sets the disabled bit in the control field of counter _before_ - * accessing the counter control register. If a NMI hits, then it will - * not restart the counter. 
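- *
- * Roughly, the ordering relied upon is (a sketch, not literal code
- * from this file):
- *
- *	set the disabled bit in counter->hw.config;	<- disable()
- *	barrier();
- *	write the hardware control register;
- *
- * so an NMI that lands in between already sees the disabled bit.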
- */ -void perf_counter_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) -{ - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = task->perf_counter_ctxp; - struct perf_counter_context *next_ctx; - struct perf_counter_context *parent; - struct pt_regs *regs; - int do_switch = 1; - - regs = task_pt_regs(task); - perf_swcounter_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0); - - if (likely(!ctx || !cpuctx->task_ctx)) - return; - - update_context_time(ctx); - - rcu_read_lock(); - parent = rcu_dereference(ctx->parent_ctx); - next_ctx = next->perf_counter_ctxp; - if (parent && next_ctx && - rcu_dereference(next_ctx->parent_ctx) == parent) { - /* - * Looks like the two contexts are clones, so we might be - * able to optimize the context switch. We lock both - * contexts and check that they are clones under the - * lock (including re-checking that neither has been - * uncloned in the meantime). It doesn't matter which - * order we take the locks because no other cpu could - * be trying to lock both of these tasks. - */ - spin_lock(&ctx->lock); - spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); - if (context_equiv(ctx, next_ctx)) { - /* - * XXX do we need a memory barrier of sorts - * wrt to rcu_dereference() of perf_counter_ctxp - */ - task->perf_counter_ctxp = next_ctx; - next->perf_counter_ctxp = ctx; - ctx->task = next; - next_ctx->task = task; - do_switch = 0; - - perf_counter_sync_stat(ctx, next_ctx); - } - spin_unlock(&next_ctx->lock); - spin_unlock(&ctx->lock); - } - rcu_read_unlock(); - - if (do_switch) { - __perf_counter_sched_out(ctx, cpuctx); - cpuctx->task_ctx = NULL; - } -} - -/* - * Called with IRQs disabled - */ -static void __perf_counter_task_sched_out(struct perf_counter_context *ctx) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - - if (!cpuctx->task_ctx) - return; - - if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) - return; - - __perf_counter_sched_out(ctx, cpuctx); - cpuctx->task_ctx = NULL; -} - -/* - * Called with IRQs disabled - */ -static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx) -{ - __perf_counter_sched_out(&cpuctx->ctx, cpuctx); -} - -static void -__perf_counter_sched_in(struct perf_counter_context *ctx, - struct perf_cpu_context *cpuctx, int cpu) -{ - struct perf_counter *counter; - int can_add_hw = 1; - - spin_lock(&ctx->lock); - ctx->is_active = 1; - if (likely(!ctx->nr_counters)) - goto out; - - ctx->timestamp = perf_clock(); - - perf_disable(); - - /* - * First go through the list and put on any pinned groups - * in order to give them the best chance of going on. - */ - list_for_each_entry(counter, &ctx->group_list, group_entry) { - if (counter->state <= PERF_COUNTER_STATE_OFF || - !counter->attr.pinned) - continue; - if (counter->cpu != -1 && counter->cpu != cpu) - continue; - - if (counter != counter->group_leader) - counter_sched_in(counter, cpuctx, ctx, cpu); - else { - if (group_can_go_on(counter, cpuctx, 1)) - group_sched_in(counter, cpuctx, ctx, cpu); - } - - /* - * If this pinned group hasn't been scheduled, - * put it in error state. - */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_group_times(counter); - counter->state = PERF_COUNTER_STATE_ERROR; - } - } - - list_for_each_entry(counter, &ctx->group_list, group_entry) { - /* - * Ignore counters in OFF or ERROR state, and - * ignore pinned counters since we did them already. 
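- *
- * (The first pass above placed the attr.pinned groups; this pass
- * packs the remaining "flexible" groups. Once one of them fails
- * group_sched_in(), can_add_hw drops to 0 and the rest stay
- * INACTIVE until rotate_ctx() gives them another chance.)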
- */ - if (counter->state <= PERF_COUNTER_STATE_OFF || - counter->attr.pinned) - continue; - - /* - * Listen to the 'cpu' scheduling filter constraint - * of counters: - */ - if (counter->cpu != -1 && counter->cpu != cpu) - continue; - - if (counter != counter->group_leader) { - if (counter_sched_in(counter, cpuctx, ctx, cpu)) - can_add_hw = 0; - } else { - if (group_can_go_on(counter, cpuctx, can_add_hw)) { - if (group_sched_in(counter, cpuctx, ctx, cpu)) - can_add_hw = 0; - } - } - } - perf_enable(); - out: - spin_unlock(&ctx->lock); -} - -/* - * Called from scheduler to add the counters of the current task - * with interrupts disabled. - * - * We restore the counter value and then enable it. - * - * This does not protect us against NMI, but enable() - * sets the enabled bit in the control field of counter _before_ - * accessing the counter control register. If a NMI hits, then it will - * keep the counter running. - */ -void perf_counter_task_sched_in(struct task_struct *task, int cpu) -{ - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = task->perf_counter_ctxp; - - if (likely(!ctx)) - return; - if (cpuctx->task_ctx == ctx) - return; - __perf_counter_sched_in(ctx, cpuctx, cpu); - cpuctx->task_ctx = ctx; -} - -static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) -{ - struct perf_counter_context *ctx = &cpuctx->ctx; - - __perf_counter_sched_in(ctx, cpuctx, cpu); -} - -#define MAX_INTERRUPTS (~0ULL) - -static void perf_log_throttle(struct perf_counter *counter, int enable); - -static void perf_adjust_period(struct perf_counter *counter, u64 events) -{ - struct hw_perf_counter *hwc = &counter->hw; - u64 period, sample_period; - s64 delta; - - events *= hwc->sample_period; - period = div64_u64(events, counter->attr.sample_freq); - - delta = (s64)(period - hwc->sample_period); - delta = (delta + 7) / 8; /* low pass filter */ - - sample_period = hwc->sample_period + delta; - - if (!sample_period) - sample_period = 1; - - hwc->sample_period = sample_period; -} - -static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) -{ - struct perf_counter *counter; - struct hw_perf_counter *hwc; - u64 interrupts, freq; - - spin_lock(&ctx->lock); - list_for_each_entry(counter, &ctx->group_list, group_entry) { - if (counter->state != PERF_COUNTER_STATE_ACTIVE) - continue; - - hwc = &counter->hw; - - interrupts = hwc->interrupts; - hwc->interrupts = 0; - - /* - * unthrottle counters on the tick - */ - if (interrupts == MAX_INTERRUPTS) { - perf_log_throttle(counter, 1); - counter->pmu->unthrottle(counter); - interrupts = 2*sysctl_perf_counter_sample_rate/HZ; - } - - if (!counter->attr.freq || !counter->attr.sample_freq) - continue; - - /* - * if the specified freq < HZ then we need to skip ticks - */ - if (counter->attr.sample_freq < HZ) { - freq = counter->attr.sample_freq; - - hwc->freq_count += freq; - hwc->freq_interrupts += interrupts; - - if (hwc->freq_count < HZ) - continue; - - interrupts = hwc->freq_interrupts; - hwc->freq_interrupts = 0; - hwc->freq_count -= HZ; - } else - freq = HZ; - - perf_adjust_period(counter, freq * interrupts); - - /* - * In order to avoid being stalled by an (accidental) huge - * sample period, force reset the sample period if we didn't - * get any events in this freq period. 
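- *
- * For reference, perf_adjust_period() above moves the period
- * through a /8 low-pass filter. With made-up numbers: HZ == 1000,
- * attr.sample_freq == 1000, two interrupts in the last tick and a
- * current period of 10000 gives events = (1000 * 2) * 10000, a
- * target period of 20000000 / 1000 = 20000, and
- * delta = (20000 - 10000 + 7) / 8 = 1250, so the period eases from
- * 10000 to 11250 rather than jumping straight to 20000.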
- */ - if (!interrupts) { - perf_disable(); - counter->pmu->disable(counter); - atomic64_set(&hwc->period_left, 0); - counter->pmu->enable(counter); - perf_enable(); - } - } - spin_unlock(&ctx->lock); -} - -/* - * Round-robin a context's counters: - */ -static void rotate_ctx(struct perf_counter_context *ctx) -{ - struct perf_counter *counter; - - if (!ctx->nr_counters) - return; - - spin_lock(&ctx->lock); - /* - * Rotate the first entry last (works just fine for group counters too): - */ - perf_disable(); - list_for_each_entry(counter, &ctx->group_list, group_entry) { - list_move_tail(&counter->group_entry, &ctx->group_list); - break; - } - perf_enable(); - - spin_unlock(&ctx->lock); -} - -void perf_counter_task_tick(struct task_struct *curr, int cpu) -{ - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; - - if (!atomic_read(&nr_counters)) - return; - - cpuctx = &per_cpu(perf_cpu_context, cpu); - ctx = curr->perf_counter_ctxp; - - perf_ctx_adjust_freq(&cpuctx->ctx); - if (ctx) - perf_ctx_adjust_freq(ctx); - - perf_counter_cpu_sched_out(cpuctx); - if (ctx) - __perf_counter_task_sched_out(ctx); - - rotate_ctx(&cpuctx->ctx); - if (ctx) - rotate_ctx(ctx); - - perf_counter_cpu_sched_in(cpuctx, cpu); - if (ctx) - perf_counter_task_sched_in(curr, cpu); -} - -/* - * Enable all of a task's counters that have been marked enable-on-exec. - * This expects task == current. - */ -static void perf_counter_enable_on_exec(struct task_struct *task) -{ - struct perf_counter_context *ctx; - struct perf_counter *counter; - unsigned long flags; - int enabled = 0; - - local_irq_save(flags); - ctx = task->perf_counter_ctxp; - if (!ctx || !ctx->nr_counters) - goto out; - - __perf_counter_task_sched_out(ctx); - - spin_lock(&ctx->lock); - - list_for_each_entry(counter, &ctx->group_list, group_entry) { - if (!counter->attr.enable_on_exec) - continue; - counter->attr.enable_on_exec = 0; - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) - continue; - __perf_counter_mark_enabled(counter, ctx); - enabled = 1; - } - - /* - * Unclone this context if we enabled any counter. - */ - if (enabled) - unclone_ctx(ctx); - - spin_unlock(&ctx->lock); - - perf_counter_task_sched_in(task, smp_processor_id()); - out: - local_irq_restore(flags); -} - -/* - * Cross CPU call to read the hardware counter - */ -static void __perf_counter_read(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; - unsigned long flags; - - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. In that case - * counter->count would have been updated to a recent sample - * when the counter was scheduled out. 
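- *
- * (perf_counter_read() below only issues this cross-call for
- * ACTIVE counters, via smp_call_function_single(counter->oncpu, ...);
- * for INACTIVE ones it merely refreshes the accumulated times.)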
- */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; - - local_irq_save(flags); - if (ctx->is_active) - update_context_time(ctx); - counter->pmu->read(counter); - update_counter_times(counter); - local_irq_restore(flags); -} - -static u64 perf_counter_read(struct perf_counter *counter) -{ - /* - * If counter is enabled and currently active on a CPU, update the - * value in the counter structure: - */ - if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - smp_call_function_single(counter->oncpu, - __perf_counter_read, counter, 1); - } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_counter_times(counter); - } - - return atomic64_read(&counter->count); -} - -/* - * Initialize the perf_counter context in a task_struct: - */ -static void -__perf_counter_init_context(struct perf_counter_context *ctx, - struct task_struct *task) -{ - memset(ctx, 0, sizeof(*ctx)); - spin_lock_init(&ctx->lock); - mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->group_list); - INIT_LIST_HEAD(&ctx->event_list); - atomic_set(&ctx->refcount, 1); - ctx->task = task; -} - -static struct perf_counter_context *find_get_context(pid_t pid, int cpu) -{ - struct perf_counter_context *ctx; - struct perf_cpu_context *cpuctx; - struct task_struct *task; - unsigned long flags; - int err; - - /* - * If cpu is not a wildcard then this is a percpu counter: - */ - if (cpu != -1) { - /* Must be root to operate on a CPU counter: */ - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EACCES); - - if (cpu < 0 || cpu > num_possible_cpus()) - return ERR_PTR(-EINVAL); - - /* - * We could be clever and allow to attach a counter to an - * offline CPU and activate it when the CPU comes up, but - * that's for later. - */ - if (!cpu_isset(cpu, cpu_online_map)) - return ERR_PTR(-ENODEV); - - cpuctx = &per_cpu(perf_cpu_context, cpu); - ctx = &cpuctx->ctx; - get_ctx(ctx); - - return ctx; - } - - rcu_read_lock(); - if (!pid) - task = current; - else - task = find_task_by_vpid(pid); - if (task) - get_task_struct(task); - rcu_read_unlock(); - - if (!task) - return ERR_PTR(-ESRCH); - - /* - * Can't attach counters to a dying task. - */ - err = -ESRCH; - if (task->flags & PF_EXITING) - goto errout; - - /* Reuse ptrace permission checks for now. */ - err = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto errout; - - retry: - ctx = perf_lock_task_context(task, &flags); - if (ctx) { - unclone_ctx(ctx); - spin_unlock_irqrestore(&ctx->lock, flags); - } - - if (!ctx) { - ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); - err = -ENOMEM; - if (!ctx) - goto errout; - __perf_counter_init_context(ctx, task); - get_ctx(ctx); - if (cmpxchg(&task->perf_counter_ctxp, NULL, ctx)) { - /* - * We raced with some other task; use - * the context they set. 
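- *
- * A sketch of that race, two tasks attaching concurrently:
- *
- *	A: cmpxchg(&task->perf_counter_ctxp, NULL, ctxA) == NULL,  A wins
- *	B: cmpxchg(&task->perf_counter_ctxp, NULL, ctxB) == ctxA,  B loses
- *
- * so B frees its candidate context and retries the lookup, now
- * finding ctxA installed.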
- */ - kfree(ctx); - goto retry; - } - get_task_struct(task); - } - - put_task_struct(task); - return ctx; - - errout: - put_task_struct(task); - return ERR_PTR(err); -} - -static void free_counter_rcu(struct rcu_head *head) -{ - struct perf_counter *counter; - - counter = container_of(head, struct perf_counter, rcu_head); - if (counter->ns) - put_pid_ns(counter->ns); - kfree(counter); -} - -static void perf_pending_sync(struct perf_counter *counter); - -static void free_counter(struct perf_counter *counter) -{ - perf_pending_sync(counter); - - if (!counter->parent) { - atomic_dec(&nr_counters); - if (counter->attr.mmap) - atomic_dec(&nr_mmap_counters); - if (counter->attr.comm) - atomic_dec(&nr_comm_counters); - if (counter->attr.task) - atomic_dec(&nr_task_counters); - } - - if (counter->output) { - fput(counter->output->filp); - counter->output = NULL; - } - - if (counter->destroy) - counter->destroy(counter); - - put_ctx(counter->ctx); - call_rcu(&counter->rcu_head, free_counter_rcu); -} - -/* - * Called when the last reference to the file is gone. - */ -static int perf_release(struct inode *inode, struct file *file) -{ - struct perf_counter *counter = file->private_data; - struct perf_counter_context *ctx = counter->ctx; - - file->private_data = NULL; - - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - perf_counter_remove_from_context(counter); - mutex_unlock(&ctx->mutex); - - mutex_lock(&counter->owner->perf_counter_mutex); - list_del_init(&counter->owner_entry); - mutex_unlock(&counter->owner->perf_counter_mutex); - put_task_struct(counter->owner); - - free_counter(counter); - - return 0; -} - -static int perf_counter_read_size(struct perf_counter *counter) -{ - int entry = sizeof(u64); /* value */ - int size = 0; - int nr = 1; - - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - size += sizeof(u64); - - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - size += sizeof(u64); - - if (counter->attr.read_format & PERF_FORMAT_ID) - entry += sizeof(u64); - - if (counter->attr.read_format & PERF_FORMAT_GROUP) { - nr += counter->group_leader->nr_siblings; - size += sizeof(u64); - } - - size += entry * nr; - - return size; -} - -static u64 perf_counter_read_value(struct perf_counter *counter) -{ - struct perf_counter *child; - u64 total = 0; - - total += perf_counter_read(counter); - list_for_each_entry(child, &counter->child_list, child_list) - total += perf_counter_read(child); - - return total; -} - -static int perf_counter_read_entry(struct perf_counter *counter, - u64 read_format, char __user *buf) -{ - int n = 0, count = 0; - u64 values[2]; - - values[n++] = perf_counter_read_value(counter); - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); - - count = n * sizeof(u64); - - if (copy_to_user(buf, values, count)) - return -EFAULT; - - return count; -} - -static int perf_counter_read_group(struct perf_counter *counter, - u64 read_format, char __user *buf) -{ - struct perf_counter *leader = counter->group_leader, *sub; - int n = 0, size = 0, err = -EFAULT; - u64 values[3]; - - values[n++] = 1 + leader->nr_siblings; - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = leader->total_time_enabled + - atomic64_read(&leader->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = leader->total_time_running + - atomic64_read(&leader->child_total_time_running); - } - - size = n * sizeof(u64); - - if (copy_to_user(buf, values, size)) - return -EFAULT; - - err = 
perf_counter_read_entry(leader, read_format, buf + size); - if (err < 0) - return err; - - size += err; - - list_for_each_entry(sub, &leader->sibling_list, group_entry) { - err = perf_counter_read_entry(sub, read_format, - buf + size); - if (err < 0) - return err; - - size += err; - } - - return size; -} - -static int perf_counter_read_one(struct perf_counter *counter, - u64 read_format, char __user *buf) -{ - u64 values[4]; - int n = 0; - - values[n++] = perf_counter_read_value(counter); - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); - } - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); - - if (copy_to_user(buf, values, n * sizeof(u64))) - return -EFAULT; - - return n * sizeof(u64); -} - -/* - * Read the performance counter - simple non blocking version for now - */ -static ssize_t -perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) -{ - u64 read_format = counter->attr.read_format; - int ret; - - /* - * Return end-of-file for a read on a counter that is in - * error state (i.e. because it was pinned but it couldn't be - * scheduled on to the CPU at some point). - */ - if (counter->state == PERF_COUNTER_STATE_ERROR) - return 0; - - if (count < perf_counter_read_size(counter)) - return -ENOSPC; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->child_mutex); - if (read_format & PERF_FORMAT_GROUP) - ret = perf_counter_read_group(counter, read_format, buf); - else - ret = perf_counter_read_one(counter, read_format, buf); - mutex_unlock(&counter->child_mutex); - - return ret; -} - -static ssize_t -perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) -{ - struct perf_counter *counter = file->private_data; - - return perf_read_hw(counter, buf, count); -} - -static unsigned int perf_poll(struct file *file, poll_table *wait) -{ - struct perf_counter *counter = file->private_data; - struct perf_mmap_data *data; - unsigned int events = POLL_HUP; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (data) - events = atomic_xchg(&data->poll, 0); - rcu_read_unlock(); - - poll_wait(file, &counter->waitq, wait); - - return events; -} - -static void perf_counter_reset(struct perf_counter *counter) -{ - (void)perf_counter_read(counter); - atomic64_set(&counter->count, 0); - perf_counter_update_userpage(counter); -} - -/* - * Holding the top-level counter's child_mutex means that any - * descendant process that has inherited this counter will block - * in sync_child_counter if it goes to exit, thus satisfying the - * task existence requirements of perf_counter_enable/disable. 
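- *
- * (perf_counter_enable() dereferences ctx->task and cross-calls
- * into it, so the task must not be reaped while such a call is in
- * flight; holding child_mutex guarantees that for every inherited
- * child visited below.)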
- */
-static void perf_counter_for_each_child(struct perf_counter *counter,
-					void (*func)(struct perf_counter *))
-{
-	struct perf_counter *child;
-
-	WARN_ON_ONCE(counter->ctx->parent_ctx);
-	mutex_lock(&counter->child_mutex);
-	func(counter);
-	list_for_each_entry(child, &counter->child_list, child_list)
-		func(child);
-	mutex_unlock(&counter->child_mutex);
-}
-
-static void perf_counter_for_each(struct perf_counter *counter,
-				  void (*func)(struct perf_counter *))
-{
-	struct perf_counter_context *ctx = counter->ctx;
-	struct perf_counter *sibling;
-
-	WARN_ON_ONCE(ctx->parent_ctx);
-	mutex_lock(&ctx->mutex);
-	counter = counter->group_leader;
-
-	perf_counter_for_each_child(counter, func);
-	list_for_each_entry(sibling, &counter->sibling_list, group_entry)
-		perf_counter_for_each_child(sibling, func);
-	mutex_unlock(&ctx->mutex);
-}
-
-static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
-{
-	struct perf_counter_context *ctx = counter->ctx;
-	int ret = 0;
-	u64 value;
-
-	if (!counter->attr.sample_period)
-		return -EINVAL;
-
-	if (copy_from_user(&value, arg, sizeof(value)))
-		return -EFAULT;
-
-	if (!value)
-		return -EINVAL;
-
-	spin_lock_irq(&ctx->lock);
-	if (counter->attr.freq) {
-		if (value > sysctl_perf_counter_sample_rate) {
-			ret = -EINVAL;
-			goto unlock;
-		}
-
-		counter->attr.sample_freq = value;
-	} else {
-		counter->attr.sample_period = value;
-		counter->hw.sample_period = value;
-	}
-unlock:
-	spin_unlock_irq(&ctx->lock);
-
-	return ret;
-}
-
-int perf_counter_set_output(struct perf_counter *counter, int output_fd);
-
-static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	struct perf_counter *counter = file->private_data;
-	void (*func)(struct perf_counter *);
-	u32 flags = arg;
-
-	switch (cmd) {
-	case PERF_COUNTER_IOC_ENABLE:
-		func = perf_counter_enable;
-		break;
-	case PERF_COUNTER_IOC_DISABLE:
-		func = perf_counter_disable;
-		break;
-	case PERF_COUNTER_IOC_RESET:
-		func = perf_counter_reset;
-		break;
-
-	case PERF_COUNTER_IOC_REFRESH:
-		return perf_counter_refresh(counter, arg);
-
-	case PERF_COUNTER_IOC_PERIOD:
-		return perf_counter_period(counter, (u64 __user *)arg);
-
-	case PERF_COUNTER_IOC_SET_OUTPUT:
-		return perf_counter_set_output(counter, arg);
-
-	default:
-		return -ENOTTY;
-	}
-
-	if (flags & PERF_IOC_FLAG_GROUP)
-		perf_counter_for_each(counter, func);
-	else
-		perf_counter_for_each_child(counter, func);
-
-	return 0;
-}
-
-int perf_counter_task_enable(void)
-{
-	struct perf_counter *counter;
-
-	mutex_lock(&current->perf_counter_mutex);
-	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
-		perf_counter_for_each_child(counter, perf_counter_enable);
-	mutex_unlock(&current->perf_counter_mutex);
-
-	return 0;
-}
-
-int perf_counter_task_disable(void)
-{
-	struct perf_counter *counter;
-
-	mutex_lock(&current->perf_counter_mutex);
-	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
-		perf_counter_for_each_child(counter, perf_counter_disable);
-	mutex_unlock(&current->perf_counter_mutex);
-
-	return 0;
-}
-
-#ifndef PERF_COUNTER_INDEX_OFFSET
-# define PERF_COUNTER_INDEX_OFFSET 0
-#endif
-
-static int perf_counter_index(struct perf_counter *counter)
-{
-	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
-		return 0;
-
-	return counter->hw.idx + 1 - PERF_COUNTER_INDEX_OFFSET;
-}
-
-/*
- * Callers need to ensure there can be no nesting of this function, otherwise
- * the seqlock logic goes bad.
We can not serialize this because the arch - * code calls this from NMI context. - */ -void perf_counter_update_userpage(struct perf_counter *counter) -{ - struct perf_counter_mmap_page *userpg; - struct perf_mmap_data *data; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (!data) - goto unlock; - - userpg = data->user_page; - - /* - * Disable preemption so as to not let the corresponding user-space - * spin too long if we get preempted. - */ - preempt_disable(); - ++userpg->lock; - barrier(); - userpg->index = perf_counter_index(counter); - userpg->offset = atomic64_read(&counter->count); - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - userpg->offset -= atomic64_read(&counter->hw.prev_count); - - userpg->time_enabled = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); - - userpg->time_running = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); - - barrier(); - ++userpg->lock; - preempt_enable(); -unlock: - rcu_read_unlock(); -} - -static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct perf_counter *counter = vma->vm_file->private_data; - struct perf_mmap_data *data; - int ret = VM_FAULT_SIGBUS; - - if (vmf->flags & FAULT_FLAG_MKWRITE) { - if (vmf->pgoff == 0) - ret = 0; - return ret; - } - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (!data) - goto unlock; - - if (vmf->pgoff == 0) { - vmf->page = virt_to_page(data->user_page); - } else { - int nr = vmf->pgoff - 1; - - if ((unsigned)nr > data->nr_pages) - goto unlock; - - if (vmf->flags & FAULT_FLAG_WRITE) - goto unlock; - - vmf->page = virt_to_page(data->data_pages[nr]); - } - - get_page(vmf->page); - vmf->page->mapping = vma->vm_file->f_mapping; - vmf->page->index = vmf->pgoff; - - ret = 0; -unlock: - rcu_read_unlock(); - - return ret; -} - -static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) -{ - struct perf_mmap_data *data; - unsigned long size; - int i; - - WARN_ON(atomic_read(&counter->mmap_count)); - - size = sizeof(struct perf_mmap_data); - size += nr_pages * sizeof(void *); - - data = kzalloc(size, GFP_KERNEL); - if (!data) - goto fail; - - data->user_page = (void *)get_zeroed_page(GFP_KERNEL); - if (!data->user_page) - goto fail_user_page; - - for (i = 0; i < nr_pages; i++) { - data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); - if (!data->data_pages[i]) - goto fail_data_pages; - } - - data->nr_pages = nr_pages; - atomic_set(&data->lock, -1); - - if (counter->attr.watermark) { - data->watermark = min_t(long, PAGE_SIZE * nr_pages, - counter->attr.wakeup_watermark); - } - if (!data->watermark) - data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4); - - rcu_assign_pointer(counter->data, data); - - return 0; - -fail_data_pages: - for (i--; i >= 0; i--) - free_page((unsigned long)data->data_pages[i]); - - free_page((unsigned long)data->user_page); - -fail_user_page: - kfree(data); - -fail: - return -ENOMEM; -} - -static void perf_mmap_free_page(unsigned long addr) -{ - struct page *page = virt_to_page((void *)addr); - - page->mapping = NULL; - __free_page(page); -} - -static void __perf_mmap_data_free(struct rcu_head *rcu_head) -{ - struct perf_mmap_data *data; - int i; - - data = container_of(rcu_head, struct perf_mmap_data, rcu_head); - - perf_mmap_free_page((unsigned long)data->user_page); - for (i = 0; i < data->nr_pages; i++) - perf_mmap_free_page((unsigned long)data->data_pages[i]); - - kfree(data); -} - -static void perf_mmap_data_free(struct 
perf_counter *counter) -{ - struct perf_mmap_data *data = counter->data; - - WARN_ON(atomic_read(&counter->mmap_count)); - - rcu_assign_pointer(counter->data, NULL); - call_rcu(&data->rcu_head, __perf_mmap_data_free); -} - -static void perf_mmap_open(struct vm_area_struct *vma) -{ - struct perf_counter *counter = vma->vm_file->private_data; - - atomic_inc(&counter->mmap_count); -} - -static void perf_mmap_close(struct vm_area_struct *vma) -{ - struct perf_counter *counter = vma->vm_file->private_data; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) { - struct user_struct *user = current_user(); - - atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm); - vma->vm_mm->locked_vm -= counter->data->nr_locked; - perf_mmap_data_free(counter); - mutex_unlock(&counter->mmap_mutex); - } -} - -static struct vm_operations_struct perf_mmap_vmops = { - .open = perf_mmap_open, - .close = perf_mmap_close, - .fault = perf_mmap_fault, - .page_mkwrite = perf_mmap_fault, -}; - -static int perf_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct perf_counter *counter = file->private_data; - unsigned long user_locked, user_lock_limit; - struct user_struct *user = current_user(); - unsigned long locked, lock_limit; - unsigned long vma_size; - unsigned long nr_pages; - long user_extra, extra; - int ret = 0; - - if (!(vma->vm_flags & VM_SHARED)) - return -EINVAL; - - vma_size = vma->vm_end - vma->vm_start; - nr_pages = (vma_size / PAGE_SIZE) - 1; - - /* - * If we have data pages ensure they're a power-of-two number, so we - * can do bitmasks instead of modulo. - */ - if (nr_pages != 0 && !is_power_of_2(nr_pages)) - return -EINVAL; - - if (vma_size != PAGE_SIZE * (1 + nr_pages)) - return -EINVAL; - - if (vma->vm_pgoff != 0) - return -EINVAL; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->mmap_mutex); - if (counter->output) { - ret = -EINVAL; - goto unlock; - } - - if (atomic_inc_not_zero(&counter->mmap_count)) { - if (nr_pages != counter->data->nr_pages) - ret = -EINVAL; - goto unlock; - } - - user_extra = nr_pages + 1; - user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10); - - /* - * Increase the limit linearly with more CPUs: - */ - user_lock_limit *= num_online_cpus(); - - user_locked = atomic_long_read(&user->locked_vm) + user_extra; - - extra = 0; - if (user_locked > user_lock_limit) - extra = user_locked - user_lock_limit; - - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; - lock_limit >>= PAGE_SHIFT; - locked = vma->vm_mm->locked_vm + extra; - - if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && - !capable(CAP_IPC_LOCK)) { - ret = -EPERM; - goto unlock; - } - - WARN_ON(counter->data); - ret = perf_mmap_data_alloc(counter, nr_pages); - if (ret) - goto unlock; - - atomic_set(&counter->mmap_count, 1); - atomic_long_add(user_extra, &user->locked_vm); - vma->vm_mm->locked_vm += extra; - counter->data->nr_locked = extra; - if (vma->vm_flags & VM_WRITE) - counter->data->writable = 1; - -unlock: - mutex_unlock(&counter->mmap_mutex); - - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &perf_mmap_vmops; - - return ret; -} - -static int perf_fasync(int fd, struct file *filp, int on) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct perf_counter *counter = filp->private_data; - int retval; - - mutex_lock(&inode->i_mutex); - retval = fasync_helper(fd, filp, on, &counter->fasync); - mutex_unlock(&inode->i_mutex); - - if (retval < 0) - return retval; - - 
return 0; -} - -static const struct file_operations perf_fops = { - .release = perf_release, - .read = perf_read, - .poll = perf_poll, - .unlocked_ioctl = perf_ioctl, - .compat_ioctl = perf_ioctl, - .mmap = perf_mmap, - .fasync = perf_fasync, -}; - -/* - * Perf counter wakeup - * - * If there's data, ensure we set the poll() state and publish everything - * to user-space before waking everybody up. - */ - -void perf_counter_wakeup(struct perf_counter *counter) -{ - wake_up_all(&counter->waitq); - - if (counter->pending_kill) { - kill_fasync(&counter->fasync, SIGIO, counter->pending_kill); - counter->pending_kill = 0; - } -} - -/* - * Pending wakeups - * - * Handle the case where we need to wakeup up from NMI (or rq->lock) context. - * - * The NMI bit means we cannot possibly take locks. Therefore, maintain a - * single linked list and use cmpxchg() to add entries lockless. - */ - -static void perf_pending_counter(struct perf_pending_entry *entry) -{ - struct perf_counter *counter = container_of(entry, - struct perf_counter, pending); - - if (counter->pending_disable) { - counter->pending_disable = 0; - __perf_counter_disable(counter); - } - - if (counter->pending_wakeup) { - counter->pending_wakeup = 0; - perf_counter_wakeup(counter); - } -} - -#define PENDING_TAIL ((struct perf_pending_entry *)-1UL) - -static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = { - PENDING_TAIL, -}; - -static void perf_pending_queue(struct perf_pending_entry *entry, - void (*func)(struct perf_pending_entry *)) -{ - struct perf_pending_entry **head; - - if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL) - return; - - entry->func = func; - - head = &get_cpu_var(perf_pending_head); - - do { - entry->next = *head; - } while (cmpxchg(head, entry->next, entry) != entry->next); - - set_perf_counter_pending(); - - put_cpu_var(perf_pending_head); -} - -static int __perf_pending_run(void) -{ - struct perf_pending_entry *list; - int nr = 0; - - list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL); - while (list != PENDING_TAIL) { - void (*func)(struct perf_pending_entry *); - struct perf_pending_entry *entry = list; - - list = list->next; - - func = entry->func; - entry->next = NULL; - /* - * Ensure we observe the unqueue before we issue the wakeup, - * so that we won't be waiting forever. - * -- see perf_not_pending(). - */ - smp_wmb(); - - func(entry); - nr++; - } - - return nr; -} - -static inline int perf_not_pending(struct perf_counter *counter) -{ - /* - * If we flush on whatever cpu we run, there is a chance we don't - * need to wait. - */ - get_cpu(); - __perf_pending_run(); - put_cpu(); - - /* - * Ensure we see the proper queue state before going to sleep - * so that we do not miss the wakeup. 
-- see perf_pending_handle() - */ - smp_rmb(); - return counter->pending.next == NULL; -} - -static void perf_pending_sync(struct perf_counter *counter) -{ - wait_event(counter->waitq, perf_not_pending(counter)); -} - -void perf_counter_do_pending(void) -{ - __perf_pending_run(); -} - -/* - * Callchain support -- arch specific - */ - -__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - return NULL; -} - -/* - * Output - */ -static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, - unsigned long offset, unsigned long head) -{ - unsigned long mask; - - if (!data->writable) - return true; - - mask = (data->nr_pages << PAGE_SHIFT) - 1; - - offset = (offset - tail) & mask; - head = (head - tail) & mask; - - if ((int)(head - offset) < 0) - return false; - - return true; -} - -static void perf_output_wakeup(struct perf_output_handle *handle) -{ - atomic_set(&handle->data->poll, POLL_IN); - - if (handle->nmi) { - handle->counter->pending_wakeup = 1; - perf_pending_queue(&handle->counter->pending, - perf_pending_counter); - } else - perf_counter_wakeup(handle->counter); -} - -/* - * Curious locking construct. - * - * We need to ensure a later event doesn't publish a head when a former - * event isn't done writing. However since we need to deal with NMIs we - * cannot fully serialize things. - * - * What we do is serialize between CPUs so we only have to deal with NMI - * nesting on a single CPU. - * - * We only publish the head (and generate a wakeup) when the outer-most - * event completes. - */ -static void perf_output_lock(struct perf_output_handle *handle) -{ - struct perf_mmap_data *data = handle->data; - int cpu; - - handle->locked = 0; - - local_irq_save(handle->flags); - cpu = smp_processor_id(); - - if (in_nmi() && atomic_read(&data->lock) == cpu) - return; - - while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) - cpu_relax(); - - handle->locked = 1; -} - -static void perf_output_unlock(struct perf_output_handle *handle) -{ - struct perf_mmap_data *data = handle->data; - unsigned long head; - int cpu; - - data->done_head = data->head; - - if (!handle->locked) - goto out; - -again: - /* - * The xchg implies a full barrier that ensures all writes are done - * before we publish the new head, matched by a rmb() in userspace when - * reading this position. - */ - while ((head = atomic_long_xchg(&data->done_head, 0))) - data->user_page->data_head = head; - - /* - * NMI can happen here, which means we can miss a done_head update. - */ - - cpu = atomic_xchg(&data->lock, -1); - WARN_ON_ONCE(cpu != smp_processor_id()); - - /* - * Therefore we have to validate we did not indeed do so. - */ - if (unlikely(atomic_long_read(&data->done_head))) { - /* - * Since we had it locked, we can lock it again. 
- */ - while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) - cpu_relax(); - - goto again; - } - - if (atomic_xchg(&data->wakeup, 0)) - perf_output_wakeup(handle); -out: - local_irq_restore(handle->flags); -} - -void perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len) -{ - unsigned int pages_mask; - unsigned int offset; - unsigned int size; - void **pages; - - offset = handle->offset; - pages_mask = handle->data->nr_pages - 1; - pages = handle->data->data_pages; - - do { - unsigned int page_offset; - int nr; - - nr = (offset >> PAGE_SHIFT) & pages_mask; - page_offset = offset & (PAGE_SIZE - 1); - size = min_t(unsigned int, PAGE_SIZE - page_offset, len); - - memcpy(pages[nr] + page_offset, buf, size); - - len -= size; - buf += size; - offset += size; - } while (len); - - handle->offset = offset; - - /* - * Check we didn't copy past our reservation window, taking the - * possible unsigned int wrap into account. - */ - WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); -} - -int perf_output_begin(struct perf_output_handle *handle, - struct perf_counter *counter, unsigned int size, - int nmi, int sample) -{ - struct perf_counter *output_counter; - struct perf_mmap_data *data; - unsigned long tail, offset, head; - int have_lost; - struct { - struct perf_event_header header; - u64 id; - u64 lost; - } lost_event; - - rcu_read_lock(); - /* - * For inherited counters we send all the output towards the parent. - */ - if (counter->parent) - counter = counter->parent; - - output_counter = rcu_dereference(counter->output); - if (output_counter) - counter = output_counter; - - data = rcu_dereference(counter->data); - if (!data) - goto out; - - handle->data = data; - handle->counter = counter; - handle->nmi = nmi; - handle->sample = sample; - - if (!data->nr_pages) - goto fail; - - have_lost = atomic_read(&data->lost); - if (have_lost) - size += sizeof(lost_event); - - perf_output_lock(handle); - - do { - /* - * Userspace could choose to issue a mb() before updating the - * tail pointer. So that all reads will be completed before the - * write is issued. 
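- *
- * I.e. the user-space reader is expected to look roughly like
- * this (a sketch, not code from this file):
- *
- *	head = user_page->data_head;
- *	rmb();
- *	... consume records in [data_tail, head) ...
- *	mb();
- *	user_page->data_tail = head;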
- */ - tail = ACCESS_ONCE(data->user_page->data_tail); - smp_rmb(); - offset = head = atomic_long_read(&data->head); - head += size; - if (unlikely(!perf_output_space(data, tail, offset, head))) - goto fail; - } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); - - handle->offset = offset; - handle->head = head; - - if (head - tail > data->watermark) - atomic_set(&data->wakeup, 1); - - if (have_lost) { - lost_event.header.type = PERF_EVENT_LOST; - lost_event.header.misc = 0; - lost_event.header.size = sizeof(lost_event); - lost_event.id = counter->id; - lost_event.lost = atomic_xchg(&data->lost, 0); - - perf_output_put(handle, lost_event); - } - - return 0; - -fail: - atomic_inc(&data->lost); - perf_output_unlock(handle); -out: - rcu_read_unlock(); - - return -ENOSPC; -} - -void perf_output_end(struct perf_output_handle *handle) -{ - struct perf_counter *counter = handle->counter; - struct perf_mmap_data *data = handle->data; - - int wakeup_events = counter->attr.wakeup_events; - - if (handle->sample && wakeup_events) { - int events = atomic_inc_return(&data->events); - if (events >= wakeup_events) { - atomic_sub(wakeup_events, &data->events); - atomic_set(&data->wakeup, 1); - } - } - - perf_output_unlock(handle); - rcu_read_unlock(); -} - -static u32 perf_counter_pid(struct perf_counter *counter, struct task_struct *p) -{ - /* - * only top level counters have the pid namespace they were created in - */ - if (counter->parent) - counter = counter->parent; - - return task_tgid_nr_ns(p, counter->ns); -} - -static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) -{ - /* - * only top level counters have the pid namespace they were created in - */ - if (counter->parent) - counter = counter->parent; - - return task_pid_nr_ns(p, counter->ns); -} - -static void perf_output_read_one(struct perf_output_handle *handle, - struct perf_counter *counter) -{ - u64 read_format = counter->attr.read_format; - u64 values[4]; - int n = 0; - - values[n++] = atomic64_read(&counter->count); - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); - } - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); - - perf_output_copy(handle, values, n * sizeof(u64)); -} - -/* - * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult. 
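- *
- * For reference, the group read emitted below has this layout
- * (a sketch; the optional fields depend on read_format):
- *
- *	u64	nr;
- *	{ u64	time_enabled; }		if TOTAL_TIME_ENABLED
- *	{ u64	time_running; }		if TOTAL_TIME_RUNNING
- *	{ u64	value; { u64 id; } }	one entry per group member,
- *					id only if PERF_FORMAT_ID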
- */ -static void perf_output_read_group(struct perf_output_handle *handle, - struct perf_counter *counter) -{ - struct perf_counter *leader = counter->group_leader, *sub; - u64 read_format = counter->attr.read_format; - u64 values[5]; - int n = 0; - - values[n++] = 1 + leader->nr_siblings; - - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = leader->total_time_enabled; - - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = leader->total_time_running; - - if (leader != counter) - leader->pmu->read(leader); - - values[n++] = atomic64_read(&leader->count); - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(leader); - - perf_output_copy(handle, values, n * sizeof(u64)); - - list_for_each_entry(sub, &leader->sibling_list, group_entry) { - n = 0; - - if (sub != counter) - sub->pmu->read(sub); - - values[n++] = atomic64_read(&sub->count); - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(sub); - - perf_output_copy(handle, values, n * sizeof(u64)); - } -} - -static void perf_output_read(struct perf_output_handle *handle, - struct perf_counter *counter) -{ - if (counter->attr.read_format & PERF_FORMAT_GROUP) - perf_output_read_group(handle, counter); - else - perf_output_read_one(handle, counter); -} - -void perf_output_sample(struct perf_output_handle *handle, - struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter) -{ - u64 sample_type = data->type; - - perf_output_put(handle, *header); - - if (sample_type & PERF_SAMPLE_IP) - perf_output_put(handle, data->ip); - - if (sample_type & PERF_SAMPLE_TID) - perf_output_put(handle, data->tid_entry); - - if (sample_type & PERF_SAMPLE_TIME) - perf_output_put(handle, data->time); - - if (sample_type & PERF_SAMPLE_ADDR) - perf_output_put(handle, data->addr); - - if (sample_type & PERF_SAMPLE_ID) - perf_output_put(handle, data->id); - - if (sample_type & PERF_SAMPLE_STREAM_ID) - perf_output_put(handle, data->stream_id); - - if (sample_type & PERF_SAMPLE_CPU) - perf_output_put(handle, data->cpu_entry); - - if (sample_type & PERF_SAMPLE_PERIOD) - perf_output_put(handle, data->period); - - if (sample_type & PERF_SAMPLE_READ) - perf_output_read(handle, counter); - - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - if (data->callchain) { - int size = 1; - - if (data->callchain) - size += data->callchain->nr; - - size *= sizeof(u64); - - perf_output_copy(handle, data->callchain, size); - } else { - u64 nr = 0; - perf_output_put(handle, nr); - } - } - - if (sample_type & PERF_SAMPLE_RAW) { - if (data->raw) { - perf_output_put(handle, data->raw->size); - perf_output_copy(handle, data->raw->data, - data->raw->size); - } else { - struct { - u32 size; - u32 data; - } raw = { - .size = sizeof(u32), - .data = 0, - }; - perf_output_put(handle, raw); - } - } -} - -void perf_prepare_sample(struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter, - struct pt_regs *regs) -{ - u64 sample_type = counter->attr.sample_type; - - data->type = sample_type; - - header->type = PERF_EVENT_SAMPLE; - header->size = sizeof(*header); - - header->misc = 0; - header->misc |= perf_misc_flags(regs); - - if (sample_type & PERF_SAMPLE_IP) { - data->ip = perf_instruction_pointer(regs); - - header->size += sizeof(data->ip); - } - - if (sample_type & PERF_SAMPLE_TID) { - /* namespace issues */ - data->tid_entry.pid = perf_counter_pid(counter, current); - data->tid_entry.tid = perf_counter_tid(counter, current); - - header->size += 
sizeof(data->tid_entry); - } - - if (sample_type & PERF_SAMPLE_TIME) { - data->time = perf_clock(); - - header->size += sizeof(data->time); - } - - if (sample_type & PERF_SAMPLE_ADDR) - header->size += sizeof(data->addr); - - if (sample_type & PERF_SAMPLE_ID) { - data->id = primary_counter_id(counter); - - header->size += sizeof(data->id); - } - - if (sample_type & PERF_SAMPLE_STREAM_ID) { - data->stream_id = counter->id; - - header->size += sizeof(data->stream_id); - } - - if (sample_type & PERF_SAMPLE_CPU) { - data->cpu_entry.cpu = raw_smp_processor_id(); - data->cpu_entry.reserved = 0; - - header->size += sizeof(data->cpu_entry); - } - - if (sample_type & PERF_SAMPLE_PERIOD) - header->size += sizeof(data->period); - - if (sample_type & PERF_SAMPLE_READ) - header->size += perf_counter_read_size(counter); - - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - int size = 1; - - data->callchain = perf_callchain(regs); - - if (data->callchain) - size += data->callchain->nr; - - header->size += size * sizeof(u64); - } - - if (sample_type & PERF_SAMPLE_RAW) { - int size = sizeof(u32); - - if (data->raw) - size += data->raw->size; - else - size += sizeof(u32); - - WARN_ON_ONCE(size & (sizeof(u64)-1)); - header->size += size; - } -} - -static void perf_counter_output(struct perf_counter *counter, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct perf_output_handle handle; - struct perf_event_header header; - - perf_prepare_sample(&header, data, counter, regs); - - if (perf_output_begin(&handle, counter, header.size, nmi, 1)) - return; - - perf_output_sample(&handle, &header, data, counter); - - perf_output_end(&handle); -} - -/* - * read event - */ - -struct perf_read_event { - struct perf_event_header header; - - u32 pid; - u32 tid; -}; - -static void -perf_counter_read_event(struct perf_counter *counter, - struct task_struct *task) -{ - struct perf_output_handle handle; - struct perf_read_event read_event = { - .header = { - .type = PERF_EVENT_READ, - .misc = 0, - .size = sizeof(read_event) + perf_counter_read_size(counter), - }, - .pid = perf_counter_pid(counter, task), - .tid = perf_counter_tid(counter, task), - }; - int ret; - - ret = perf_output_begin(&handle, counter, read_event.header.size, 0, 0); - if (ret) - return; - - perf_output_put(&handle, read_event); - perf_output_read(&handle, counter); - - perf_output_end(&handle); -} - -/* - * task tracking -- fork/exit - * - * enabled by: attr.comm | attr.mmap | attr.task - */ - -struct perf_task_event { - struct task_struct *task; - struct perf_counter_context *task_ctx; - - struct { - struct perf_event_header header; - - u32 pid; - u32 ppid; - u32 tid; - u32 ptid; - u64 time; - } event; -}; - -static void perf_counter_task_output(struct perf_counter *counter, - struct perf_task_event *task_event) -{ - struct perf_output_handle handle; - int size; - struct task_struct *task = task_event->task; - int ret; - - size = task_event->event.header.size; - ret = perf_output_begin(&handle, counter, size, 0, 0); - - if (ret) - return; - - task_event->event.pid = perf_counter_pid(counter, task); - task_event->event.ppid = perf_counter_pid(counter, current); - - task_event->event.tid = perf_counter_tid(counter, task); - task_event->event.ptid = perf_counter_tid(counter, current); - - task_event->event.time = perf_clock(); - - perf_output_put(&handle, task_event->event); - - perf_output_end(&handle); -} - -static int perf_counter_task_match(struct perf_counter *counter) -{ - if (counter->attr.comm || counter->attr.mmap || 
counter->attr.task) - return 1; - - return 0; -} - -static void perf_counter_task_ctx(struct perf_counter_context *ctx, - struct perf_task_event *task_event) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_task_match(counter)) - perf_counter_task_output(counter, task_event); - } - rcu_read_unlock(); -} - -static void perf_counter_task_event(struct perf_task_event *task_event) -{ - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx = task_event->task_ctx; - - cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_task_ctx(&cpuctx->ctx, task_event); - put_cpu_var(perf_cpu_context); - - rcu_read_lock(); - if (!ctx) - ctx = rcu_dereference(task_event->task->perf_counter_ctxp); - if (ctx) - perf_counter_task_ctx(ctx, task_event); - rcu_read_unlock(); -} - -static void perf_counter_task(struct task_struct *task, - struct perf_counter_context *task_ctx, - int new) -{ - struct perf_task_event task_event; - - if (!atomic_read(&nr_comm_counters) && - !atomic_read(&nr_mmap_counters) && - !atomic_read(&nr_task_counters)) - return; - - task_event = (struct perf_task_event){ - .task = task, - .task_ctx = task_ctx, - .event = { - .header = { - .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT, - .misc = 0, - .size = sizeof(task_event.event), - }, - /* .pid */ - /* .ppid */ - /* .tid */ - /* .ptid */ - }, - }; - - perf_counter_task_event(&task_event); -} - -void perf_counter_fork(struct task_struct *task) -{ - perf_counter_task(task, NULL, 1); -} - -/* - * comm tracking - */ - -struct perf_comm_event { - struct task_struct *task; - char *comm; - int comm_size; - - struct { - struct perf_event_header header; - - u32 pid; - u32 tid; - } event; -}; - -static void perf_counter_comm_output(struct perf_counter *counter, - struct perf_comm_event *comm_event) -{ - struct perf_output_handle handle; - int size = comm_event->event.header.size; - int ret = perf_output_begin(&handle, counter, size, 0, 0); - - if (ret) - return; - - comm_event->event.pid = perf_counter_pid(counter, comm_event->task); - comm_event->event.tid = perf_counter_tid(counter, comm_event->task); - - perf_output_put(&handle, comm_event->event); - perf_output_copy(&handle, comm_event->comm, - comm_event->comm_size); - perf_output_end(&handle); -} - -static int perf_counter_comm_match(struct perf_counter *counter) -{ - if (counter->attr.comm) - return 1; - - return 0; -} - -static void perf_counter_comm_ctx(struct perf_counter_context *ctx, - struct perf_comm_event *comm_event) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_comm_match(counter)) - perf_counter_comm_output(counter, comm_event); - } - rcu_read_unlock(); -} - -static void perf_counter_comm_event(struct perf_comm_event *comm_event) -{ - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; - unsigned int size; - char comm[TASK_COMM_LEN]; - - memset(comm, 0, sizeof(comm)); - strncpy(comm, comm_event->task->comm, sizeof(comm)); - size = ALIGN(strlen(comm)+1, sizeof(u64)); - - comm_event->comm = comm; - comm_event->comm_size = size; - - comm_event->event.header.size = sizeof(comm_event->event) + size; - - cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_comm_ctx(&cpuctx->ctx, comm_event); - 
put_cpu_var(perf_cpu_context); - - rcu_read_lock(); - /* - * doesn't really matter which of the child contexts the - * events ends up in. - */ - ctx = rcu_dereference(current->perf_counter_ctxp); - if (ctx) - perf_counter_comm_ctx(ctx, comm_event); - rcu_read_unlock(); -} - -void perf_counter_comm(struct task_struct *task) -{ - struct perf_comm_event comm_event; - - if (task->perf_counter_ctxp) - perf_counter_enable_on_exec(task); - - if (!atomic_read(&nr_comm_counters)) - return; - - comm_event = (struct perf_comm_event){ - .task = task, - /* .comm */ - /* .comm_size */ - .event = { - .header = { - .type = PERF_EVENT_COMM, - .misc = 0, - /* .size */ - }, - /* .pid */ - /* .tid */ - }, - }; - - perf_counter_comm_event(&comm_event); -} - -/* - * mmap tracking - */ - -struct perf_mmap_event { - struct vm_area_struct *vma; - - const char *file_name; - int file_size; - - struct { - struct perf_event_header header; - - u32 pid; - u32 tid; - u64 start; - u64 len; - u64 pgoff; - } event; -}; - -static void perf_counter_mmap_output(struct perf_counter *counter, - struct perf_mmap_event *mmap_event) -{ - struct perf_output_handle handle; - int size = mmap_event->event.header.size; - int ret = perf_output_begin(&handle, counter, size, 0, 0); - - if (ret) - return; - - mmap_event->event.pid = perf_counter_pid(counter, current); - mmap_event->event.tid = perf_counter_tid(counter, current); - - perf_output_put(&handle, mmap_event->event); - perf_output_copy(&handle, mmap_event->file_name, - mmap_event->file_size); - perf_output_end(&handle); -} - -static int perf_counter_mmap_match(struct perf_counter *counter, - struct perf_mmap_event *mmap_event) -{ - if (counter->attr.mmap) - return 1; - - return 0; -} - -static void perf_counter_mmap_ctx(struct perf_counter_context *ctx, - struct perf_mmap_event *mmap_event) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_mmap_match(counter, mmap_event)) - perf_counter_mmap_output(counter, mmap_event); - } - rcu_read_unlock(); -} - -static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) -{ - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; - struct vm_area_struct *vma = mmap_event->vma; - struct file *file = vma->vm_file; - unsigned int size; - char tmp[16]; - char *buf = NULL; - const char *name; - - memset(tmp, 0, sizeof(tmp)); - - if (file) { - /* - * d_path works from the end of the buffer backwards, so we - * need to add enough zero bytes after the string to handle - * the 64bit alignment we do later. 
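- *
- * E.g. a name of strlen() 2 gets size = ALIGN(2 + 1, 8) = 8 below,
- * so perf_output_copy() reads 5 bytes past the terminating NUL;
- * the kzalloc() of PATH_MAX + sizeof(u64) keeps those bytes inside
- * the buffer and zeroed.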
- */ - buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL); - if (!buf) { - name = strncpy(tmp, "//enomem", sizeof(tmp)); - goto got_name; - } - name = d_path(&file->f_path, buf, PATH_MAX); - if (IS_ERR(name)) { - name = strncpy(tmp, "//toolong", sizeof(tmp)); - goto got_name; - } - } else { - if (arch_vma_name(mmap_event->vma)) { - name = strncpy(tmp, arch_vma_name(mmap_event->vma), - sizeof(tmp)); - goto got_name; - } - - if (!vma->vm_mm) { - name = strncpy(tmp, "[vdso]", sizeof(tmp)); - goto got_name; - } - - name = strncpy(tmp, "//anon", sizeof(tmp)); - goto got_name; - } - -got_name: - size = ALIGN(strlen(name)+1, sizeof(u64)); - - mmap_event->file_name = name; - mmap_event->file_size = size; - - mmap_event->event.header.size = sizeof(mmap_event->event) + size; - - cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event); - put_cpu_var(perf_cpu_context); - - rcu_read_lock(); - /* - * doesn't really matter which of the child contexts the - * events ends up in. - */ - ctx = rcu_dereference(current->perf_counter_ctxp); - if (ctx) - perf_counter_mmap_ctx(ctx, mmap_event); - rcu_read_unlock(); - - kfree(buf); -} - -void __perf_counter_mmap(struct vm_area_struct *vma) -{ - struct perf_mmap_event mmap_event; - - if (!atomic_read(&nr_mmap_counters)) - return; - - mmap_event = (struct perf_mmap_event){ - .vma = vma, - /* .file_name */ - /* .file_size */ - .event = { - .header = { - .type = PERF_EVENT_MMAP, - .misc = 0, - /* .size */ - }, - /* .pid */ - /* .tid */ - .start = vma->vm_start, - .len = vma->vm_end - vma->vm_start, - .pgoff = vma->vm_pgoff, - }, - }; - - perf_counter_mmap_event(&mmap_event); -} - -/* - * IRQ throttle logging - */ - -static void perf_log_throttle(struct perf_counter *counter, int enable) -{ - struct perf_output_handle handle; - int ret; - - struct { - struct perf_event_header header; - u64 time; - u64 id; - u64 stream_id; - } throttle_event = { - .header = { - .type = PERF_EVENT_THROTTLE, - .misc = 0, - .size = sizeof(throttle_event), - }, - .time = perf_clock(), - .id = primary_counter_id(counter), - .stream_id = counter->id, - }; - - if (enable) - throttle_event.header.type = PERF_EVENT_UNTHROTTLE; - - ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0); - if (ret) - return; - - perf_output_put(&handle, throttle_event); - perf_output_end(&handle); -} - -/* - * Generic counter overflow handling, sampling. 
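- *
- * A worked example of the throttling check below, with made-up
- * numbers: at HZ == 1000 and sysctl_perf_counter_sample_rate ==
- * 100000, HZ * hwc->interrupts exceeds the limit once a counter
- * takes more than 100 interrupts inside one tick; it is then
- * marked MAX_INTERRUPTS and unthrottled again from the next tick
- * in perf_ctx_adjust_freq().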
- */ - -static int __perf_counter_overflow(struct perf_counter *counter, int nmi, - int throttle, struct perf_sample_data *data, - struct pt_regs *regs) -{ - int events = atomic_read(&counter->event_limit); - struct hw_perf_counter *hwc = &counter->hw; - int ret = 0; - - throttle = (throttle && counter->pmu->unthrottle != NULL); - - if (!throttle) { - hwc->interrupts++; - } else { - if (hwc->interrupts != MAX_INTERRUPTS) { - hwc->interrupts++; - if (HZ * hwc->interrupts > - (u64)sysctl_perf_counter_sample_rate) { - hwc->interrupts = MAX_INTERRUPTS; - perf_log_throttle(counter, 0); - ret = 1; - } - } else { - /* - * Keep re-disabling counters even though on the previous - * pass we disabled it - just in case we raced with a - * sched-in and the counter got enabled again: - */ - ret = 1; - } - } - - if (counter->attr.freq) { - u64 now = perf_clock(); - s64 delta = now - hwc->freq_stamp; - - hwc->freq_stamp = now; - - if (delta > 0 && delta < TICK_NSEC) - perf_adjust_period(counter, NSEC_PER_SEC / (int)delta); - } - - /* - * XXX event_limit might not quite work as expected on inherited - * counters - */ - - counter->pending_kill = POLL_IN; - if (events && atomic_dec_and_test(&counter->event_limit)) { - ret = 1; - counter->pending_kill = POLL_HUP; - if (nmi) { - counter->pending_disable = 1; - perf_pending_queue(&counter->pending, - perf_pending_counter); - } else - perf_counter_disable(counter); - } - - perf_counter_output(counter, nmi, data, regs); - return ret; -} - -int perf_counter_overflow(struct perf_counter *counter, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - return __perf_counter_overflow(counter, nmi, 1, data, regs); -} - -/* - * Generic software counter infrastructure - */ - -/* - * We directly increment counter->count and keep a second value in - * counter->hw.period_left to count intervals. This period counter - * is kept in the range [-sample_period, 0] so that we can use the - * sign as trigger. - */ - -static u64 perf_swcounter_set_period(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - u64 period = hwc->last_period; - u64 nr, offset; - s64 old, val; - - hwc->last_period = hwc->sample_period; - -again: - old = val = atomic64_read(&hwc->period_left); - if (val < 0) - return 0; - - nr = div64_u64(period + val, period); - offset = nr * period; - val -= offset; - if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) - goto again; - - return nr; -} - -static void perf_swcounter_overflow(struct perf_counter *counter, - int nmi, struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct hw_perf_counter *hwc = &counter->hw; - int throttle = 0; - u64 overflow; - - data->period = counter->hw.last_period; - overflow = perf_swcounter_set_period(counter); - - if (hwc->interrupts == MAX_INTERRUPTS) - return; - - for (; overflow; overflow--) { - if (__perf_counter_overflow(counter, nmi, throttle, - data, regs)) { - /* - * We inhibit the overflow from happening when - * hwc->interrupts == MAX_INTERRUPTS. - */ - break; - } - throttle = 1; - } -} - -static void perf_swcounter_unthrottle(struct perf_counter *counter) -{ - /* - * Nothing to do, we already reset hwc->interrupts. 
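The interval bookkeeping above (period_left held in [-sample_period, 0], with the sign flip as the trigger) can be modelled outside the kernel. A minimal single-threaded sketch, with plain integers standing in for the atomics and hypothetical names:

    #include <stdio.h>
    #include <stdint.h>

    static int64_t period_left;     /* kept in [-sample_period, 0] */

    /* Mirrors perf_swcounter_set_period(): report how many whole
     * periods elapsed and rebase period_left back below zero. */
    static uint64_t set_period(uint64_t period)
    {
            int64_t val = period_left;
            uint64_t nr;

            if (val < 0)
                    return 0;
            nr = ((uint64_t)val + period) / period;
            period_left = val - (int64_t)(nr * period);
            return nr;
    }

    /* Mirrors perf_swcounter_add(): a sign flip signals overflow(s). */
    static void add(uint64_t nr, uint64_t period)
    {
            period_left += (int64_t)nr;
            if (period_left >= 0)
                    printf("%llu overflow(s)\n",
                           (unsigned long long)set_period(period));
    }

    int main(void)
    {
            period_left = -1000;    /* sample_period = 1000 */
            add(250, 1000);         /* -750: nothing pending  */
            add(1800, 1000);        /* +1050: prints "2 ..."  */
            return 0;
    }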
- */ -} - -static void perf_swcounter_add(struct perf_counter *counter, u64 nr, - int nmi, struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct hw_perf_counter *hwc = &counter->hw; - - atomic64_add(nr, &counter->count); - - if (!hwc->sample_period) - return; - - if (!regs) - return; - - if (!atomic64_add_negative(nr, &hwc->period_left)) - perf_swcounter_overflow(counter, nmi, data, regs); -} - -static int perf_swcounter_is_counting(struct perf_counter *counter) -{ - /* - * The counter is active, we're good! - */ - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - return 1; - - /* - * The counter is off/error, not counting. - */ - if (counter->state != PERF_COUNTER_STATE_INACTIVE) - return 0; - - /* - * The counter is inactive, if the context is active - * we're part of a group that didn't make it on the 'pmu', - * not counting. - */ - if (counter->ctx->is_active) - return 0; - - /* - * We're inactive and the context is too, this means the - * task is scheduled out, we're counting events that happen - * to us, like migration events. - */ - return 1; -} - -static int perf_swcounter_match(struct perf_counter *counter, - enum perf_type_id type, - u32 event_id, struct pt_regs *regs) -{ - if (!perf_swcounter_is_counting(counter)) - return 0; - - if (counter->attr.type != type) - return 0; - if (counter->attr.config != event_id) - return 0; - - if (regs) { - if (counter->attr.exclude_user && user_mode(regs)) - return 0; - - if (counter->attr.exclude_kernel && !user_mode(regs)) - return 0; - } - - return 1; -} - -static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, - enum perf_type_id type, - u32 event_id, u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_swcounter_match(counter, type, event_id, regs)) - perf_swcounter_add(counter, nr, nmi, data, regs); - } - rcu_read_unlock(); -} - -static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) -{ - if (in_nmi()) - return &cpuctx->recursion[3]; - - if (in_irq()) - return &cpuctx->recursion[2]; - - if (in_softirq()) - return &cpuctx->recursion[1]; - - return &cpuctx->recursion[0]; -} - -static void do_perf_swcounter_event(enum perf_type_id type, u32 event, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - int *recursion = perf_swcounter_recursion_context(cpuctx); - struct perf_counter_context *ctx; - - if (*recursion) - goto out; - - (*recursion)++; - barrier(); - - perf_swcounter_ctx_event(&cpuctx->ctx, type, event, - nr, nmi, data, regs); - rcu_read_lock(); - /* - * doesn't really matter which of the child contexts the - * events ends up in. 
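The recursion array indexed by context level above exists so that a software event raised from inside its own handler is dropped rather than recursed on. The guard idiom in isolation, as a runnable model (hypothetical handler; one level stands in for task/softirq/irq/nmi):

    #include <stdio.h>

    static int recursion[4];        /* task, softirq, irq, nmi */

    static void raise_event(int level);

    static void handler(int level)
    {
            puts("handled event");
            raise_event(level);     /* would recurse forever unguarded */
    }

    static void raise_event(int level)
    {
            int *rc = &recursion[level];

            if (*rc)                /* already inside this context: drop */
                    return;
            (*rc)++;
            handler(level);
            (*rc)--;
    }

    int main(void)
    {
            raise_event(0);         /* prints "handled event" once */
            return 0;
    }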
- */ - ctx = rcu_dereference(current->perf_counter_ctxp); - if (ctx) - perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data, regs); - rcu_read_unlock(); - - barrier(); - (*recursion)--; - -out: - put_cpu_var(perf_cpu_context); -} - -void __perf_swcounter_event(u32 event, u64 nr, int nmi, - struct pt_regs *regs, u64 addr) -{ - struct perf_sample_data data = { - .addr = addr, - }; - - do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, - &data, regs); -} - -static void perf_swcounter_read(struct perf_counter *counter) -{ -} - -static int perf_swcounter_enable(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - - if (hwc->sample_period) { - hwc->last_period = hwc->sample_period; - perf_swcounter_set_period(counter); - } - return 0; -} - -static void perf_swcounter_disable(struct perf_counter *counter) -{ -} - -static const struct pmu perf_ops_generic = { - .enable = perf_swcounter_enable, - .disable = perf_swcounter_disable, - .read = perf_swcounter_read, - .unthrottle = perf_swcounter_unthrottle, -}; - -/* - * hrtimer based swcounter callback - */ - -static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) -{ - enum hrtimer_restart ret = HRTIMER_RESTART; - struct perf_sample_data data; - struct pt_regs *regs; - struct perf_counter *counter; - u64 period; - - counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); - counter->pmu->read(counter); - - data.addr = 0; - regs = get_irq_regs(); - /* - * In case we exclude kernel IPs or are somehow not in interrupt - * context, provide the next best thing, the user IP. - */ - if ((counter->attr.exclude_kernel || !regs) && - !counter->attr.exclude_user) - regs = task_pt_regs(current); - - if (regs) { - if (perf_counter_overflow(counter, 0, &data, regs)) - ret = HRTIMER_NORESTART; - } - - period = max_t(u64, 10000, counter->hw.sample_period); - hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - - return ret; -} - -/* - * Software counter: cpu wall time clock - */ - -static void cpu_clock_perf_counter_update(struct perf_counter *counter) -{ - int cpu = raw_smp_processor_id(); - s64 prev; - u64 now; - - now = cpu_clock(cpu); - prev = atomic64_read(&counter->hw.prev_count); - atomic64_set(&counter->hw.prev_count, now); - atomic64_add(now - prev, &counter->count); -} - -static int cpu_clock_perf_counter_enable(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - int cpu = raw_smp_processor_id(); - - atomic64_set(&hwc->prev_count, cpu_clock(cpu)); - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swcounter_hrtimer; - if (hwc->sample_period) { - u64 period = max_t(u64, 10000, hwc->sample_period); - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } - - return 0; -} - -static void cpu_clock_perf_counter_disable(struct perf_counter *counter) -{ - if (counter->hw.sample_period) - hrtimer_cancel(&counter->hw.hrtimer); - cpu_clock_perf_counter_update(counter); -} - -static void cpu_clock_perf_counter_read(struct perf_counter *counter) -{ - cpu_clock_perf_counter_update(counter); -} - -static const struct pmu perf_ops_cpu_clock = { - .enable = cpu_clock_perf_counter_enable, - .disable = cpu_clock_perf_counter_disable, - .read = cpu_clock_perf_counter_read, -}; - -/* - * Software counter: task time clock - */ - -static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) -{ - u64 prev; - s64 delta; - - prev = atomic64_xchg(&counter->hw.prev_count, now); - 
delta = now - prev; - atomic64_add(delta, &counter->count); -} - -static int task_clock_perf_counter_enable(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - u64 now; - - now = counter->ctx->time; - - atomic64_set(&hwc->prev_count, now); - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swcounter_hrtimer; - if (hwc->sample_period) { - u64 period = max_t(u64, 10000, hwc->sample_period); - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } - - return 0; -} - -static void task_clock_perf_counter_disable(struct perf_counter *counter) -{ - if (counter->hw.sample_period) - hrtimer_cancel(&counter->hw.hrtimer); - task_clock_perf_counter_update(counter, counter->ctx->time); - -} - -static void task_clock_perf_counter_read(struct perf_counter *counter) -{ - u64 time; - - if (!in_nmi()) { - update_context_time(counter->ctx); - time = counter->ctx->time; - } else { - u64 now = perf_clock(); - u64 delta = now - counter->ctx->timestamp; - time = counter->ctx->time + delta; - } - - task_clock_perf_counter_update(counter, time); -} - -static const struct pmu perf_ops_task_clock = { - .enable = task_clock_perf_counter_enable, - .disable = task_clock_perf_counter_disable, - .read = task_clock_perf_counter_read, -}; - -#ifdef CONFIG_EVENT_PROFILE -void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, - int entry_size) -{ - struct perf_raw_record raw = { - .size = entry_size, - .data = record, - }; - - struct perf_sample_data data = { - .addr = addr, - .raw = &raw, - }; - - struct pt_regs *regs = get_irq_regs(); - - if (!regs) - regs = task_pt_regs(current); - - do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, - &data, regs); -} -EXPORT_SYMBOL_GPL(perf_tpcounter_event); - -extern int ftrace_profile_enable(int); -extern void ftrace_profile_disable(int); - -static void tp_perf_counter_destroy(struct perf_counter *counter) -{ - ftrace_profile_disable(counter->attr.config); -} - -static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) -{ - /* - * Raw tracepoint data is a severe data leak, only allow root to - * have these. - */ - if ((counter->attr.sample_type & PERF_SAMPLE_RAW) && - perf_paranoid_tracepoint_raw() && - !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); - - if (ftrace_profile_enable(counter->attr.config)) - return NULL; - - counter->destroy = tp_perf_counter_destroy; - - return &perf_ops_generic; -} -#else -static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) -{ - return NULL; -} -#endif - -atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; - -static void sw_perf_counter_destroy(struct perf_counter *counter) -{ - u64 event_id = counter->attr.config; - - WARN_ON(counter->parent); - - atomic_dec(&perf_swcounter_enabled[event_id]); -} - -static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) -{ - const struct pmu *pmu = NULL; - u64 event_id = counter->attr.config; - - /* - * Software counters (currently) can't in general distinguish - * between user, kernel and hypervisor events. - * However, context switches and cpu migrations are considered - * to be kernel events, and page faults are never hypervisor - * events. - */ - switch (event_id) { - case PERF_COUNT_SW_CPU_CLOCK: - pmu = &perf_ops_cpu_clock; - - break; - case PERF_COUNT_SW_TASK_CLOCK: - /* - * If the user instantiates this as a per-cpu counter, - * use the cpu_clock counter instead. 
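Both clock counters fold elapsed time into ->count with the same idiom: atomically exchange the stored timestamp and accumulate the delta. The pattern stand-alone, using C11 atomics in place of atomic64_t (hypothetical timestamps):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static _Atomic uint64_t prev_count, count;

    /* Mirrors {cpu,task}_clock_perf_counter_update(). */
    static void clock_update(uint64_t now)
    {
            uint64_t prev = atomic_exchange(&prev_count, now);

            atomic_fetch_add(&count, now - prev);
    }

    int main(void)
    {
            clock_update(0);        /* enable: prime the timestamp */
            clock_update(150);
            clock_update(400);
            printf("counted %llu ns\n",
                   (unsigned long long)count);     /* 400 */
            return 0;
    }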
- */ - if (counter->ctx->task) - pmu = &perf_ops_task_clock; - else - pmu = &perf_ops_cpu_clock; - - break; - case PERF_COUNT_SW_PAGE_FAULTS: - case PERF_COUNT_SW_PAGE_FAULTS_MIN: - case PERF_COUNT_SW_PAGE_FAULTS_MAJ: - case PERF_COUNT_SW_CONTEXT_SWITCHES: - case PERF_COUNT_SW_CPU_MIGRATIONS: - if (!counter->parent) { - atomic_inc(&perf_swcounter_enabled[event_id]); - counter->destroy = sw_perf_counter_destroy; - } - pmu = &perf_ops_generic; - break; - } - - return pmu; -} - -/* - * Allocate and initialize a counter structure - */ -static struct perf_counter * -perf_counter_alloc(struct perf_counter_attr *attr, - int cpu, - struct perf_counter_context *ctx, - struct perf_counter *group_leader, - struct perf_counter *parent_counter, - gfp_t gfpflags) -{ - const struct pmu *pmu; - struct perf_counter *counter; - struct hw_perf_counter *hwc; - long err; - - counter = kzalloc(sizeof(*counter), gfpflags); - if (!counter) - return ERR_PTR(-ENOMEM); - - /* - * Single counters are their own group leaders, with an - * empty sibling list: - */ - if (!group_leader) - group_leader = counter; - - mutex_init(&counter->child_mutex); - INIT_LIST_HEAD(&counter->child_list); - - INIT_LIST_HEAD(&counter->group_entry); - INIT_LIST_HEAD(&counter->event_entry); - INIT_LIST_HEAD(&counter->sibling_list); - init_waitqueue_head(&counter->waitq); - - mutex_init(&counter->mmap_mutex); - - counter->cpu = cpu; - counter->attr = *attr; - counter->group_leader = group_leader; - counter->pmu = NULL; - counter->ctx = ctx; - counter->oncpu = -1; - - counter->parent = parent_counter; - - counter->ns = get_pid_ns(current->nsproxy->pid_ns); - counter->id = atomic64_inc_return(&perf_counter_id); - - counter->state = PERF_COUNTER_STATE_INACTIVE; - - if (attr->disabled) - counter->state = PERF_COUNTER_STATE_OFF; - - pmu = NULL; - - hwc = &counter->hw; - hwc->sample_period = attr->sample_period; - if (attr->freq && attr->sample_freq) - hwc->sample_period = 1; - hwc->last_period = hwc->sample_period; - - atomic64_set(&hwc->period_left, hwc->sample_period); - - /* - * we currently do not support PERF_FORMAT_GROUP on inherited counters - */ - if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) - goto done; - - switch (attr->type) { - case PERF_TYPE_RAW: - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - pmu = hw_perf_counter_init(counter); - break; - - case PERF_TYPE_SOFTWARE: - pmu = sw_perf_counter_init(counter); - break; - - case PERF_TYPE_TRACEPOINT: - pmu = tp_perf_counter_init(counter); - break; - - default: - break; - } -done: - err = 0; - if (!pmu) - err = -EINVAL; - else if (IS_ERR(pmu)) - err = PTR_ERR(pmu); - - if (err) { - if (counter->ns) - put_pid_ns(counter->ns); - kfree(counter); - return ERR_PTR(err); - } - - counter->pmu = pmu; - - if (!counter->parent) { - atomic_inc(&nr_counters); - if (counter->attr.mmap) - atomic_inc(&nr_mmap_counters); - if (counter->attr.comm) - atomic_inc(&nr_comm_counters); - if (counter->attr.task) - atomic_inc(&nr_task_counters); - } - - return counter; -} - -static int perf_copy_attr(struct perf_counter_attr __user *uattr, - struct perf_counter_attr *attr) -{ - u32 size; - int ret; - - if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) - return -EFAULT; - - /* - * zero the full structure, so that a short copy will be nice. 
- */ - memset(attr, 0, sizeof(*attr)); - - ret = get_user(size, &uattr->size); - if (ret) - return ret; - - if (size > PAGE_SIZE) /* silly large */ - goto err_size; - - if (!size) /* abi compat */ - size = PERF_ATTR_SIZE_VER0; - - if (size < PERF_ATTR_SIZE_VER0) - goto err_size; - - /* - * If we're handed a bigger struct than we know of, - * ensure all the unknown bits are 0 - i.e. new - * user-space does not rely on any kernel feature - * extensions we dont know about yet. - */ - if (size > sizeof(*attr)) { - unsigned char __user *addr; - unsigned char __user *end; - unsigned char val; - - addr = (void __user *)uattr + sizeof(*attr); - end = (void __user *)uattr + size; - - for (; addr < end; addr++) { - ret = get_user(val, addr); - if (ret) - return ret; - if (val) - goto err_size; - } - size = sizeof(*attr); - } - - ret = copy_from_user(attr, uattr, size); - if (ret) - return -EFAULT; - - /* - * If the type exists, the corresponding creation will verify - * the attr->config. - */ - if (attr->type >= PERF_TYPE_MAX) - return -EINVAL; - - if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) - return -EINVAL; - - if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) - return -EINVAL; - - if (attr->read_format & ~(PERF_FORMAT_MAX-1)) - return -EINVAL; - -out: - return ret; - -err_size: - put_user(sizeof(*attr), &uattr->size); - ret = -E2BIG; - goto out; -} - -int perf_counter_set_output(struct perf_counter *counter, int output_fd) -{ - struct perf_counter *output_counter = NULL; - struct file *output_file = NULL; - struct perf_counter *old_output; - int fput_needed = 0; - int ret = -EINVAL; - - if (!output_fd) - goto set; - - output_file = fget_light(output_fd, &fput_needed); - if (!output_file) - return -EBADF; - - if (output_file->f_op != &perf_fops) - goto out; - - output_counter = output_file->private_data; - - /* Don't chain output fds */ - if (output_counter->output) - goto out; - - /* Don't set an output fd when we already have an output channel */ - if (counter->data) - goto out; - - atomic_long_inc(&output_file->f_count); - -set: - mutex_lock(&counter->mmap_mutex); - old_output = counter->output; - rcu_assign_pointer(counter->output, output_counter); - mutex_unlock(&counter->mmap_mutex); - - if (old_output) { - /* - * we need to make sure no existing perf_output_*() - * is still referencing this counter. - */ - synchronize_rcu(); - fput(old_output->filp); - } - - ret = 0; -out: - fput_light(output_file, fput_needed); - return ret; -} - -/** - * sys_perf_counter_open - open a performance counter, associate it to a task/cpu - * - * @attr_uptr: event type attributes for monitoring/sampling - * @pid: target pid - * @cpu: target cpu - * @group_fd: group leader counter fd - */ -SYSCALL_DEFINE5(perf_counter_open, - struct perf_counter_attr __user *, attr_uptr, - pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) -{ - struct perf_counter *counter, *group_leader; - struct perf_counter_attr attr; - struct perf_counter_context *ctx; - struct file *counter_file = NULL; - struct file *group_file = NULL; - int fput_needed = 0; - int fput_needed2 = 0; - int err; - - /* for future expandability... 
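The size handshake implemented by perf_copy_attr() above keeps the ABI extensible in both directions: an old binary may pass a smaller struct (the kernel zero-fills the rest), and a new binary on an old kernel must have zeroes in the tail only it understands. A user-space model of that tail check (hypothetical sizes):

    #include <stdio.h>
    #include <string.h>

    #define KNOWN           64      /* bytes this "kernel" understands */
    #define USER_SIZE       96      /* bytes "user space" handed in    */

    int main(void)
    {
            unsigned char uattr[USER_SIZE];
            int i;

            memset(uattr, 0, sizeof(uattr));
            uattr[80] = 1;          /* a feature bit we don't know */

            /* Mirrors the loop in perf_copy_attr(): any set bit past
             * the known size means -E2BIG, not silent acceptance. */
            for (i = KNOWN; i < USER_SIZE; i++) {
                    if (uattr[i]) {
                            puts("E2BIG: unknown attr bits set");
                            return 1;
                    }
            }
            puts("ok: tail is zero, use the first KNOWN bytes");
            return 0;
    }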
*/ - if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) - return -EINVAL; - - err = perf_copy_attr(attr_uptr, &attr); - if (err) - return err; - - if (!attr.exclude_kernel) { - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; - } - - if (attr.freq) { - if (attr.sample_freq > sysctl_perf_counter_sample_rate) - return -EINVAL; - } - - /* - * Get the target context (task or percpu): - */ - ctx = find_get_context(pid, cpu); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - /* - * Look up the group leader (we will attach this counter to it): - */ - group_leader = NULL; - if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { - err = -EINVAL; - group_file = fget_light(group_fd, &fput_needed); - if (!group_file) - goto err_put_context; - if (group_file->f_op != &perf_fops) - goto err_put_context; - - group_leader = group_file->private_data; - /* - * Do not allow a recursive hierarchy (this new sibling - * becoming part of another group-sibling): - */ - if (group_leader->group_leader != group_leader) - goto err_put_context; - /* - * Do not allow to attach to a group in a different - * task or CPU context: - */ - if (group_leader->ctx != ctx) - goto err_put_context; - /* - * Only a group leader can be exclusive or pinned - */ - if (attr.exclusive || attr.pinned) - goto err_put_context; - } - - counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, - NULL, GFP_KERNEL); - err = PTR_ERR(counter); - if (IS_ERR(counter)) - goto err_put_context; - - err = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); - if (err < 0) - goto err_free_put_context; - - counter_file = fget_light(err, &fput_needed2); - if (!counter_file) - goto err_free_put_context; - - if (flags & PERF_FLAG_FD_OUTPUT) { - err = perf_counter_set_output(counter, group_fd); - if (err) - goto err_fput_free_put_context; - } - - counter->filp = counter_file; - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - perf_install_in_context(ctx, counter, cpu); - ++ctx->generation; - mutex_unlock(&ctx->mutex); - - counter->owner = current; - get_task_struct(current); - mutex_lock(¤t->perf_counter_mutex); - list_add_tail(&counter->owner_entry, ¤t->perf_counter_list); - mutex_unlock(¤t->perf_counter_mutex); - -err_fput_free_put_context: - fput_light(counter_file, fput_needed2); - -err_free_put_context: - if (err < 0) - kfree(counter); - -err_put_context: - if (err < 0) - put_ctx(ctx); - - fput_light(group_file, fput_needed); - - return err; -} - -/* - * inherit a counter from parent task to child task: - */ -static struct perf_counter * -inherit_counter(struct perf_counter *parent_counter, - struct task_struct *parent, - struct perf_counter_context *parent_ctx, - struct task_struct *child, - struct perf_counter *group_leader, - struct perf_counter_context *child_ctx) -{ - struct perf_counter *child_counter; - - /* - * Instead of creating recursive hierarchies of counters, - * we link inherited counters back to the original parent, - * which has a filp for sure, which we use as the reference - * count: - */ - if (parent_counter->parent) - parent_counter = parent_counter->parent; - - child_counter = perf_counter_alloc(&parent_counter->attr, - parent_counter->cpu, child_ctx, - group_leader, parent_counter, - GFP_KERNEL); - if (IS_ERR(child_counter)) - return child_counter; - get_ctx(child_ctx); - - /* - * Make the child state follow the state of the parent counter, - * not its attr.disabled bit. We hold the parent's mutex, - * so we won't race with perf_counter_{en, dis}able_family. 
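For reference, the same open path seen from user space. This sketch assumes the renamed interface this patch series introduces (perf_event_open(2) and linux/perf_event.h); it counts one task's user-mode instructions:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    int main(void)
    {
            struct perf_event_attr attr;
            long long count;
            int fd;

            memset(&attr, 0, sizeof(attr)); /* unknown tail must be 0 */
            attr.type = PERF_TYPE_HARDWARE;
            attr.size = sizeof(attr);       /* ABI size negotiation */
            attr.config = PERF_COUNT_HW_INSTRUCTIONS;
            attr.disabled = 1;
            attr.exclude_kernel = 1;

            /* pid 0 = self, cpu -1 = any, group_fd -1 = own leader */
            fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
            if (fd < 0) {
                    perror("perf_event_open");
                    return 1;
            }
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
            /* ... workload under measurement ... */
            ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
            read(fd, &count, sizeof(count));
            printf("instructions: %lld\n", count);
            close(fd);
            return 0;
    }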
- */ - if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) - child_counter->state = PERF_COUNTER_STATE_INACTIVE; - else - child_counter->state = PERF_COUNTER_STATE_OFF; - - if (parent_counter->attr.freq) - child_counter->hw.sample_period = parent_counter->hw.sample_period; - - /* - * Link it up in the child's context: - */ - add_counter_to_ctx(child_counter, child_ctx); - - /* - * Get a reference to the parent filp - we will fput it - * when the child counter exits. This is safe to do because - * we are in the parent and we know that the filp still - * exists and has a nonzero count: - */ - atomic_long_inc(&parent_counter->filp->f_count); - - /* - * Link this into the parent counter's child list - */ - WARN_ON_ONCE(parent_counter->ctx->parent_ctx); - mutex_lock(&parent_counter->child_mutex); - list_add_tail(&child_counter->child_list, &parent_counter->child_list); - mutex_unlock(&parent_counter->child_mutex); - - return child_counter; -} - -static int inherit_group(struct perf_counter *parent_counter, - struct task_struct *parent, - struct perf_counter_context *parent_ctx, - struct task_struct *child, - struct perf_counter_context *child_ctx) -{ - struct perf_counter *leader; - struct perf_counter *sub; - struct perf_counter *child_ctr; - - leader = inherit_counter(parent_counter, parent, parent_ctx, - child, NULL, child_ctx); - if (IS_ERR(leader)) - return PTR_ERR(leader); - list_for_each_entry(sub, &parent_counter->sibling_list, group_entry) { - child_ctr = inherit_counter(sub, parent, parent_ctx, - child, leader, child_ctx); - if (IS_ERR(child_ctr)) - return PTR_ERR(child_ctr); - } - return 0; -} - -static void sync_child_counter(struct perf_counter *child_counter, - struct task_struct *child) -{ - struct perf_counter *parent_counter = child_counter->parent; - u64 child_val; - - if (child_counter->attr.inherit_stat) - perf_counter_read_event(child_counter, child); - - child_val = atomic64_read(&child_counter->count); - - /* - * Add back the child's count to the parent's count: - */ - atomic64_add(child_val, &parent_counter->count); - atomic64_add(child_counter->total_time_enabled, - &parent_counter->child_total_time_enabled); - atomic64_add(child_counter->total_time_running, - &parent_counter->child_total_time_running); - - /* - * Remove this counter from the parent's list - */ - WARN_ON_ONCE(parent_counter->ctx->parent_ctx); - mutex_lock(&parent_counter->child_mutex); - list_del_init(&child_counter->child_list); - mutex_unlock(&parent_counter->child_mutex); - - /* - * Release the parent counter, if this was the last - * reference to it. - */ - fput(parent_counter->filp); -} - -static void -__perf_counter_exit_task(struct perf_counter *child_counter, - struct perf_counter_context *child_ctx, - struct task_struct *child) -{ - struct perf_counter *parent_counter; - - update_counter_times(child_counter); - perf_counter_remove_from_context(child_counter); - - parent_counter = child_counter->parent; - /* - * It can happen that parent exits first, and has counters - * that are still around due to the child reference. These - * counters need to be zapped - but otherwise linger. - */ - if (parent_counter) { - sync_child_counter(child_counter, child); - free_counter(child_counter); - } -} - -/* - * When a child task exits, feed back counter values to parent counters. 
- */ -void perf_counter_exit_task(struct task_struct *child) -{ - struct perf_counter *child_counter, *tmp; - struct perf_counter_context *child_ctx; - unsigned long flags; - - if (likely(!child->perf_counter_ctxp)) { - perf_counter_task(child, NULL, 0); - return; - } - - local_irq_save(flags); - /* - * We can't reschedule here because interrupts are disabled, - * and either child is current or it is a task that can't be - * scheduled, so we are now safe from rescheduling changing - * our context. - */ - child_ctx = child->perf_counter_ctxp; - __perf_counter_task_sched_out(child_ctx); - - /* - * Take the context lock here so that if find_get_context is - * reading child->perf_counter_ctxp, we wait until it has - * incremented the context's refcount before we do put_ctx below. - */ - spin_lock(&child_ctx->lock); - child->perf_counter_ctxp = NULL; - /* - * If this context is a clone; unclone it so it can't get - * swapped to another process while we're removing all - * the counters from it. - */ - unclone_ctx(child_ctx); - spin_unlock_irqrestore(&child_ctx->lock, flags); - - /* - * Report the task dead after unscheduling the counters so that we - * won't get any samples after PERF_EVENT_EXIT. We can however still - * get a few PERF_EVENT_READ events. - */ - perf_counter_task(child, child_ctx, 0); - - /* - * We can recurse on the same lock type through: - * - * __perf_counter_exit_task() - * sync_child_counter() - * fput(parent_counter->filp) - * perf_release() - * mutex_lock(&ctx->mutex) - * - * But since its the parent context it won't be the same instance. - */ - mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); - -again: - list_for_each_entry_safe(child_counter, tmp, &child_ctx->group_list, - group_entry) - __perf_counter_exit_task(child_counter, child_ctx, child); - - /* - * If the last counter was a group counter, it will have appended all - * its siblings to the list, but we obtained 'tmp' before that which - * will still point to the list head terminating the iteration. - */ - if (!list_empty(&child_ctx->group_list)) - goto again; - - mutex_unlock(&child_ctx->mutex); - - put_ctx(child_ctx); -} - -/* - * free an unexposed, unused context as created by inheritance by - * init_task below, used by fork() in case of fail. 
- */ -void perf_counter_free_task(struct task_struct *task) -{ - struct perf_counter_context *ctx = task->perf_counter_ctxp; - struct perf_counter *counter, *tmp; - - if (!ctx) - return; - - mutex_lock(&ctx->mutex); -again: - list_for_each_entry_safe(counter, tmp, &ctx->group_list, group_entry) { - struct perf_counter *parent = counter->parent; - - if (WARN_ON_ONCE(!parent)) - continue; - - mutex_lock(&parent->child_mutex); - list_del_init(&counter->child_list); - mutex_unlock(&parent->child_mutex); - - fput(parent->filp); - - list_del_counter(counter, ctx); - free_counter(counter); - } - - if (!list_empty(&ctx->group_list)) - goto again; - - mutex_unlock(&ctx->mutex); - - put_ctx(ctx); -} - -/* - * Initialize the perf_counter context in task_struct - */ -int perf_counter_init_task(struct task_struct *child) -{ - struct perf_counter_context *child_ctx, *parent_ctx; - struct perf_counter_context *cloned_ctx; - struct perf_counter *counter; - struct task_struct *parent = current; - int inherited_all = 1; - int ret = 0; - - child->perf_counter_ctxp = NULL; - - mutex_init(&child->perf_counter_mutex); - INIT_LIST_HEAD(&child->perf_counter_list); - - if (likely(!parent->perf_counter_ctxp)) - return 0; - - /* - * This is executed from the parent task context, so inherit - * counters that have been marked for cloning. - * First allocate and initialize a context for the child. - */ - - child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); - if (!child_ctx) - return -ENOMEM; - - __perf_counter_init_context(child_ctx, child); - child->perf_counter_ctxp = child_ctx; - get_task_struct(child); - - /* - * If the parent's context is a clone, pin it so it won't get - * swapped under us. - */ - parent_ctx = perf_pin_task_context(parent); - - /* - * No need to check if parent_ctx != NULL here; since we saw - * it non-NULL earlier, the only reason for it to become NULL - * is if we exit, and since we're currently in the middle of - * a fork we can't be exiting at the same time. - */ - - /* - * Lock the parent list. No need to lock the child - not PID - * hashed yet and not running, so nobody can access it. - */ - mutex_lock(&parent_ctx->mutex); - - /* - * We dont have to disable NMIs - we are only looking at - * the list, not manipulating it: - */ - list_for_each_entry_rcu(counter, &parent_ctx->event_list, event_entry) { - if (counter != counter->group_leader) - continue; - - if (!counter->attr.inherit) { - inherited_all = 0; - continue; - } - - ret = inherit_group(counter, parent, parent_ctx, - child, child_ctx); - if (ret) { - inherited_all = 0; - break; - } - } - - if (inherited_all) { - /* - * Mark the child context as a clone of the parent - * context, or of whatever the parent is a clone of. - * Note that if the parent is a clone, it could get - * uncloned at any point, but that doesn't matter - * because the list of counters and the generation - * count can't have changed since we took the mutex. 
- */ - cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); - if (cloned_ctx) { - child_ctx->parent_ctx = cloned_ctx; - child_ctx->parent_gen = parent_ctx->parent_gen; - } else { - child_ctx->parent_ctx = parent_ctx; - child_ctx->parent_gen = parent_ctx->generation; - } - get_ctx(child_ctx->parent_ctx); - } - - mutex_unlock(&parent_ctx->mutex); - - perf_unpin_context(parent_ctx); - - return ret; -} - -static void __cpuinit perf_counter_init_cpu(int cpu) -{ - struct perf_cpu_context *cpuctx; - - cpuctx = &per_cpu(perf_cpu_context, cpu); - __perf_counter_init_context(&cpuctx->ctx, NULL); - - spin_lock(&perf_resource_lock); - cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; - spin_unlock(&perf_resource_lock); - - hw_perf_counter_setup(cpu); -} - -#ifdef CONFIG_HOTPLUG_CPU -static void __perf_counter_exit_cpu(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter_context *ctx = &cpuctx->ctx; - struct perf_counter *counter, *tmp; - - list_for_each_entry_safe(counter, tmp, &ctx->group_list, group_entry) - __perf_counter_remove_from_context(counter); -} -static void perf_counter_exit_cpu(int cpu) -{ - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = &cpuctx->ctx; - - mutex_lock(&ctx->mutex); - smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); - mutex_unlock(&ctx->mutex); -} -#else -static inline void perf_counter_exit_cpu(int cpu) { } -#endif - -static int __cpuinit -perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action) { - - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - perf_counter_init_cpu(cpu); - break; - - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - hw_perf_counter_setup_online(cpu); - break; - - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - perf_counter_exit_cpu(cpu); - break; - - default: - break; - } - - return NOTIFY_OK; -} - -/* - * This has to have a higher priority than migration_notifier in sched.c. 
- */ -static struct notifier_block __cpuinitdata perf_cpu_nb = { - .notifier_call = perf_cpu_notify, - .priority = 20, -}; - -void __init perf_counter_init(void) -{ - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, - (void *)(long)smp_processor_id()); - register_cpu_notifier(&perf_cpu_nb); -} - -static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf) -{ - return sprintf(buf, "%d\n", perf_reserved_percpu); -} - -static ssize_t -perf_set_reserve_percpu(struct sysdev_class *class, - const char *buf, - size_t count) -{ - struct perf_cpu_context *cpuctx; - unsigned long val; - int err, cpu, mpt; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > perf_max_counters) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_reserved_percpu = val; - for_each_online_cpu(cpu) { - cpuctx = &per_cpu(perf_cpu_context, cpu); - spin_lock_irq(&cpuctx->ctx.lock); - mpt = min(perf_max_counters - cpuctx->ctx.nr_counters, - perf_max_counters - perf_reserved_percpu); - cpuctx->max_pertask = mpt; - spin_unlock_irq(&cpuctx->ctx.lock); - } - spin_unlock(&perf_resource_lock); - - return count; -} - -static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf) -{ - return sprintf(buf, "%d\n", perf_overcommit); -} - -static ssize_t -perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count) -{ - unsigned long val; - int err; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > 1) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_overcommit = val; - spin_unlock(&perf_resource_lock); - - return count; -} - -static SYSDEV_CLASS_ATTR( - reserve_percpu, - 0644, - perf_show_reserve_percpu, - perf_set_reserve_percpu - ); - -static SYSDEV_CLASS_ATTR( - overcommit, - 0644, - perf_show_overcommit, - perf_set_overcommit - ); - -static struct attribute *perfclass_attrs[] = { - &attr_reserve_percpu.attr, - &attr_overcommit.attr, - NULL -}; - -static struct attribute_group perfclass_attr_group = { - .attrs = perfclass_attrs, - .name = "perf_counters", -}; - -static int __init perf_counter_sysfs_init(void) -{ - return sysfs_create_group(&cpu_sysdev_class.kset.kobj, - &perfclass_attr_group); -} -device_initcall(perf_counter_sysfs_init); diff --git a/kernel/perf_event.c b/kernel/perf_event.c new file mode 100644 index 00000000000..6e8b99a04e1 --- /dev/null +++ b/kernel/perf_event.c @@ -0,0 +1,5000 @@ +/* + * Performance event core code + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright © 2009 Paul Mackerras, IBM Corp. 
+ * + * For licensing details see kernel-base/COPYING + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * Each CPU has a list of per CPU events: + */ +DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); + +int perf_max_events __read_mostly = 1; +static int perf_reserved_percpu __read_mostly; +static int perf_overcommit __read_mostly = 1; + +static atomic_t nr_events __read_mostly; +static atomic_t nr_mmap_events __read_mostly; +static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_task_events __read_mostly; + +/* + * perf event paranoia level: + * -1 - not paranoid at all + * 0 - disallow raw tracepoint access for unpriv + * 1 - disallow cpu events for unpriv + * 2 - disallow kernel profiling for unpriv + */ +int sysctl_perf_event_paranoid __read_mostly = 1; + +static inline bool perf_paranoid_tracepoint_raw(void) +{ + return sysctl_perf_event_paranoid > -1; +} + +static inline bool perf_paranoid_cpu(void) +{ + return sysctl_perf_event_paranoid > 0; +} + +static inline bool perf_paranoid_kernel(void) +{ + return sysctl_perf_event_paranoid > 1; +} + +int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ + +/* + * max perf event sample rate + */ +int sysctl_perf_event_sample_rate __read_mostly = 100000; + +static atomic64_t perf_event_id; + +/* + * Lock for (sysadmin-configurable) event reservations: + */ +static DEFINE_SPINLOCK(perf_resource_lock); + +/* + * Architecture provided APIs - weak aliases: + */ +extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + return NULL; +} + +void __weak hw_perf_disable(void) { barrier(); } +void __weak hw_perf_enable(void) { barrier(); } + +void __weak hw_perf_event_setup(int cpu) { barrier(); } +void __weak hw_perf_event_setup_online(int cpu) { barrier(); } + +int __weak +hw_perf_group_sched_in(struct perf_event *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, int cpu) +{ + return 0; +} + +void __weak perf_event_print_debug(void) { } + +static DEFINE_PER_CPU(int, perf_disable_count); + +void __perf_disable(void) +{ + __get_cpu_var(perf_disable_count)++; +} + +bool __perf_enable(void) +{ + return !--__get_cpu_var(perf_disable_count); +} + +void perf_disable(void) +{ + __perf_disable(); + hw_perf_disable(); +} + +void perf_enable(void) +{ + if (__perf_enable()) + hw_perf_enable(); +} + +static void get_ctx(struct perf_event_context *ctx) +{ + WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); +} + +static void free_ctx(struct rcu_head *head) +{ + struct perf_event_context *ctx; + + ctx = container_of(head, struct perf_event_context, rcu_head); + kfree(ctx); +} + +static void put_ctx(struct perf_event_context *ctx) +{ + if (atomic_dec_and_test(&ctx->refcount)) { + if (ctx->parent_ctx) + put_ctx(ctx->parent_ctx); + if (ctx->task) + put_task_struct(ctx->task); + call_rcu(&ctx->rcu_head, free_ctx); + } +} + +static void unclone_ctx(struct perf_event_context *ctx) +{ + if (ctx->parent_ctx) { + put_ctx(ctx->parent_ctx); + ctx->parent_ctx = NULL; + } +} + +/* + * If we inherit events we want to return the parent event id + * to userspace. + */ +static u64 primary_event_id(struct perf_event *event) +{ + u64 id = event->id; + + if (event->parent) + id = event->parent->id; + + return id; +} + +/* + * Get the perf_event_context for a task and lock it. 
+ * This has to cope with the fact that until it is locked,
+ * the context could get moved to another task.
+ */
+static struct perf_event_context *
+perf_lock_task_context(struct task_struct *task, unsigned long *flags)
+{
+        struct perf_event_context *ctx;
+
+        rcu_read_lock();
+ retry:
+        ctx = rcu_dereference(task->perf_event_ctxp);
+        if (ctx) {
+                /*
+                 * If this context is a clone of another, it might
+                 * get swapped for another underneath us by
+                 * perf_event_task_sched_out, though the
+                 * rcu_read_lock() protects us from any context
+                 * getting freed. Lock the context and check if it
+                 * got swapped before we could get the lock, and retry
+                 * if so. If we locked the right context, then it
+                 * can't get swapped on us any more.
+                 */
+                spin_lock_irqsave(&ctx->lock, *flags);
+                if (ctx != rcu_dereference(task->perf_event_ctxp)) {
+                        spin_unlock_irqrestore(&ctx->lock, *flags);
+                        goto retry;
+                }
+
+                if (!atomic_inc_not_zero(&ctx->refcount)) {
+                        spin_unlock_irqrestore(&ctx->lock, *flags);
+                        ctx = NULL;
+                }
+        }
+        rcu_read_unlock();
+        return ctx;
+}
+
+/*
+ * Get the context for a task and increment its pin_count so it
+ * can't get swapped to another task. This also increments its
+ * reference count so that the context can't get freed.
+ */
+static struct perf_event_context *perf_pin_task_context(struct task_struct *task)
+{
+        struct perf_event_context *ctx;
+        unsigned long flags;
+
+        ctx = perf_lock_task_context(task, &flags);
+        if (ctx) {
+                ++ctx->pin_count;
+                spin_unlock_irqrestore(&ctx->lock, flags);
+        }
+        return ctx;
+}
+
+static void perf_unpin_context(struct perf_event_context *ctx)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&ctx->lock, flags);
+        --ctx->pin_count;
+        spin_unlock_irqrestore(&ctx->lock, flags);
+        put_ctx(ctx);
+}
+
+/*
+ * Add an event to the lists for its context.
+ * Must be called with ctx->mutex and ctx->lock held.
+ */
+static void
+list_add_event(struct perf_event *event, struct perf_event_context *ctx)
+{
+        struct perf_event *group_leader = event->group_leader;
+
+        /*
+         * Depending on whether it is a standalone or sibling event,
+         * add it straight to the context's event list, or to the group
+         * leader's sibling list:
+         */
+        if (group_leader == event)
+                list_add_tail(&event->group_entry, &ctx->group_list);
+        else {
+                list_add_tail(&event->group_entry, &group_leader->sibling_list);
+                group_leader->nr_siblings++;
+        }
+
+        list_add_rcu(&event->event_entry, &ctx->event_list);
+        ctx->nr_events++;
+        if (event->attr.inherit_stat)
+                ctx->nr_stat++;
+}
+
+/*
+ * Remove an event from the lists for its context.
+ * Must be called with ctx->mutex and ctx->lock held.
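perf_lock_task_context() above is an instance of a general idiom: dereference a shared pointer under RCU, lock the object, then re-check the pointer before trusting it. The idiom compressed into a runnable model (single-threaded, with a pthread mutex standing in for the spinlock, so the retry never actually fires):

    #include <pthread.h>
    #include <stdio.h>

    struct ctx { pthread_mutex_t lock; };

    static struct ctx c0 = { PTHREAD_MUTEX_INITIALIZER };
    static struct ctx *volatile shared = &c0;  /* task->...ctxp stand-in */

    static struct ctx *lock_ctx(void)
    {
            struct ctx *ctx;

    retry:
            ctx = shared;                  /* rcu_dereference() stand-in */
            pthread_mutex_lock(&ctx->lock);
            if (ctx != shared) {           /* swapped under us: retry */
                    pthread_mutex_unlock(&ctx->lock);
                    goto retry;
            }
            return ctx;                    /* locked AND still current */
    }

    int main(void)
    {
            pthread_mutex_unlock(&lock_ctx()->lock);
            puts("locked the live context");
            return 0;
    }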
+ */ +static void +list_del_event(struct perf_event *event, struct perf_event_context *ctx) +{ + struct perf_event *sibling, *tmp; + + if (list_empty(&event->group_entry)) + return; + ctx->nr_events--; + if (event->attr.inherit_stat) + ctx->nr_stat--; + + list_del_init(&event->group_entry); + list_del_rcu(&event->event_entry); + + if (event->group_leader != event) + event->group_leader->nr_siblings--; + + /* + * If this was a group event with sibling events then + * upgrade the siblings to singleton events by adding them + * to the context list directly: + */ + list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { + + list_move_tail(&sibling->group_entry, &ctx->group_list); + sibling->group_leader = sibling; + } +} + +static void +event_sched_out(struct perf_event *event, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ + if (event->state != PERF_EVENT_STATE_ACTIVE) + return; + + event->state = PERF_EVENT_STATE_INACTIVE; + if (event->pending_disable) { + event->pending_disable = 0; + event->state = PERF_EVENT_STATE_OFF; + } + event->tstamp_stopped = ctx->time; + event->pmu->disable(event); + event->oncpu = -1; + + if (!is_software_event(event)) + cpuctx->active_oncpu--; + ctx->nr_active--; + if (event->attr.exclusive || !cpuctx->active_oncpu) + cpuctx->exclusive = 0; +} + +static void +group_sched_out(struct perf_event *group_event, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ + struct perf_event *event; + + if (group_event->state != PERF_EVENT_STATE_ACTIVE) + return; + + event_sched_out(group_event, cpuctx, ctx); + + /* + * Schedule out siblings (if any): + */ + list_for_each_entry(event, &group_event->sibling_list, group_entry) + event_sched_out(event, cpuctx, ctx); + + if (group_event->attr.exclusive) + cpuctx->exclusive = 0; +} + +/* + * Cross CPU call to remove a performance event + * + * We disable the event on the hardware level first. After that we + * remove it from the context list. + */ +static void __perf_event_remove_from_context(void *info) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_event *event = info; + struct perf_event_context *ctx = event->ctx; + + /* + * If this is a task context, we need to check whether it is + * the current task context of this cpu. If not it has been + * scheduled out before the smp call arrived. + */ + if (ctx->task && cpuctx->task_ctx != ctx) + return; + + spin_lock(&ctx->lock); + /* + * Protect the list operation against NMI by disabling the + * events on a global level. + */ + perf_disable(); + + event_sched_out(event, cpuctx, ctx); + + list_del_event(event, ctx); + + if (!ctx->task) { + /* + * Allow more per task events with respect to the + * reservation: + */ + cpuctx->max_pertask = + min(perf_max_events - ctx->nr_events, + perf_max_events - perf_reserved_percpu); + } + + perf_enable(); + spin_unlock(&ctx->lock); +} + + +/* + * Remove the event from a task's (or a CPU's) list of events. + * + * Must be called with ctx->mutex held. + * + * CPU events are removed with a smp call. For task events we only + * call when the task is on a CPU. + * + * If event->ctx is a cloned context, callers must make sure that + * every task struct that event->ctx->task could possibly point to + * remains valid. This is OK when called from perf_release since + * that only calls us on the top-level context, which can't be a clone. + * When called from perf_event_exit_task, it's OK because the + * context has been detached from its task. 
+ */
+static void perf_event_remove_from_context(struct perf_event *event)
+{
+        struct perf_event_context *ctx = event->ctx;
+        struct task_struct *task = ctx->task;
+
+        if (!task) {
+                /*
+                 * Per cpu events are removed via an smp call and
+                 * the removal is always successful.
+                 */
+                smp_call_function_single(event->cpu,
+                                         __perf_event_remove_from_context,
+                                         event, 1);
+                return;
+        }
+
+retry:
+        task_oncpu_function_call(task, __perf_event_remove_from_context,
+                                 event);
+
+        spin_lock_irq(&ctx->lock);
+        /*
+         * If the context is active we need to retry the smp call.
+         */
+        if (ctx->nr_active && !list_empty(&event->group_entry)) {
+                spin_unlock_irq(&ctx->lock);
+                goto retry;
+        }
+
+        /*
+         * The lock prevents that this context is scheduled in so we
+         * can remove the event safely, if the call above did not
+         * succeed.
+         */
+        if (!list_empty(&event->group_entry)) {
+                list_del_event(event, ctx);
+        }
+        spin_unlock_irq(&ctx->lock);
+}
+
+static inline u64 perf_clock(void)
+{
+        return cpu_clock(smp_processor_id());
+}
+
+/*
+ * Update the record of the current time in a context.
+ */
+static void update_context_time(struct perf_event_context *ctx)
+{
+        u64 now = perf_clock();
+
+        ctx->time += now - ctx->timestamp;
+        ctx->timestamp = now;
+}
+
+/*
+ * Update the total_time_enabled and total_time_running fields for an event.
+ */
+static void update_event_times(struct perf_event *event)
+{
+        struct perf_event_context *ctx = event->ctx;
+        u64 run_end;
+
+        if (event->state < PERF_EVENT_STATE_INACTIVE ||
+            event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
+                return;
+
+        event->total_time_enabled = ctx->time - event->tstamp_enabled;
+
+        if (event->state == PERF_EVENT_STATE_INACTIVE)
+                run_end = event->tstamp_stopped;
+        else
+                run_end = ctx->time;
+
+        event->total_time_running = run_end - event->tstamp_running;
+}
+
+/*
+ * Update total_time_enabled and total_time_running for all events in a group.
+ */
+static void update_group_times(struct perf_event *leader)
+{
+        struct perf_event *event;
+
+        update_event_times(leader);
+        list_for_each_entry(event, &leader->sibling_list, group_entry)
+                update_event_times(event);
+}
+
+/*
+ * Cross CPU call to disable a performance event
+ */
+static void __perf_event_disable(void *info)
+{
+        struct perf_event *event = info;
+        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+        struct perf_event_context *ctx = event->ctx;
+
+        /*
+         * If this is a per-task event, need to check whether this
+         * event's task is the current task on this cpu.
+         */
+        if (ctx->task && cpuctx->task_ctx != ctx)
+                return;
+
+        spin_lock(&ctx->lock);
+
+        /*
+         * If the event is on, turn it off.
+         * If it is in error state, leave it in error state.
+         */
+        if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+                update_context_time(ctx);
+                update_group_times(event);
+                if (event == event->group_leader)
+                        group_sched_out(event, cpuctx, ctx);
+                else
+                        event_sched_out(event, cpuctx, ctx);
+                event->state = PERF_EVENT_STATE_OFF;
+        }
+
+        spin_unlock(&ctx->lock);
+}
+
+/*
+ * Disable an event.
+ *
+ * If event->ctx is a cloned context, callers must make sure that
+ * every task struct that event->ctx->task could possibly point to
+ * remains valid. This condition is satisfied when called through
+ * perf_event_for_each_child or perf_event_for_each because they
+ * hold the top-level event's child_mutex, so any descendant that
+ * goes to exit will block in sync_child_event.
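Concrete numbers for the two time fields maintained by update_event_times() earlier in this hunk (arbitrary units; an INACTIVE event that stopped at t=70 in a context whose clock reads 100):

    #include <assert.h>

    int main(void)
    {
            unsigned long long ctx_time = 100;  /* after update_context_time() */
            unsigned long long tstamp_enabled = 10, tstamp_running = 30;
            unsigned long long tstamp_stopped = 70;        /* INACTIVE */

            unsigned long long enabled = ctx_time - tstamp_enabled;       /* 90 */
            unsigned long long running = tstamp_stopped - tstamp_running; /* 40 */

            /* On the PMU for 40 of the 90 units it was enabled; user
             * space scales raw counts by running/enabled. */
            assert(enabled == 90 && running == 40);
            return 0;
    }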
+ * When called from perf_pending_event it's OK because event->ctx + * is the current context on this CPU and preemption is disabled, + * hence we can't get into perf_event_task_sched_out for this context. + */ +static void perf_event_disable(struct perf_event *event) +{ + struct perf_event_context *ctx = event->ctx; + struct task_struct *task = ctx->task; + + if (!task) { + /* + * Disable the event on the cpu that it's on + */ + smp_call_function_single(event->cpu, __perf_event_disable, + event, 1); + return; + } + + retry: + task_oncpu_function_call(task, __perf_event_disable, event); + + spin_lock_irq(&ctx->lock); + /* + * If the event is still active, we need to retry the cross-call. + */ + if (event->state == PERF_EVENT_STATE_ACTIVE) { + spin_unlock_irq(&ctx->lock); + goto retry; + } + + /* + * Since we have the lock this context can't be scheduled + * in, so we can change the state safely. + */ + if (event->state == PERF_EVENT_STATE_INACTIVE) { + update_group_times(event); + event->state = PERF_EVENT_STATE_OFF; + } + + spin_unlock_irq(&ctx->lock); +} + +static int +event_sched_in(struct perf_event *event, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, + int cpu) +{ + if (event->state <= PERF_EVENT_STATE_OFF) + return 0; + + event->state = PERF_EVENT_STATE_ACTIVE; + event->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ + /* + * The new state must be visible before we turn it on in the hardware: + */ + smp_wmb(); + + if (event->pmu->enable(event)) { + event->state = PERF_EVENT_STATE_INACTIVE; + event->oncpu = -1; + return -EAGAIN; + } + + event->tstamp_running += ctx->time - event->tstamp_stopped; + + if (!is_software_event(event)) + cpuctx->active_oncpu++; + ctx->nr_active++; + + if (event->attr.exclusive) + cpuctx->exclusive = 1; + + return 0; +} + +static int +group_sched_in(struct perf_event *group_event, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, + int cpu) +{ + struct perf_event *event, *partial_group; + int ret; + + if (group_event->state == PERF_EVENT_STATE_OFF) + return 0; + + ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu); + if (ret) + return ret < 0 ? ret : 0; + + if (event_sched_in(group_event, cpuctx, ctx, cpu)) + return -EAGAIN; + + /* + * Schedule in siblings as one group (if any): + */ + list_for_each_entry(event, &group_event->sibling_list, group_entry) { + if (event_sched_in(event, cpuctx, ctx, cpu)) { + partial_group = event; + goto group_error; + } + } + + return 0; + +group_error: + /* + * Groups can be scheduled in as one unit only, so undo any + * partial group before returning: + */ + list_for_each_entry(event, &group_event->sibling_list, group_entry) { + if (event == partial_group) + break; + event_sched_out(event, cpuctx, ctx); + } + event_sched_out(group_event, cpuctx, ctx); + + return -EAGAIN; +} + +/* + * Return 1 for a group consisting entirely of software events, + * 0 if the group contains any hardware events. + */ +static int is_software_only_group(struct perf_event *leader) +{ + struct perf_event *event; + + if (!is_software_event(leader)) + return 0; + + list_for_each_entry(event, &leader->sibling_list, group_entry) + if (!is_software_event(event)) + return 0; + + return 1; +} + +/* + * Work out whether we can put this event group on the CPU now. + */ +static int group_can_go_on(struct perf_event *event, + struct perf_cpu_context *cpuctx, + int can_add_hw) +{ + /* + * Groups consisting entirely of software events can always go on. 
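group_sched_in() above is strictly all-or-nothing: if any sibling fails, everything already started is unwound and the caller sees -EAGAIN. The unwind idiom in isolation (hypothetical start/stop callbacks; member 2 refuses to start):

    #include <stdio.h>

    static int start(int i)
    {
            if (i == 2)         /* pretend this member's slot is taken */
                    return -1;
            printf("started %d\n", i);
            return 0;
    }

    static void stop(int i)
    {
            printf("stopped %d\n", i);
    }

    /* Mirrors group_sched_in(): every member goes on, or none do. */
    static int group_start(int n)
    {
            int i;

            for (i = 0; i < n; i++) {
                    if (start(i))
                            goto unwind;
            }
            return 0;
    unwind:
            while (i--)
                    stop(i);
            return -1;
    }

    int main(void)
    {
            return group_start(4) ? 1 : 0;  /* starts 0,1 then stops 1,0 */
    }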
+ */ + if (is_software_only_group(event)) + return 1; + /* + * If an exclusive group is already on, no other hardware + * events can go on. + */ + if (cpuctx->exclusive) + return 0; + /* + * If this group is exclusive and there are already + * events on the CPU, it can't go on. + */ + if (event->attr.exclusive && cpuctx->active_oncpu) + return 0; + /* + * Otherwise, try to add it if all previous groups were able + * to go on. + */ + return can_add_hw; +} + +static void add_event_to_ctx(struct perf_event *event, + struct perf_event_context *ctx) +{ + list_add_event(event, ctx); + event->tstamp_enabled = ctx->time; + event->tstamp_running = ctx->time; + event->tstamp_stopped = ctx->time; +} + +/* + * Cross CPU call to install and enable a performance event + * + * Must be called with ctx->mutex held + */ +static void __perf_install_in_context(void *info) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_event *event = info; + struct perf_event_context *ctx = event->ctx; + struct perf_event *leader = event->group_leader; + int cpu = smp_processor_id(); + int err; + + /* + * If this is a task context, we need to check whether it is + * the current task context of this cpu. If not it has been + * scheduled out before the smp call arrived. + * Or possibly this is the right context but it isn't + * on this cpu because it had no events. + */ + if (ctx->task && cpuctx->task_ctx != ctx) { + if (cpuctx->task_ctx || ctx->task != current) + return; + cpuctx->task_ctx = ctx; + } + + spin_lock(&ctx->lock); + ctx->is_active = 1; + update_context_time(ctx); + + /* + * Protect the list operation against NMI by disabling the + * events on a global level. NOP for non NMI based events. + */ + perf_disable(); + + add_event_to_ctx(event, ctx); + + /* + * Don't put the event on if it is disabled or if + * it is in a group and the group isn't on. + */ + if (event->state != PERF_EVENT_STATE_INACTIVE || + (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)) + goto unlock; + + /* + * An exclusive event can't go on if there are already active + * hardware events, and no hardware event can go on if there + * is already an exclusive event on. + */ + if (!group_can_go_on(event, cpuctx, 1)) + err = -EEXIST; + else + err = event_sched_in(event, cpuctx, ctx, cpu); + + if (err) { + /* + * This event couldn't go on. If it is in a group + * then we have to pull the whole group off. + * If the event group is pinned then put it in error state. + */ + if (leader != event) + group_sched_out(leader, cpuctx, ctx); + if (leader->attr.pinned) { + update_group_times(leader); + leader->state = PERF_EVENT_STATE_ERROR; + } + } + + if (!err && !ctx->task && cpuctx->max_pertask) + cpuctx->max_pertask--; + + unlock: + perf_enable(); + + spin_unlock(&ctx->lock); +} + +/* + * Attach a performance event to a context + * + * First we add the event to the list with the hardware enable bit + * in event->hw_config cleared. + * + * If the event is attached to a task which is on a CPU we use a smp + * call to enable it in the task context. The task might have been + * scheduled away, but we check this in the smp call again. + * + * Must be called with ctx->mutex held. + */ +static void +perf_install_in_context(struct perf_event_context *ctx, + struct perf_event *event, + int cpu) +{ + struct task_struct *task = ctx->task; + + if (!task) { + /* + * Per cpu events are installed via an smp call and + * the install is always sucessful. 
+ */
+                smp_call_function_single(cpu, __perf_install_in_context,
+                                         event, 1);
+                return;
+        }
+
+retry:
+        task_oncpu_function_call(task, __perf_install_in_context,
+                                 event);
+
+        spin_lock_irq(&ctx->lock);
+        /*
+         * we need to retry the smp call.
+         */
+        if (ctx->is_active && list_empty(&event->group_entry)) {
+                spin_unlock_irq(&ctx->lock);
+                goto retry;
+        }
+
+        /*
+         * The lock prevents that this context is scheduled in so we
+         * can add the event safely, if the call above did not
+         * succeed.
+         */
+        if (list_empty(&event->group_entry))
+                add_event_to_ctx(event, ctx);
+        spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Put an event into inactive state and update time fields.
+ * Enabling the leader of a group effectively enables all
+ * the group members that aren't explicitly disabled, so we
+ * have to update their ->tstamp_enabled also.
+ * Note: this works for group members as well as group leaders
+ * since the non-leader members' sibling_lists will be empty.
+ */
+static void __perf_event_mark_enabled(struct perf_event *event,
+                                        struct perf_event_context *ctx)
+{
+        struct perf_event *sub;
+
+        event->state = PERF_EVENT_STATE_INACTIVE;
+        event->tstamp_enabled = ctx->time - event->total_time_enabled;
+        list_for_each_entry(sub, &event->sibling_list, group_entry)
+                if (sub->state >= PERF_EVENT_STATE_INACTIVE)
+                        sub->tstamp_enabled =
+                                ctx->time - sub->total_time_enabled;
+}
+
+/*
+ * Cross CPU call to enable a performance event
+ */
+static void __perf_event_enable(void *info)
+{
+        struct perf_event *event = info;
+        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+        struct perf_event_context *ctx = event->ctx;
+        struct perf_event *leader = event->group_leader;
+        int err;
+
+        /*
+         * If this is a per-task event, need to check whether this
+         * event's task is the current task on this cpu.
+         */
+        if (ctx->task && cpuctx->task_ctx != ctx) {
+                if (cpuctx->task_ctx || ctx->task != current)
+                        return;
+                cpuctx->task_ctx = ctx;
+        }
+
+        spin_lock(&ctx->lock);
+        ctx->is_active = 1;
+        update_context_time(ctx);
+
+        if (event->state >= PERF_EVENT_STATE_INACTIVE)
+                goto unlock;
+        __perf_event_mark_enabled(event, ctx);
+
+        /*
+         * If the event is in a group and isn't the group leader,
+         * then don't put it on unless the group is on.
+         */
+        if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
+                goto unlock;
+
+        if (!group_can_go_on(event, cpuctx, 1)) {
+                err = -EEXIST;
+        } else {
+                perf_disable();
+                if (event == leader)
+                        err = group_sched_in(event, cpuctx, ctx,
+                                             smp_processor_id());
+                else
+                        err = event_sched_in(event, cpuctx, ctx,
+                                             smp_processor_id());
+                perf_enable();
+        }
+
+        if (err) {
+                /*
+                 * If this event can't go on and it's part of a
+                 * group, then the whole group has to come off.
+                 */
+                if (leader != event)
+                        group_sched_out(leader, cpuctx, ctx);
+                if (leader->attr.pinned) {
+                        update_group_times(leader);
+                        leader->state = PERF_EVENT_STATE_ERROR;
+                }
+        }
+
+ unlock:
+        spin_unlock(&ctx->lock);
+}
+
+/*
+ * Enable an event.
+ *
+ * If event->ctx is a cloned context, callers must make sure that
+ * every task struct that event->ctx->task could possibly point to
+ * remains valid. This condition is satisfied when called through
+ * perf_event_for_each_child or perf_event_for_each as described
+ * for perf_event_disable.
+ */ +static void perf_event_enable(struct perf_event *event) +{ + struct perf_event_context *ctx = event->ctx; + struct task_struct *task = ctx->task; + + if (!task) { + /* + * Enable the event on the cpu that it's on + */ + smp_call_function_single(event->cpu, __perf_event_enable, + event, 1); + return; + } + + spin_lock_irq(&ctx->lock); + if (event->state >= PERF_EVENT_STATE_INACTIVE) + goto out; + + /* + * If the event is in error state, clear that first. + * That way, if we see the event in error state below, we + * know that it has gone back into error state, as distinct + * from the task having been scheduled away before the + * cross-call arrived. + */ + if (event->state == PERF_EVENT_STATE_ERROR) + event->state = PERF_EVENT_STATE_OFF; + + retry: + spin_unlock_irq(&ctx->lock); + task_oncpu_function_call(task, __perf_event_enable, event); + + spin_lock_irq(&ctx->lock); + + /* + * If the context is active and the event is still off, + * we need to retry the cross-call. + */ + if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) + goto retry; + + /* + * Since we have the lock this context can't be scheduled + * in, so we can change the state safely. + */ + if (event->state == PERF_EVENT_STATE_OFF) + __perf_event_mark_enabled(event, ctx); + + out: + spin_unlock_irq(&ctx->lock); +} + +static int perf_event_refresh(struct perf_event *event, int refresh) +{ + /* + * not supported on inherited events + */ + if (event->attr.inherit) + return -EINVAL; + + atomic_add(refresh, &event->event_limit); + perf_event_enable(event); + + return 0; +} + +void __perf_event_sched_out(struct perf_event_context *ctx, + struct perf_cpu_context *cpuctx) +{ + struct perf_event *event; + + spin_lock(&ctx->lock); + ctx->is_active = 0; + if (likely(!ctx->nr_events)) + goto out; + update_context_time(ctx); + + perf_disable(); + if (ctx->nr_active) { + list_for_each_entry(event, &ctx->group_list, group_entry) { + if (event != event->group_leader) + event_sched_out(event, cpuctx, ctx); + else + group_sched_out(event, cpuctx, ctx); + } + } + perf_enable(); + out: + spin_unlock(&ctx->lock); +} + +/* + * Test whether two contexts are equivalent, i.e. whether they + * have both been cloned from the same version of the same context + * and they both have the same number of enabled events. + * If the number of enabled events is the same, then the set + * of enabled events should be the same, because these are both + * inherited contexts, therefore we can't access individual events + * in them directly with an fd; we can only enable/disable all + * events via prctl, or enable/disable all events in a family + * via ioctl, which will have the same effect on both contexts. + */ +static int context_equiv(struct perf_event_context *ctx1, + struct perf_event_context *ctx2) +{ + return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx + && ctx1->parent_gen == ctx2->parent_gen + && !ctx1->pin_count && !ctx2->pin_count; +} + +static void __perf_event_read(void *event); + +static void __perf_event_sync_stat(struct perf_event *event, + struct perf_event *next_event) +{ + u64 value; + + if (!event->attr.inherit_stat) + return; + + /* + * Update the event value, we cannot use perf_event_read() + * because we're in the middle of a context switch and have IRQs + * disabled, which upsets smp_call_function_single(), however + * we know the event must be on the current CPU, therefore we + * don't need to use it. 
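+ * (smp_call_function_single() warns when called with IRQs
+ * disabled, and waiting for the target CPU in that state can
+ * deadlock.)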
+ */
+ switch (event->state) {
+ case PERF_EVENT_STATE_ACTIVE:
+ __perf_event_read(event);
+ break;
+
+ case PERF_EVENT_STATE_INACTIVE:
+ update_event_times(event);
+ break;
+
+ default:
+ break;
+ }
+
+ /*
+ * In order to keep per-task stats reliable we need to flip the event
+ * values when we flip the contexts.
+ */
+ value = atomic64_read(&next_event->count);
+ value = atomic64_xchg(&event->count, value);
+ atomic64_set(&next_event->count, value);
+
+ swap(event->total_time_enabled, next_event->total_time_enabled);
+ swap(event->total_time_running, next_event->total_time_running);
+
+ /*
+ * Since we swizzled the values, update the user visible data too.
+ */
+ perf_event_update_userpage(event);
+ perf_event_update_userpage(next_event);
+}
+
+#define list_next_entry(pos, member) \
+ list_entry(pos->member.next, typeof(*pos), member)
+
+static void perf_event_sync_stat(struct perf_event_context *ctx,
+ struct perf_event_context *next_ctx)
+{
+ struct perf_event *event, *next_event;
+
+ if (!ctx->nr_stat)
+ return;
+
+ event = list_first_entry(&ctx->event_list,
+ struct perf_event, event_entry);
+
+ next_event = list_first_entry(&next_ctx->event_list,
+ struct perf_event, event_entry);
+
+ while (&event->event_entry != &ctx->event_list &&
+ &next_event->event_entry != &next_ctx->event_list) {
+
+ __perf_event_sync_stat(event, next_event);
+
+ event = list_next_entry(event, event_entry);
+ next_event = list_next_entry(next_event, event_entry);
+ }
+}
+
+/*
+ * Called from scheduler to remove the events of the current task,
+ * with interrupts disabled.
+ *
+ * We stop each event and update the event value in event->count.
+ *
+ * This does not protect us against NMI, but disable()
+ * sets the disabled bit in the control field of event _before_
+ * accessing the event control register. If an NMI hits, then it will
+ * not restart the event.
+ */
+void perf_event_task_sched_out(struct task_struct *task,
+ struct task_struct *next, int cpu)
+{
+ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_event_context *ctx = task->perf_event_ctxp;
+ struct perf_event_context *next_ctx;
+ struct perf_event_context *parent;
+ struct pt_regs *regs;
+ int do_switch = 1;
+
+ regs = task_pt_regs(task);
+ perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
+
+ if (likely(!ctx || !cpuctx->task_ctx))
+ return;
+
+ update_context_time(ctx);
+
+ rcu_read_lock();
+ parent = rcu_dereference(ctx->parent_ctx);
+ next_ctx = next->perf_event_ctxp;
+ if (parent && next_ctx &&
+ rcu_dereference(next_ctx->parent_ctx) == parent) {
+ /*
+ * Looks like the two contexts are clones, so we might be
+ * able to optimize the context switch. We lock both
+ * contexts and check that they are clones under the
+ * lock (including re-checking that neither has been
+ * uncloned in the meantime). It doesn't matter which
+ * order we take the locks because no other cpu could
+ * be trying to lock both of these tasks.
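+ *
+ * Concretely, the optimization below just exchanges the two
+ * tasks' context pointers,
+ *
+ *	task->perf_event_ctxp <-> next->perf_event_ctxp
+ *
+ * instead of unscheduling all of task's events and rescheduling
+ * next's.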
+ */
+ spin_lock(&ctx->lock);
+ spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
+ if (context_equiv(ctx, next_ctx)) {
+ /*
+ * XXX do we need a memory barrier of sorts
+ * wrt to rcu_dereference() of perf_event_ctxp
+ */
+ task->perf_event_ctxp = next_ctx;
+ next->perf_event_ctxp = ctx;
+ ctx->task = next;
+ next_ctx->task = task;
+ do_switch = 0;
+
+ perf_event_sync_stat(ctx, next_ctx);
+ }
+ spin_unlock(&next_ctx->lock);
+ spin_unlock(&ctx->lock);
+ }
+ rcu_read_unlock();
+
+ if (do_switch) {
+ __perf_event_sched_out(ctx, cpuctx);
+ cpuctx->task_ctx = NULL;
+ }
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void __perf_event_task_sched_out(struct perf_event_context *ctx)
+{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+
+ if (!cpuctx->task_ctx)
+ return;
+
+ if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
+ return;
+
+ __perf_event_sched_out(ctx, cpuctx);
+ cpuctx->task_ctx = NULL;
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
+{
+ __perf_event_sched_out(&cpuctx->ctx, cpuctx);
+}
+
+static void
+__perf_event_sched_in(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx, int cpu)
+{
+ struct perf_event *event;
+ int can_add_hw = 1;
+
+ spin_lock(&ctx->lock);
+ ctx->is_active = 1;
+ if (likely(!ctx->nr_events))
+ goto out;
+
+ ctx->timestamp = perf_clock();
+
+ perf_disable();
+
+ /*
+ * First go through the list and put on any pinned groups
+ * in order to give them the best chance of going on.
+ */
+ list_for_each_entry(event, &ctx->group_list, group_entry) {
+ if (event->state <= PERF_EVENT_STATE_OFF ||
+ !event->attr.pinned)
+ continue;
+ if (event->cpu != -1 && event->cpu != cpu)
+ continue;
+
+ if (event != event->group_leader)
+ event_sched_in(event, cpuctx, ctx, cpu);
+ else {
+ if (group_can_go_on(event, cpuctx, 1))
+ group_sched_in(event, cpuctx, ctx, cpu);
+ }
+
+ /*
+ * If this pinned group hasn't been scheduled,
+ * put it in error state.
+ */
+ if (event->state == PERF_EVENT_STATE_INACTIVE) {
+ update_group_times(event);
+ event->state = PERF_EVENT_STATE_ERROR;
+ }
+ }
+
+ list_for_each_entry(event, &ctx->group_list, group_entry) {
+ /*
+ * Ignore events in OFF or ERROR state, and
+ * ignore pinned events since we did them already.
+ */
+ if (event->state <= PERF_EVENT_STATE_OFF ||
+ event->attr.pinned)
+ continue;
+
+ /*
+ * Listen to the 'cpu' scheduling filter constraint
+ * of events:
+ */
+ if (event->cpu != -1 && event->cpu != cpu)
+ continue;
+
+ if (event != event->group_leader) {
+ if (event_sched_in(event, cpuctx, ctx, cpu))
+ can_add_hw = 0;
+ } else {
+ if (group_can_go_on(event, cpuctx, can_add_hw)) {
+ if (group_sched_in(event, cpuctx, ctx, cpu))
+ can_add_hw = 0;
+ }
+ }
+ }
+ perf_enable();
+ out:
+ spin_unlock(&ctx->lock);
+}
+
+/*
+ * Called from scheduler to add the events of the current task
+ * with interrupts disabled.
+ *
+ * We restore the event value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of event _before_
+ * accessing the event control register. If an NMI hits, then it will
+ * keep the event running.
+ */ +void perf_event_task_sched_in(struct task_struct *task, int cpu) +{ + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_event_context *ctx = task->perf_event_ctxp; + + if (likely(!ctx)) + return; + if (cpuctx->task_ctx == ctx) + return; + __perf_event_sched_in(ctx, cpuctx, cpu); + cpuctx->task_ctx = ctx; +} + +static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) +{ + struct perf_event_context *ctx = &cpuctx->ctx; + + __perf_event_sched_in(ctx, cpuctx, cpu); +} + +#define MAX_INTERRUPTS (~0ULL) + +static void perf_log_throttle(struct perf_event *event, int enable); + +static void perf_adjust_period(struct perf_event *event, u64 events) +{ + struct hw_perf_event *hwc = &event->hw; + u64 period, sample_period; + s64 delta; + + events *= hwc->sample_period; + period = div64_u64(events, event->attr.sample_freq); + + delta = (s64)(period - hwc->sample_period); + delta = (delta + 7) / 8; /* low pass filter */ + + sample_period = hwc->sample_period + delta; + + if (!sample_period) + sample_period = 1; + + hwc->sample_period = sample_period; +} + +static void perf_ctx_adjust_freq(struct perf_event_context *ctx) +{ + struct perf_event *event; + struct hw_perf_event *hwc; + u64 interrupts, freq; + + spin_lock(&ctx->lock); + list_for_each_entry(event, &ctx->group_list, group_entry) { + if (event->state != PERF_EVENT_STATE_ACTIVE) + continue; + + hwc = &event->hw; + + interrupts = hwc->interrupts; + hwc->interrupts = 0; + + /* + * unthrottle events on the tick + */ + if (interrupts == MAX_INTERRUPTS) { + perf_log_throttle(event, 1); + event->pmu->unthrottle(event); + interrupts = 2*sysctl_perf_event_sample_rate/HZ; + } + + if (!event->attr.freq || !event->attr.sample_freq) + continue; + + /* + * if the specified freq < HZ then we need to skip ticks + */ + if (event->attr.sample_freq < HZ) { + freq = event->attr.sample_freq; + + hwc->freq_count += freq; + hwc->freq_interrupts += interrupts; + + if (hwc->freq_count < HZ) + continue; + + interrupts = hwc->freq_interrupts; + hwc->freq_interrupts = 0; + hwc->freq_count -= HZ; + } else + freq = HZ; + + perf_adjust_period(event, freq * interrupts); + + /* + * In order to avoid being stalled by an (accidental) huge + * sample period, force reset the sample period if we didn't + * get any events in this freq period. 
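+ *
+ * For scale, with made-up numbers: at attr.sample_freq = 1000 and
+ * a current sample_period of 10000, an interrupt rate worth 2000
+ * events/sec gives
+ *
+ *	period = (2000 * 10000) / 1000	= 20000
+ *	delta  = (20000 - 10000 + 7) / 8	=  1250
+ *
+ * i.e. perf_adjust_period() only moves about 1/8th of the way
+ * towards the ideal period per invocation -- which is also why a
+ * huge period would otherwise take ages to decay on its own.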
+ */ + if (!interrupts) { + perf_disable(); + event->pmu->disable(event); + atomic64_set(&hwc->period_left, 0); + event->pmu->enable(event); + perf_enable(); + } + } + spin_unlock(&ctx->lock); +} + +/* + * Round-robin a context's events: + */ +static void rotate_ctx(struct perf_event_context *ctx) +{ + struct perf_event *event; + + if (!ctx->nr_events) + return; + + spin_lock(&ctx->lock); + /* + * Rotate the first entry last (works just fine for group events too): + */ + perf_disable(); + list_for_each_entry(event, &ctx->group_list, group_entry) { + list_move_tail(&event->group_entry, &ctx->group_list); + break; + } + perf_enable(); + + spin_unlock(&ctx->lock); +} + +void perf_event_task_tick(struct task_struct *curr, int cpu) +{ + struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; + + if (!atomic_read(&nr_events)) + return; + + cpuctx = &per_cpu(perf_cpu_context, cpu); + ctx = curr->perf_event_ctxp; + + perf_ctx_adjust_freq(&cpuctx->ctx); + if (ctx) + perf_ctx_adjust_freq(ctx); + + perf_event_cpu_sched_out(cpuctx); + if (ctx) + __perf_event_task_sched_out(ctx); + + rotate_ctx(&cpuctx->ctx); + if (ctx) + rotate_ctx(ctx); + + perf_event_cpu_sched_in(cpuctx, cpu); + if (ctx) + perf_event_task_sched_in(curr, cpu); +} + +/* + * Enable all of a task's events that have been marked enable-on-exec. + * This expects task == current. + */ +static void perf_event_enable_on_exec(struct task_struct *task) +{ + struct perf_event_context *ctx; + struct perf_event *event; + unsigned long flags; + int enabled = 0; + + local_irq_save(flags); + ctx = task->perf_event_ctxp; + if (!ctx || !ctx->nr_events) + goto out; + + __perf_event_task_sched_out(ctx); + + spin_lock(&ctx->lock); + + list_for_each_entry(event, &ctx->group_list, group_entry) { + if (!event->attr.enable_on_exec) + continue; + event->attr.enable_on_exec = 0; + if (event->state >= PERF_EVENT_STATE_INACTIVE) + continue; + __perf_event_mark_enabled(event, ctx); + enabled = 1; + } + + /* + * Unclone this context if we enabled any event. + */ + if (enabled) + unclone_ctx(ctx); + + spin_unlock(&ctx->lock); + + perf_event_task_sched_in(task, smp_processor_id()); + out: + local_irq_restore(flags); +} + +/* + * Cross CPU call to read the hardware event + */ +static void __perf_event_read(void *info) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_event *event = info; + struct perf_event_context *ctx = event->ctx; + unsigned long flags; + + /* + * If this is a task context, we need to check whether it is + * the current task context of this cpu. If not it has been + * scheduled out before the smp call arrived. In that case + * event->count would have been updated to a recent sample + * when the event was scheduled out. 
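+ * (The reading side, perf_event_read() below, pairs with this
+ * via
+ *
+ *	smp_call_function_single(event->oncpu, __perf_event_read,
+ *				 event, 1);
+ *
+ * so an active event is always sampled on the CPU it runs on.)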
+ */
+ if (ctx->task && cpuctx->task_ctx != ctx)
+ return;
+
+ local_irq_save(flags);
+ if (ctx->is_active)
+ update_context_time(ctx);
+ event->pmu->read(event);
+ update_event_times(event);
+ local_irq_restore(flags);
+}
+
+static u64 perf_event_read(struct perf_event *event)
+{
+ /*
+ * If event is enabled and currently active on a CPU, update the
+ * value in the event structure:
+ */
+ if (event->state == PERF_EVENT_STATE_ACTIVE) {
+ smp_call_function_single(event->oncpu,
+ __perf_event_read, event, 1);
+ } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
+ update_event_times(event);
+ }
+
+ return atomic64_read(&event->count);
+}
+
+/*
+ * Initialize the perf_event context in a task_struct:
+ */
+static void
+__perf_event_init_context(struct perf_event_context *ctx,
+ struct task_struct *task)
+{
+ memset(ctx, 0, sizeof(*ctx));
+ spin_lock_init(&ctx->lock);
+ mutex_init(&ctx->mutex);
+ INIT_LIST_HEAD(&ctx->group_list);
+ INIT_LIST_HEAD(&ctx->event_list);
+ atomic_set(&ctx->refcount, 1);
+ ctx->task = task;
+}
+
+static struct perf_event_context *find_get_context(pid_t pid, int cpu)
+{
+ struct perf_event_context *ctx;
+ struct perf_cpu_context *cpuctx;
+ struct task_struct *task;
+ unsigned long flags;
+ int err;
+
+ /*
+ * If cpu is not a wildcard then this is a percpu event:
+ */
+ if (cpu != -1) {
+ /* Must be root to operate on a CPU event: */
+ if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EACCES);
+
+ if (cpu < 0 || cpu >= num_possible_cpus())
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * We could be clever and allow attaching an event to an
+ * offline CPU and activate it when the CPU comes up, but
+ * that's for later.
+ */
+ if (!cpu_isset(cpu, cpu_online_map))
+ return ERR_PTR(-ENODEV);
+
+ cpuctx = &per_cpu(perf_cpu_context, cpu);
+ ctx = &cpuctx->ctx;
+ get_ctx(ctx);
+
+ return ctx;
+ }
+
+ rcu_read_lock();
+ if (!pid)
+ task = current;
+ else
+ task = find_task_by_vpid(pid);
+ if (task)
+ get_task_struct(task);
+ rcu_read_unlock();
+
+ if (!task)
+ return ERR_PTR(-ESRCH);
+
+ /*
+ * Can't attach events to a dying task.
+ */
+ err = -ESRCH;
+ if (task->flags & PF_EXITING)
+ goto errout;
+
+ /* Reuse ptrace permission checks for now. */
+ err = -EACCES;
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ goto errout;
+
+ retry:
+ ctx = perf_lock_task_context(task, &flags);
+ if (ctx) {
+ unclone_ctx(ctx);
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ }
+
+ if (!ctx) {
+ ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!ctx)
+ goto errout;
+ __perf_event_init_context(ctx, task);
+ get_ctx(ctx);
+ if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) {
+ /*
+ * We raced with some other task; use
+ * the context they set.
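+ *
+ * This is the usual lock-free install pattern (sketch):
+ *
+ *	new = alloc();
+ *	if (cmpxchg(&ptr, NULL, new) != NULL) {
+ *		free(new);	/* lost the race */
+ *		goto retry;	/* look up the winner's copy */
+ *	}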
+ */ + kfree(ctx); + goto retry; + } + get_task_struct(task); + } + + put_task_struct(task); + return ctx; + + errout: + put_task_struct(task); + return ERR_PTR(err); +} + +static void free_event_rcu(struct rcu_head *head) +{ + struct perf_event *event; + + event = container_of(head, struct perf_event, rcu_head); + if (event->ns) + put_pid_ns(event->ns); + kfree(event); +} + +static void perf_pending_sync(struct perf_event *event); + +static void free_event(struct perf_event *event) +{ + perf_pending_sync(event); + + if (!event->parent) { + atomic_dec(&nr_events); + if (event->attr.mmap) + atomic_dec(&nr_mmap_events); + if (event->attr.comm) + atomic_dec(&nr_comm_events); + if (event->attr.task) + atomic_dec(&nr_task_events); + } + + if (event->output) { + fput(event->output->filp); + event->output = NULL; + } + + if (event->destroy) + event->destroy(event); + + put_ctx(event->ctx); + call_rcu(&event->rcu_head, free_event_rcu); +} + +/* + * Called when the last reference to the file is gone. + */ +static int perf_release(struct inode *inode, struct file *file) +{ + struct perf_event *event = file->private_data; + struct perf_event_context *ctx = event->ctx; + + file->private_data = NULL; + + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_event_remove_from_context(event); + mutex_unlock(&ctx->mutex); + + mutex_lock(&event->owner->perf_event_mutex); + list_del_init(&event->owner_entry); + mutex_unlock(&event->owner->perf_event_mutex); + put_task_struct(event->owner); + + free_event(event); + + return 0; +} + +static int perf_event_read_size(struct perf_event *event) +{ + int entry = sizeof(u64); /* value */ + int size = 0; + int nr = 1; + + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + size += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + size += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_ID) + entry += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_GROUP) { + nr += event->group_leader->nr_siblings; + size += sizeof(u64); + } + + size += entry * nr; + + return size; +} + +static u64 perf_event_read_value(struct perf_event *event) +{ + struct perf_event *child; + u64 total = 0; + + total += perf_event_read(event); + list_for_each_entry(child, &event->child_list, child_list) + total += perf_event_read(child); + + return total; +} + +static int perf_event_read_entry(struct perf_event *event, + u64 read_format, char __user *buf) +{ + int n = 0, count = 0; + u64 values[2]; + + values[n++] = perf_event_read_value(event); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(event); + + count = n * sizeof(u64); + + if (copy_to_user(buf, values, count)) + return -EFAULT; + + return count; +} + +static int perf_event_read_group(struct perf_event *event, + u64 read_format, char __user *buf) +{ + struct perf_event *leader = event->group_leader, *sub; + int n = 0, size = 0, err = -EFAULT; + u64 values[3]; + + values[n++] = 1 + leader->nr_siblings; + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = leader->total_time_enabled + + atomic64_read(&leader->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = leader->total_time_running + + atomic64_read(&leader->child_total_time_running); + } + + size = n * sizeof(u64); + + if (copy_to_user(buf, values, size)) + return -EFAULT; + + err = perf_event_read_entry(leader, read_format, buf + size); + if (err < 0) + return err; + + size += err; + + list_for_each_entry(sub, 
&leader->sibling_list, group_entry) {
+ err = perf_event_read_entry(sub, read_format,
+ buf + size);
+ if (err < 0)
+ return err;
+
+ size += err;
+ }
+
+ return size;
+}
+
+static int perf_event_read_one(struct perf_event *event,
+ u64 read_format, char __user *buf)
+{
+ u64 values[4];
+ int n = 0;
+
+ values[n++] = perf_event_read_value(event);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = event->total_time_enabled +
+ atomic64_read(&event->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = event->total_time_running +
+ atomic64_read(&event->child_total_time_running);
+ }
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_event_id(event);
+
+ if (copy_to_user(buf, values, n * sizeof(u64)))
+ return -EFAULT;
+
+ return n * sizeof(u64);
+}
+
+/*
+ * Read the performance event - simple non-blocking version for now
+ */
+static ssize_t
+perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
+{
+ u64 read_format = event->attr.read_format;
+ int ret;
+
+ /*
+ * Return end-of-file for a read on an event that is in
+ * error state (i.e. because it was pinned but it couldn't be
+ * scheduled on to the CPU at some point).
+ */
+ if (event->state == PERF_EVENT_STATE_ERROR)
+ return 0;
+
+ if (count < perf_event_read_size(event))
+ return -ENOSPC;
+
+ WARN_ON_ONCE(event->ctx->parent_ctx);
+ mutex_lock(&event->child_mutex);
+ if (read_format & PERF_FORMAT_GROUP)
+ ret = perf_event_read_group(event, read_format, buf);
+ else
+ ret = perf_event_read_one(event, read_format, buf);
+ mutex_unlock(&event->child_mutex);
+
+ return ret;
+}
+
+static ssize_t
+perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+ struct perf_event *event = file->private_data;
+
+ return perf_read_hw(event, buf, count);
+}
+
+static unsigned int perf_poll(struct file *file, poll_table *wait)
+{
+ struct perf_event *event = file->private_data;
+ struct perf_mmap_data *data;
+ unsigned int events = POLL_HUP;
+
+ rcu_read_lock();
+ data = rcu_dereference(event->data);
+ if (data)
+ events = atomic_xchg(&data->poll, 0);
+ rcu_read_unlock();
+
+ poll_wait(file, &event->waitq, wait);
+
+ return events;
+}
+
+static void perf_event_reset(struct perf_event *event)
+{
+ (void)perf_event_read(event);
+ atomic64_set(&event->count, 0);
+ perf_event_update_userpage(event);
+}
+
+/*
+ * Holding the top-level event's child_mutex means that any
+ * descendant process that has inherited this event will block
+ * in sync_child_event if it goes to exit, thus satisfying the
+ * task existence requirements of perf_event_enable/disable.
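+ *
+ * E.g. the PERF_EVENT_IOC_DISABLE ioctl below boils down to
+ *
+ *	perf_event_for_each_child(event, perf_event_disable);
+ *
+ * and perf_event_disable() can then safely dereference
+ * ctx->task for the event and each of its children.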
+ */
+static void perf_event_for_each_child(struct perf_event *event,
+ void (*func)(struct perf_event *))
+{
+ struct perf_event *child;
+
+ WARN_ON_ONCE(event->ctx->parent_ctx);
+ mutex_lock(&event->child_mutex);
+ func(event);
+ list_for_each_entry(child, &event->child_list, child_list)
+ func(child);
+ mutex_unlock(&event->child_mutex);
+}
+
+static void perf_event_for_each(struct perf_event *event,
+ void (*func)(struct perf_event *))
+{
+ struct perf_event_context *ctx = event->ctx;
+ struct perf_event *sibling;
+
+ WARN_ON_ONCE(ctx->parent_ctx);
+ mutex_lock(&ctx->mutex);
+ event = event->group_leader;
+
+ perf_event_for_each_child(event, func);
+ func(event);
+ list_for_each_entry(sibling, &event->sibling_list, group_entry)
+ perf_event_for_each_child(sibling, func);
+ mutex_unlock(&ctx->mutex);
+}
+
+static int perf_event_period(struct perf_event *event, u64 __user *arg)
+{
+ struct perf_event_context *ctx = event->ctx;
+ unsigned long size;
+ int ret = 0;
+ u64 value;
+
+ if (!event->attr.sample_period)
+ return -EINVAL;
+
+ size = copy_from_user(&value, arg, sizeof(value));
+ if (size != 0)
+ return -EFAULT;
+
+ if (!value)
+ return -EINVAL;
+
+ spin_lock_irq(&ctx->lock);
+ if (event->attr.freq) {
+ if (value > sysctl_perf_event_sample_rate) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ event->attr.sample_freq = value;
+ } else {
+ event->attr.sample_period = value;
+ event->hw.sample_period = value;
+ }
+unlock:
+ spin_unlock_irq(&ctx->lock);
+
+ return ret;
+}
+
+int perf_event_set_output(struct perf_event *event, int output_fd);
+
+static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct perf_event *event = file->private_data;
+ void (*func)(struct perf_event *);
+ u32 flags = arg;
+
+ switch (cmd) {
+ case PERF_EVENT_IOC_ENABLE:
+ func = perf_event_enable;
+ break;
+ case PERF_EVENT_IOC_DISABLE:
+ func = perf_event_disable;
+ break;
+ case PERF_EVENT_IOC_RESET:
+ func = perf_event_reset;
+ break;
+
+ case PERF_EVENT_IOC_REFRESH:
+ return perf_event_refresh(event, arg);
+
+ case PERF_EVENT_IOC_PERIOD:
+ return perf_event_period(event, (u64 __user *)arg);
+
+ case PERF_EVENT_IOC_SET_OUTPUT:
+ return perf_event_set_output(event, arg);
+
+ default:
+ return -ENOTTY;
+ }
+
+ if (flags & PERF_IOC_FLAG_GROUP)
+ perf_event_for_each(event, func);
+ else
+ perf_event_for_each_child(event, func);
+
+ return 0;
+}
+
+int perf_event_task_enable(void)
+{
+ struct perf_event *event;
+
+ mutex_lock(&current->perf_event_mutex);
+ list_for_each_entry(event, &current->perf_event_list, owner_entry)
+ perf_event_for_each_child(event, perf_event_enable);
+ mutex_unlock(&current->perf_event_mutex);
+
+ return 0;
+}
+
+int perf_event_task_disable(void)
+{
+ struct perf_event *event;
+
+ mutex_lock(&current->perf_event_mutex);
+ list_for_each_entry(event, &current->perf_event_list, owner_entry)
+ perf_event_for_each_child(event, perf_event_disable);
+ mutex_unlock(&current->perf_event_mutex);
+
+ return 0;
+}
+
+#ifndef PERF_EVENT_INDEX_OFFSET
+# define PERF_EVENT_INDEX_OFFSET 0
+#endif
+
+static int perf_event_index(struct perf_event *event)
+{
+ if (event->state != PERF_EVENT_STATE_ACTIVE)
+ return 0;
+
+ return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
+}
+
+/*
+ * Callers need to ensure there can be no nesting of this function, otherwise
+ * the seqlock logic goes bad. We cannot serialize this because the arch
+ * code calls this from NMI context.
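+ *
+ * The reader side of this seqlock lives in user space; a sketch
+ * of the expected loop (cf. the perf_event_mmap_page layout):
+ *
+ *	do {
+ *		seq = pc->lock;
+ *		barrier();
+ *		index  = pc->index;
+ *		offset = pc->offset;
+ *		barrier();
+ *	} while (pc->lock != seq);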
+ */ +void perf_event_update_userpage(struct perf_event *event) +{ + struct perf_event_mmap_page *userpg; + struct perf_mmap_data *data; + + rcu_read_lock(); + data = rcu_dereference(event->data); + if (!data) + goto unlock; + + userpg = data->user_page; + + /* + * Disable preemption so as to not let the corresponding user-space + * spin too long if we get preempted. + */ + preempt_disable(); + ++userpg->lock; + barrier(); + userpg->index = perf_event_index(event); + userpg->offset = atomic64_read(&event->count); + if (event->state == PERF_EVENT_STATE_ACTIVE) + userpg->offset -= atomic64_read(&event->hw.prev_count); + + userpg->time_enabled = event->total_time_enabled + + atomic64_read(&event->child_total_time_enabled); + + userpg->time_running = event->total_time_running + + atomic64_read(&event->child_total_time_running); + + barrier(); + ++userpg->lock; + preempt_enable(); +unlock: + rcu_read_unlock(); +} + +static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct perf_event *event = vma->vm_file->private_data; + struct perf_mmap_data *data; + int ret = VM_FAULT_SIGBUS; + + if (vmf->flags & FAULT_FLAG_MKWRITE) { + if (vmf->pgoff == 0) + ret = 0; + return ret; + } + + rcu_read_lock(); + data = rcu_dereference(event->data); + if (!data) + goto unlock; + + if (vmf->pgoff == 0) { + vmf->page = virt_to_page(data->user_page); + } else { + int nr = vmf->pgoff - 1; + + if ((unsigned)nr > data->nr_pages) + goto unlock; + + if (vmf->flags & FAULT_FLAG_WRITE) + goto unlock; + + vmf->page = virt_to_page(data->data_pages[nr]); + } + + get_page(vmf->page); + vmf->page->mapping = vma->vm_file->f_mapping; + vmf->page->index = vmf->pgoff; + + ret = 0; +unlock: + rcu_read_unlock(); + + return ret; +} + +static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages) +{ + struct perf_mmap_data *data; + unsigned long size; + int i; + + WARN_ON(atomic_read(&event->mmap_count)); + + size = sizeof(struct perf_mmap_data); + size += nr_pages * sizeof(void *); + + data = kzalloc(size, GFP_KERNEL); + if (!data) + goto fail; + + data->user_page = (void *)get_zeroed_page(GFP_KERNEL); + if (!data->user_page) + goto fail_user_page; + + for (i = 0; i < nr_pages; i++) { + data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); + if (!data->data_pages[i]) + goto fail_data_pages; + } + + data->nr_pages = nr_pages; + atomic_set(&data->lock, -1); + + if (event->attr.watermark) { + data->watermark = min_t(long, PAGE_SIZE * nr_pages, + event->attr.wakeup_watermark); + } + if (!data->watermark) + data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4); + + rcu_assign_pointer(event->data, data); + + return 0; + +fail_data_pages: + for (i--; i >= 0; i--) + free_page((unsigned long)data->data_pages[i]); + + free_page((unsigned long)data->user_page); + +fail_user_page: + kfree(data); + +fail: + return -ENOMEM; +} + +static void perf_mmap_free_page(unsigned long addr) +{ + struct page *page = virt_to_page((void *)addr); + + page->mapping = NULL; + __free_page(page); +} + +static void __perf_mmap_data_free(struct rcu_head *rcu_head) +{ + struct perf_mmap_data *data; + int i; + + data = container_of(rcu_head, struct perf_mmap_data, rcu_head); + + perf_mmap_free_page((unsigned long)data->user_page); + for (i = 0; i < data->nr_pages; i++) + perf_mmap_free_page((unsigned long)data->data_pages[i]); + + kfree(data); +} + +static void perf_mmap_data_free(struct perf_event *event) +{ + struct perf_mmap_data *data = event->data; + + WARN_ON(atomic_read(&event->mmap_count)); + + 
rcu_assign_pointer(event->data, NULL); + call_rcu(&data->rcu_head, __perf_mmap_data_free); +} + +static void perf_mmap_open(struct vm_area_struct *vma) +{ + struct perf_event *event = vma->vm_file->private_data; + + atomic_inc(&event->mmap_count); +} + +static void perf_mmap_close(struct vm_area_struct *vma) +{ + struct perf_event *event = vma->vm_file->private_data; + + WARN_ON_ONCE(event->ctx->parent_ctx); + if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { + struct user_struct *user = current_user(); + + atomic_long_sub(event->data->nr_pages + 1, &user->locked_vm); + vma->vm_mm->locked_vm -= event->data->nr_locked; + perf_mmap_data_free(event); + mutex_unlock(&event->mmap_mutex); + } +} + +static struct vm_operations_struct perf_mmap_vmops = { + .open = perf_mmap_open, + .close = perf_mmap_close, + .fault = perf_mmap_fault, + .page_mkwrite = perf_mmap_fault, +}; + +static int perf_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct perf_event *event = file->private_data; + unsigned long user_locked, user_lock_limit; + struct user_struct *user = current_user(); + unsigned long locked, lock_limit; + unsigned long vma_size; + unsigned long nr_pages; + long user_extra, extra; + int ret = 0; + + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma_size = vma->vm_end - vma->vm_start; + nr_pages = (vma_size / PAGE_SIZE) - 1; + + /* + * If we have data pages ensure they're a power-of-two number, so we + * can do bitmasks instead of modulo. + */ + if (nr_pages != 0 && !is_power_of_2(nr_pages)) + return -EINVAL; + + if (vma_size != PAGE_SIZE * (1 + nr_pages)) + return -EINVAL; + + if (vma->vm_pgoff != 0) + return -EINVAL; + + WARN_ON_ONCE(event->ctx->parent_ctx); + mutex_lock(&event->mmap_mutex); + if (event->output) { + ret = -EINVAL; + goto unlock; + } + + if (atomic_inc_not_zero(&event->mmap_count)) { + if (nr_pages != event->data->nr_pages) + ret = -EINVAL; + goto unlock; + } + + user_extra = nr_pages + 1; + user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); + + /* + * Increase the limit linearly with more CPUs: + */ + user_lock_limit *= num_online_cpus(); + + user_locked = atomic_long_read(&user->locked_vm) + user_extra; + + extra = 0; + if (user_locked > user_lock_limit) + extra = user_locked - user_lock_limit; + + lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit >>= PAGE_SHIFT; + locked = vma->vm_mm->locked_vm + extra; + + if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && + !capable(CAP_IPC_LOCK)) { + ret = -EPERM; + goto unlock; + } + + WARN_ON(event->data); + ret = perf_mmap_data_alloc(event, nr_pages); + if (ret) + goto unlock; + + atomic_set(&event->mmap_count, 1); + atomic_long_add(user_extra, &user->locked_vm); + vma->vm_mm->locked_vm += extra; + event->data->nr_locked = extra; + if (vma->vm_flags & VM_WRITE) + event->data->writable = 1; + +unlock: + mutex_unlock(&event->mmap_mutex); + + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &perf_mmap_vmops; + + return ret; +} + +static int perf_fasync(int fd, struct file *filp, int on) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct perf_event *event = filp->private_data; + int retval; + + mutex_lock(&inode->i_mutex); + retval = fasync_helper(fd, filp, on, &event->fasync); + mutex_unlock(&inode->i_mutex); + + if (retval < 0) + return retval; + + return 0; +} + +static const struct file_operations perf_fops = { + .release = perf_release, + .read = perf_read, + .poll = perf_poll, + .unlocked_ioctl = perf_ioctl, + .compat_ioctl = 
perf_ioctl,
+ .mmap = perf_mmap,
+ .fasync = perf_fasync,
+};
+
+/*
+ * Perf event wakeup
+ *
+ * If there's data, ensure we set the poll() state and publish everything
+ * to user-space before waking everybody up.
+ */
+
+void perf_event_wakeup(struct perf_event *event)
+{
+ wake_up_all(&event->waitq);
+
+ if (event->pending_kill) {
+ kill_fasync(&event->fasync, SIGIO, event->pending_kill);
+ event->pending_kill = 0;
+ }
+}
+
+/*
+ * Pending wakeups
+ *
+ * Handle the case where we need to wake up from NMI (or rq->lock) context.
+ *
+ * The NMI bit means we cannot possibly take locks. Therefore, maintain a
+ * single linked list and use cmpxchg() to add entries lockless.
+ */
+
+static void perf_pending_event(struct perf_pending_entry *entry)
+{
+ struct perf_event *event = container_of(entry,
+ struct perf_event, pending);
+
+ if (event->pending_disable) {
+ event->pending_disable = 0;
+ __perf_event_disable(event);
+ }
+
+ if (event->pending_wakeup) {
+ event->pending_wakeup = 0;
+ perf_event_wakeup(event);
+ }
+}
+
+#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
+
+static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
+ PENDING_TAIL,
+};
+
+static void perf_pending_queue(struct perf_pending_entry *entry,
+ void (*func)(struct perf_pending_entry *))
+{
+ struct perf_pending_entry **head;
+
+ if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
+ return;
+
+ entry->func = func;
+
+ head = &get_cpu_var(perf_pending_head);
+
+ do {
+ entry->next = *head;
+ } while (cmpxchg(head, entry->next, entry) != entry->next);
+
+ set_perf_event_pending();
+
+ put_cpu_var(perf_pending_head);
+}
+
+static int __perf_pending_run(void)
+{
+ struct perf_pending_entry *list;
+ int nr = 0;
+
+ list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
+ while (list != PENDING_TAIL) {
+ void (*func)(struct perf_pending_entry *);
+ struct perf_pending_entry *entry = list;
+
+ list = list->next;
+
+ func = entry->func;
+ entry->next = NULL;
+ /*
+ * Ensure we observe the unqueue before we issue the wakeup,
+ * so that we won't be waiting forever.
+ * -- see perf_not_pending().
+ */
+ smp_wmb();
+
+ func(entry);
+ nr++;
+ }
+
+ return nr;
+}
+
+static inline int perf_not_pending(struct perf_event *event)
+{
+ /*
+ * If we flush on whatever cpu we run, there is a chance we don't
+ * need to wait.
+ */
+ get_cpu();
+ __perf_pending_run();
+ put_cpu();
+
+ /*
+ * Ensure we see the proper queue state before going to sleep
+ * so that we do not miss the wakeup.
+ * -- see __perf_pending_run()
+ */
+ smp_rmb();
+ return event->pending.next == NULL;
+}
+
+static void perf_pending_sync(struct perf_event *event)
+{
+ wait_event(event->waitq, perf_not_pending(event));
+}
+
+void perf_event_do_pending(void)
+{
+ __perf_pending_run();
+}
+
+/*
+ * Callchain support -- arch specific
+ */
+
+__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+ return NULL;
+}
+
+/*
+ * Output
+ */
+static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
+ unsigned long offset, unsigned long head)
+{
+ unsigned long mask;
+
+ if (!data->writable)
+ return true;
+
+ mask = (data->nr_pages << PAGE_SHIFT) - 1;
+
+ offset = (offset - tail) & mask;
+ head = (head - tail) & mask;
+
+ if ((int)(head - offset) < 0)
+ return false;
+
+ return true;
+}
+
+static void perf_output_wakeup(struct perf_output_handle *handle)
+{
+ atomic_set(&handle->data->poll, POLL_IN);
+
+ if (handle->nmi) {
+ handle->event->pending_wakeup = 1;
+ perf_pending_queue(&handle->event->pending,
+ perf_pending_event);
+ } else
+ perf_event_wakeup(handle->event);
+}
+
+/*
+ * Curious locking construct.
+ *
+ * We need to ensure a later event_id doesn't publish a head when a former
+ * event_id isn't done writing. However since we need to deal with NMIs we
+ * cannot fully serialize things.
+ *
+ * What we do is serialize between CPUs so we only have to deal with NMI
+ * nesting on a single CPU.
+ *
+ * We only publish the head (and generate a wakeup) when the outer-most
+ * event_id completes.
+ */
+static void perf_output_lock(struct perf_output_handle *handle)
+{
+ struct perf_mmap_data *data = handle->data;
+ int cpu;
+
+ handle->locked = 0;
+
+ local_irq_save(handle->flags);
+ cpu = smp_processor_id();
+
+ if (in_nmi() && atomic_read(&data->lock) == cpu)
+ return;
+
+ while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
+ cpu_relax();
+
+ handle->locked = 1;
+}
+
+static void perf_output_unlock(struct perf_output_handle *handle)
+{
+ struct perf_mmap_data *data = handle->data;
+ unsigned long head;
+ int cpu;
+
+ data->done_head = data->head;
+
+ if (!handle->locked)
+ goto out;
+
+again:
+ /*
+ * The xchg implies a full barrier that ensures all writes are done
+ * before we publish the new head, matched by a rmb() in userspace when
+ * reading this position.
+ */
+ while ((head = atomic_long_xchg(&data->done_head, 0)))
+ data->user_page->data_head = head;
+
+ /*
+ * NMI can happen here, which means we can miss a done_head update.
+ */
+
+ cpu = atomic_xchg(&data->lock, -1);
+ WARN_ON_ONCE(cpu != smp_processor_id());
+
+ /*
+ * Therefore we have to validate we did not indeed do so.
+ */
+ if (unlikely(atomic_long_read(&data->done_head))) {
+ /*
+ * Since we had it locked, we can lock it again.
+ */ + while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) + cpu_relax(); + + goto again; + } + + if (atomic_xchg(&data->wakeup, 0)) + perf_output_wakeup(handle); +out: + local_irq_restore(handle->flags); +} + +void perf_output_copy(struct perf_output_handle *handle, + const void *buf, unsigned int len) +{ + unsigned int pages_mask; + unsigned int offset; + unsigned int size; + void **pages; + + offset = handle->offset; + pages_mask = handle->data->nr_pages - 1; + pages = handle->data->data_pages; + + do { + unsigned int page_offset; + int nr; + + nr = (offset >> PAGE_SHIFT) & pages_mask; + page_offset = offset & (PAGE_SIZE - 1); + size = min_t(unsigned int, PAGE_SIZE - page_offset, len); + + memcpy(pages[nr] + page_offset, buf, size); + + len -= size; + buf += size; + offset += size; + } while (len); + + handle->offset = offset; + + /* + * Check we didn't copy past our reservation window, taking the + * possible unsigned int wrap into account. + */ + WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); +} + +int perf_output_begin(struct perf_output_handle *handle, + struct perf_event *event, unsigned int size, + int nmi, int sample) +{ + struct perf_event *output_event; + struct perf_mmap_data *data; + unsigned long tail, offset, head; + int have_lost; + struct { + struct perf_event_header header; + u64 id; + u64 lost; + } lost_event; + + rcu_read_lock(); + /* + * For inherited events we send all the output towards the parent. + */ + if (event->parent) + event = event->parent; + + output_event = rcu_dereference(event->output); + if (output_event) + event = output_event; + + data = rcu_dereference(event->data); + if (!data) + goto out; + + handle->data = data; + handle->event = event; + handle->nmi = nmi; + handle->sample = sample; + + if (!data->nr_pages) + goto fail; + + have_lost = atomic_read(&data->lost); + if (have_lost) + size += sizeof(lost_event); + + perf_output_lock(handle); + + do { + /* + * Userspace could choose to issue a mb() before updating the + * tail pointer. So that all reads will be completed before the + * write is issued. 
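+ *
+ * i.e. a user-space consumer would do (sketch):
+ *
+ *	head = pc->data_head;
+ *	rmb();
+ *	... consume records in [data_tail, head) ...
+ *	mb();
+ *	pc->data_tail = head;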
+ */ + tail = ACCESS_ONCE(data->user_page->data_tail); + smp_rmb(); + offset = head = atomic_long_read(&data->head); + head += size; + if (unlikely(!perf_output_space(data, tail, offset, head))) + goto fail; + } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); + + handle->offset = offset; + handle->head = head; + + if (head - tail > data->watermark) + atomic_set(&data->wakeup, 1); + + if (have_lost) { + lost_event.header.type = PERF_RECORD_LOST; + lost_event.header.misc = 0; + lost_event.header.size = sizeof(lost_event); + lost_event.id = event->id; + lost_event.lost = atomic_xchg(&data->lost, 0); + + perf_output_put(handle, lost_event); + } + + return 0; + +fail: + atomic_inc(&data->lost); + perf_output_unlock(handle); +out: + rcu_read_unlock(); + + return -ENOSPC; +} + +void perf_output_end(struct perf_output_handle *handle) +{ + struct perf_event *event = handle->event; + struct perf_mmap_data *data = handle->data; + + int wakeup_events = event->attr.wakeup_events; + + if (handle->sample && wakeup_events) { + int events = atomic_inc_return(&data->events); + if (events >= wakeup_events) { + atomic_sub(wakeup_events, &data->events); + atomic_set(&data->wakeup, 1); + } + } + + perf_output_unlock(handle); + rcu_read_unlock(); +} + +static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) +{ + /* + * only top level events have the pid namespace they were created in + */ + if (event->parent) + event = event->parent; + + return task_tgid_nr_ns(p, event->ns); +} + +static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) +{ + /* + * only top level events have the pid namespace they were created in + */ + if (event->parent) + event = event->parent; + + return task_pid_nr_ns(p, event->ns); +} + +static void perf_output_read_one(struct perf_output_handle *handle, + struct perf_event *event) +{ + u64 read_format = event->attr.read_format; + u64 values[4]; + int n = 0; + + values[n++] = atomic64_read(&event->count); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = event->total_time_enabled + + atomic64_read(&event->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = event->total_time_running + + atomic64_read(&event->child_total_time_running); + } + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(event); + + perf_output_copy(handle, values, n * sizeof(u64)); +} + +/* + * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. 
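+ *
+ * For reference, the group layout emitted below is:
+ *
+ *	{ u64		nr;
+ *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
+ *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
+ *	  { u64		value;
+ *	    { u64	id;           } && PERF_FORMAT_ID
+ *	  }		cntr[nr];
+ *	}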
+ */ +static void perf_output_read_group(struct perf_output_handle *handle, + struct perf_event *event) +{ + struct perf_event *leader = event->group_leader, *sub; + u64 read_format = event->attr.read_format; + u64 values[5]; + int n = 0; + + values[n++] = 1 + leader->nr_siblings; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = leader->total_time_enabled; + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = leader->total_time_running; + + if (leader != event) + leader->pmu->read(leader); + + values[n++] = atomic64_read(&leader->count); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(leader); + + perf_output_copy(handle, values, n * sizeof(u64)); + + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + n = 0; + + if (sub != event) + sub->pmu->read(sub); + + values[n++] = atomic64_read(&sub->count); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(sub); + + perf_output_copy(handle, values, n * sizeof(u64)); + } +} + +static void perf_output_read(struct perf_output_handle *handle, + struct perf_event *event) +{ + if (event->attr.read_format & PERF_FORMAT_GROUP) + perf_output_read_group(handle, event); + else + perf_output_read_one(handle, event); +} + +void perf_output_sample(struct perf_output_handle *handle, + struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event) +{ + u64 sample_type = data->type; + + perf_output_put(handle, *header); + + if (sample_type & PERF_SAMPLE_IP) + perf_output_put(handle, data->ip); + + if (sample_type & PERF_SAMPLE_TID) + perf_output_put(handle, data->tid_entry); + + if (sample_type & PERF_SAMPLE_TIME) + perf_output_put(handle, data->time); + + if (sample_type & PERF_SAMPLE_ADDR) + perf_output_put(handle, data->addr); + + if (sample_type & PERF_SAMPLE_ID) + perf_output_put(handle, data->id); + + if (sample_type & PERF_SAMPLE_STREAM_ID) + perf_output_put(handle, data->stream_id); + + if (sample_type & PERF_SAMPLE_CPU) + perf_output_put(handle, data->cpu_entry); + + if (sample_type & PERF_SAMPLE_PERIOD) + perf_output_put(handle, data->period); + + if (sample_type & PERF_SAMPLE_READ) + perf_output_read(handle, event); + + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (data->callchain) { + int size = 1; + + if (data->callchain) + size += data->callchain->nr; + + size *= sizeof(u64); + + perf_output_copy(handle, data->callchain, size); + } else { + u64 nr = 0; + perf_output_put(handle, nr); + } + } + + if (sample_type & PERF_SAMPLE_RAW) { + if (data->raw) { + perf_output_put(handle, data->raw->size); + perf_output_copy(handle, data->raw->data, + data->raw->size); + } else { + struct { + u32 size; + u32 data; + } raw = { + .size = sizeof(u32), + .data = 0, + }; + perf_output_put(handle, raw); + } + } +} + +void perf_prepare_sample(struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event, + struct pt_regs *regs) +{ + u64 sample_type = event->attr.sample_type; + + data->type = sample_type; + + header->type = PERF_RECORD_SAMPLE; + header->size = sizeof(*header); + + header->misc = 0; + header->misc |= perf_misc_flags(regs); + + if (sample_type & PERF_SAMPLE_IP) { + data->ip = perf_instruction_pointer(regs); + + header->size += sizeof(data->ip); + } + + if (sample_type & PERF_SAMPLE_TID) { + /* namespace issues */ + data->tid_entry.pid = perf_event_pid(event, current); + data->tid_entry.tid = perf_event_tid(event, current); + + header->size += sizeof(data->tid_entry); + } + + if (sample_type & 
PERF_SAMPLE_TIME) { + data->time = perf_clock(); + + header->size += sizeof(data->time); + } + + if (sample_type & PERF_SAMPLE_ADDR) + header->size += sizeof(data->addr); + + if (sample_type & PERF_SAMPLE_ID) { + data->id = primary_event_id(event); + + header->size += sizeof(data->id); + } + + if (sample_type & PERF_SAMPLE_STREAM_ID) { + data->stream_id = event->id; + + header->size += sizeof(data->stream_id); + } + + if (sample_type & PERF_SAMPLE_CPU) { + data->cpu_entry.cpu = raw_smp_processor_id(); + data->cpu_entry.reserved = 0; + + header->size += sizeof(data->cpu_entry); + } + + if (sample_type & PERF_SAMPLE_PERIOD) + header->size += sizeof(data->period); + + if (sample_type & PERF_SAMPLE_READ) + header->size += perf_event_read_size(event); + + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + int size = 1; + + data->callchain = perf_callchain(regs); + + if (data->callchain) + size += data->callchain->nr; + + header->size += size * sizeof(u64); + } + + if (sample_type & PERF_SAMPLE_RAW) { + int size = sizeof(u32); + + if (data->raw) + size += data->raw->size; + else + size += sizeof(u32); + + WARN_ON_ONCE(size & (sizeof(u64)-1)); + header->size += size; + } +} + +static void perf_event_output(struct perf_event *event, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct perf_output_handle handle; + struct perf_event_header header; + + perf_prepare_sample(&header, data, event, regs); + + if (perf_output_begin(&handle, event, header.size, nmi, 1)) + return; + + perf_output_sample(&handle, &header, data, event); + + perf_output_end(&handle); +} + +/* + * read event_id + */ + +struct perf_read_event { + struct perf_event_header header; + + u32 pid; + u32 tid; +}; + +static void +perf_event_read_event(struct perf_event *event, + struct task_struct *task) +{ + struct perf_output_handle handle; + struct perf_read_event read_event = { + .header = { + .type = PERF_RECORD_READ, + .misc = 0, + .size = sizeof(read_event) + perf_event_read_size(event), + }, + .pid = perf_event_pid(event, task), + .tid = perf_event_tid(event, task), + }; + int ret; + + ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); + if (ret) + return; + + perf_output_put(&handle, read_event); + perf_output_read(&handle, event); + + perf_output_end(&handle); +} + +/* + * task tracking -- fork/exit + * + * enabled by: attr.comm | attr.mmap | attr.task + */ + +struct perf_task_event { + struct task_struct *task; + struct perf_event_context *task_ctx; + + struct { + struct perf_event_header header; + + u32 pid; + u32 ppid; + u32 tid; + u32 ptid; + u64 time; + } event_id; +}; + +static void perf_event_task_output(struct perf_event *event, + struct perf_task_event *task_event) +{ + struct perf_output_handle handle; + int size; + struct task_struct *task = task_event->task; + int ret; + + size = task_event->event_id.header.size; + ret = perf_output_begin(&handle, event, size, 0, 0); + + if (ret) + return; + + task_event->event_id.pid = perf_event_pid(event, task); + task_event->event_id.ppid = perf_event_pid(event, current); + + task_event->event_id.tid = perf_event_tid(event, task); + task_event->event_id.ptid = perf_event_tid(event, current); + + task_event->event_id.time = perf_clock(); + + perf_output_put(&handle, task_event->event_id); + + perf_output_end(&handle); +} + +static int perf_event_task_match(struct perf_event *event) +{ + if (event->attr.comm || event->attr.mmap || event->attr.task) + return 1; + + return 0; +} + +static void perf_event_task_ctx(struct perf_event_context *ctx, 
+ struct perf_task_event *task_event)
+{
+ struct perf_event *event;
+
+ if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+ return;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+ if (perf_event_task_match(event))
+ perf_event_task_output(event, task_event);
+ }
+ rcu_read_unlock();
+}
+
+static void perf_event_task_event(struct perf_task_event *task_event)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx = task_event->task_ctx;
+
+ cpuctx = &get_cpu_var(perf_cpu_context);
+ perf_event_task_ctx(&cpuctx->ctx, task_event);
+ put_cpu_var(perf_cpu_context);
+
+ rcu_read_lock();
+ if (!ctx)
+ ctx = rcu_dereference(task_event->task->perf_event_ctxp);
+ if (ctx)
+ perf_event_task_ctx(ctx, task_event);
+ rcu_read_unlock();
+}
+
+static void perf_event_task(struct task_struct *task,
+ struct perf_event_context *task_ctx,
+ int new)
+{
+ struct perf_task_event task_event;
+
+ if (!atomic_read(&nr_comm_events) &&
+ !atomic_read(&nr_mmap_events) &&
+ !atomic_read(&nr_task_events))
+ return;
+
+ task_event = (struct perf_task_event){
+ .task = task,
+ .task_ctx = task_ctx,
+ .event_id = {
+ .header = {
+ .type = new ? PERF_RECORD_FORK : PERF_RECORD_EXIT,
+ .misc = 0,
+ .size = sizeof(task_event.event_id),
+ },
+ /* .pid */
+ /* .ppid */
+ /* .tid */
+ /* .ptid */
+ },
+ };
+
+ perf_event_task_event(&task_event);
+}
+
+void perf_event_fork(struct task_struct *task)
+{
+ perf_event_task(task, NULL, 1);
+}
+
+/*
+ * comm tracking
+ */
+
+struct perf_comm_event {
+ struct task_struct *task;
+ char *comm;
+ int comm_size;
+
+ struct {
+ struct perf_event_header header;
+
+ u32 pid;
+ u32 tid;
+ } event_id;
+};
+
+static void perf_event_comm_output(struct perf_event *event,
+ struct perf_comm_event *comm_event)
+{
+ struct perf_output_handle handle;
+ int size = comm_event->event_id.header.size;
+ int ret = perf_output_begin(&handle, event, size, 0, 0);
+
+ if (ret)
+ return;
+
+ comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
+ comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
+
+ perf_output_put(&handle, comm_event->event_id);
+ perf_output_copy(&handle, comm_event->comm,
+ comm_event->comm_size);
+ perf_output_end(&handle);
+}
+
+static int perf_event_comm_match(struct perf_event *event)
+{
+ if (event->attr.comm)
+ return 1;
+
+ return 0;
+}
+
+static void perf_event_comm_ctx(struct perf_event_context *ctx,
+ struct perf_comm_event *comm_event)
+{
+ struct perf_event *event;
+
+ if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+ return;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+ if (perf_event_comm_match(event))
+ perf_event_comm_output(event, comm_event);
+ }
+ rcu_read_unlock();
+}
+
+static void perf_event_comm_event(struct perf_comm_event *comm_event)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ unsigned int size;
+ char comm[TASK_COMM_LEN];
+
+ memset(comm, 0, sizeof(comm));
+ strncpy(comm, comm_event->task->comm, sizeof(comm));
+ size = ALIGN(strlen(comm)+1, sizeof(u64));
+
+ comm_event->comm = comm;
+ comm_event->comm_size = size;
+
+ comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
+
+ cpuctx = &get_cpu_var(perf_cpu_context);
+ perf_event_comm_ctx(&cpuctx->ctx, comm_event);
+ put_cpu_var(perf_cpu_context);
+
+ rcu_read_lock();
+ /*
+ * doesn't really matter which of the child contexts the
+ * event ends up in.
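+ * (All clones forward their output to the parent event's buffer
+ * -- see perf_output_begin() -- so each of them would produce
+ * the same record.)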
+ */
+ ctx = rcu_dereference(current->perf_event_ctxp);
+ if (ctx)
+ perf_event_comm_ctx(ctx, comm_event);
+ rcu_read_unlock();
+}
+
+void perf_event_comm(struct task_struct *task)
+{
+ struct perf_comm_event comm_event;
+
+ if (task->perf_event_ctxp)
+ perf_event_enable_on_exec(task);
+
+ if (!atomic_read(&nr_comm_events))
+ return;
+
+ comm_event = (struct perf_comm_event){
+ .task = task,
+ /* .comm */
+ /* .comm_size */
+ .event_id = {
+ .header = {
+ .type = PERF_RECORD_COMM,
+ .misc = 0,
+ /* .size */
+ },
+ /* .pid */
+ /* .tid */
+ },
+ };
+
+ perf_event_comm_event(&comm_event);
+}
+
+/*
+ * mmap tracking
+ */
+
+struct perf_mmap_event {
+ struct vm_area_struct *vma;
+
+ const char *file_name;
+ int file_size;
+
+ struct {
+ struct perf_event_header header;
+
+ u32 pid;
+ u32 tid;
+ u64 start;
+ u64 len;
+ u64 pgoff;
+ } event_id;
+};
+
+static void perf_event_mmap_output(struct perf_event *event,
+ struct perf_mmap_event *mmap_event)
+{
+ struct perf_output_handle handle;
+ int size = mmap_event->event_id.header.size;
+ int ret = perf_output_begin(&handle, event, size, 0, 0);
+
+ if (ret)
+ return;
+
+ mmap_event->event_id.pid = perf_event_pid(event, current);
+ mmap_event->event_id.tid = perf_event_tid(event, current);
+
+ perf_output_put(&handle, mmap_event->event_id);
+ perf_output_copy(&handle, mmap_event->file_name,
+ mmap_event->file_size);
+ perf_output_end(&handle);
+}
+
+static int perf_event_mmap_match(struct perf_event *event,
+ struct perf_mmap_event *mmap_event)
+{
+ if (event->attr.mmap)
+ return 1;
+
+ return 0;
+}
+
+static void perf_event_mmap_ctx(struct perf_event_context *ctx,
+ struct perf_mmap_event *mmap_event)
+{
+ struct perf_event *event;
+
+ if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+ return;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+ if (perf_event_mmap_match(event, mmap_event))
+ perf_event_mmap_output(event, mmap_event);
+ }
+ rcu_read_unlock();
+}
+
+static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ struct vm_area_struct *vma = mmap_event->vma;
+ struct file *file = vma->vm_file;
+ unsigned int size;
+ char tmp[16];
+ char *buf = NULL;
+ const char *name;
+
+ memset(tmp, 0, sizeof(tmp));
+
+ if (file) {
+ /*
+ * d_path works from the end of the buffer backwards, so we
+ * need to add enough zero bytes after the string to handle
+ * the 64bit alignment we do later.
+ */
+ buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
+ if (!buf) {
+ name = strncpy(tmp, "//enomem", sizeof(tmp));
+ goto got_name;
+ }
+ name = d_path(&file->f_path, buf, PATH_MAX);
+ if (IS_ERR(name)) {
+ name = strncpy(tmp, "//toolong", sizeof(tmp));
+ goto got_name;
+ }
+ } else {
+ if (arch_vma_name(mmap_event->vma)) {
+ name = strncpy(tmp, arch_vma_name(mmap_event->vma),
+ sizeof(tmp));
+ goto got_name;
+ }
+
+ if (!vma->vm_mm) {
+ name = strncpy(tmp, "[vdso]", sizeof(tmp));
+ goto got_name;
+ }
+
+ name = strncpy(tmp, "//anon", sizeof(tmp));
+ goto got_name;
+ }
+
+got_name:
+ size = ALIGN(strlen(name)+1, sizeof(u64));
+
+ mmap_event->file_name = name;
+ mmap_event->file_size = size;
+
+ mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
+
+ cpuctx = &get_cpu_var(perf_cpu_context);
+ perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
+ put_cpu_var(perf_cpu_context);
+
+ rcu_read_lock();
+ /*
+ * doesn't really matter which of the child contexts the
+ * event ends up in.
+ */ + ctx = rcu_dereference(current->perf_event_ctxp); + if (ctx) + perf_event_mmap_ctx(ctx, mmap_event); + rcu_read_unlock(); + + kfree(buf); +} + +void __perf_event_mmap(struct vm_area_struct *vma) +{ + struct perf_mmap_event mmap_event; + + if (!atomic_read(&nr_mmap_events)) + return; + + mmap_event = (struct perf_mmap_event){ + .vma = vma, + /* .file_name */ + /* .file_size */ + .event_id = { + .header = { + .type = PERF_RECORD_MMAP, + .misc = 0, + /* .size */ + }, + /* .pid */ + /* .tid */ + .start = vma->vm_start, + .len = vma->vm_end - vma->vm_start, + .pgoff = vma->vm_pgoff, + }, + }; + + perf_event_mmap_event(&mmap_event); +} + +/* + * IRQ throttle logging + */ + +static void perf_log_throttle(struct perf_event *event, int enable) +{ + struct perf_output_handle handle; + int ret; + + struct { + struct perf_event_header header; + u64 time; + u64 id; + u64 stream_id; + } throttle_event = { + .header = { + .type = PERF_RECORD_THROTTLE, + .misc = 0, + .size = sizeof(throttle_event), + }, + .time = perf_clock(), + .id = primary_event_id(event), + .stream_id = event->id, + }; + + if (enable) + throttle_event.header.type = PERF_RECORD_UNTHROTTLE; + + ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0); + if (ret) + return; + + perf_output_put(&handle, throttle_event); + perf_output_end(&handle); +} + +/* + * Generic event overflow handling, sampling. + */ + +static int __perf_event_overflow(struct perf_event *event, int nmi, + int throttle, struct perf_sample_data *data, + struct pt_regs *regs) +{ + int events = atomic_read(&event->event_limit); + struct hw_perf_event *hwc = &event->hw; + int ret = 0; + + throttle = (throttle && event->pmu->unthrottle != NULL); + + if (!throttle) { + hwc->interrupts++; + } else { + if (hwc->interrupts != MAX_INTERRUPTS) { + hwc->interrupts++; + if (HZ * hwc->interrupts > + (u64)sysctl_perf_event_sample_rate) { + hwc->interrupts = MAX_INTERRUPTS; + perf_log_throttle(event, 0); + ret = 1; + } + } else { + /* + * Keep re-disabling events even though on the previous + * pass we disabled it - just in case we raced with a + * sched-in and the event got enabled again: + */ + ret = 1; + } + } + + if (event->attr.freq) { + u64 now = perf_clock(); + s64 delta = now - hwc->freq_stamp; + + hwc->freq_stamp = now; + + if (delta > 0 && delta < TICK_NSEC) + perf_adjust_period(event, NSEC_PER_SEC / (int)delta); + } + + /* + * XXX event_limit might not quite work as expected on inherited + * events + */ + + event->pending_kill = POLL_IN; + if (events && atomic_dec_and_test(&event->event_limit)) { + ret = 1; + event->pending_kill = POLL_HUP; + if (nmi) { + event->pending_disable = 1; + perf_pending_queue(&event->pending, + perf_pending_event); + } else + perf_event_disable(event); + } + + perf_event_output(event, nmi, data, regs); + return ret; +} + +int perf_event_overflow(struct perf_event *event, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + return __perf_event_overflow(event, nmi, 1, data, regs); +} + +/* + * Generic software event infrastructure + */ + +/* + * We directly increment event->count and keep a second value in + * event->hw.period_left to count intervals. This period event + * is kept in the range [-sample_period, 0] so that we can use the + * sign as trigger. 
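+ *
+ * For example, with sample_period == 100, period_left starts at -100;
+ * events accumulate into it, and once it crosses 0 an overflow is due,
+ * at which point perf_swevent_set_period() computes how many whole
+ * periods elapsed and rebases period_left back into [-100, 0].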
+ */ + +static u64 perf_swevent_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 period = hwc->last_period; + u64 nr, offset; + s64 old, val; + + hwc->last_period = hwc->sample_period; + +again: + old = val = atomic64_read(&hwc->period_left); + if (val < 0) + return 0; + + nr = div64_u64(period + val, period); + offset = nr * period; + val -= offset; + if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) + goto again; + + return nr; +} + +static void perf_swevent_overflow(struct perf_event *event, + int nmi, struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct hw_perf_event *hwc = &event->hw; + int throttle = 0; + u64 overflow; + + data->period = event->hw.last_period; + overflow = perf_swevent_set_period(event); + + if (hwc->interrupts == MAX_INTERRUPTS) + return; + + for (; overflow; overflow--) { + if (__perf_event_overflow(event, nmi, throttle, + data, regs)) { + /* + * We inhibit the overflow from happening when + * hwc->interrupts == MAX_INTERRUPTS. + */ + break; + } + throttle = 1; + } +} + +static void perf_swevent_unthrottle(struct perf_event *event) +{ + /* + * Nothing to do, we already reset hwc->interrupts. + */ +} + +static void perf_swevent_add(struct perf_event *event, u64 nr, + int nmi, struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct hw_perf_event *hwc = &event->hw; + + atomic64_add(nr, &event->count); + + if (!hwc->sample_period) + return; + + if (!regs) + return; + + if (!atomic64_add_negative(nr, &hwc->period_left)) + perf_swevent_overflow(event, nmi, data, regs); +} + +static int perf_swevent_is_counting(struct perf_event *event) +{ + /* + * The event is active, we're good! + */ + if (event->state == PERF_EVENT_STATE_ACTIVE) + return 1; + + /* + * The event is off/error, not counting. + */ + if (event->state != PERF_EVENT_STATE_INACTIVE) + return 0; + + /* + * The event is inactive, if the context is active + * we're part of a group that didn't make it on the 'pmu', + * not counting. + */ + if (event->ctx->is_active) + return 0; + + /* + * We're inactive and the context is too, this means the + * task is scheduled out, we're counting events that happen + * to us, like migration events. 
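+ * (CPU migrations are the canonical example: the migration is charged
+ * to the task precisely while that task is scheduled out.)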
+ */ + return 1; +} + +static int perf_swevent_match(struct perf_event *event, + enum perf_type_id type, + u32 event_id, struct pt_regs *regs) +{ + if (!perf_swevent_is_counting(event)) + return 0; + + if (event->attr.type != type) + return 0; + if (event->attr.config != event_id) + return 0; + + if (regs) { + if (event->attr.exclude_user && user_mode(regs)) + return 0; + + if (event->attr.exclude_kernel && !user_mode(regs)) + return 0; + } + + return 1; +} + +static void perf_swevent_ctx_event(struct perf_event_context *ctx, + enum perf_type_id type, + u32 event_id, u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct perf_event *event; + + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (perf_swevent_match(event, type, event_id, regs)) + perf_swevent_add(event, nr, nmi, data, regs); + } + rcu_read_unlock(); +} + +static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) +{ + if (in_nmi()) + return &cpuctx->recursion[3]; + + if (in_irq()) + return &cpuctx->recursion[2]; + + if (in_softirq()) + return &cpuctx->recursion[1]; + + return &cpuctx->recursion[0]; +} + +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + int *recursion = perf_swevent_recursion_context(cpuctx); + struct perf_event_context *ctx; + + if (*recursion) + goto out; + + (*recursion)++; + barrier(); + + perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, + nr, nmi, data, regs); + rcu_read_lock(); + /* + * doesn't really matter which of the child contexts the + * events ends up in. + */ + ctx = rcu_dereference(current->perf_event_ctxp); + if (ctx) + perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); + rcu_read_unlock(); + + barrier(); + (*recursion)--; + +out: + put_cpu_var(perf_cpu_context); +} + +void __perf_sw_event(u32 event_id, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) +{ + struct perf_sample_data data = { + .addr = addr, + }; + + do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, + &data, regs); +} + +static void perf_swevent_read(struct perf_event *event) +{ +} + +static int perf_swevent_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->sample_period) { + hwc->last_period = hwc->sample_period; + perf_swevent_set_period(event); + } + return 0; +} + +static void perf_swevent_disable(struct perf_event *event) +{ +} + +static const struct pmu perf_ops_generic = { + .enable = perf_swevent_enable, + .disable = perf_swevent_disable, + .read = perf_swevent_read, + .unthrottle = perf_swevent_unthrottle, +}; + +/* + * hrtimer based swevent callback + */ + +static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) +{ + enum hrtimer_restart ret = HRTIMER_RESTART; + struct perf_sample_data data; + struct pt_regs *regs; + struct perf_event *event; + u64 period; + + event = container_of(hrtimer, struct perf_event, hw.hrtimer); + event->pmu->read(event); + + data.addr = 0; + regs = get_irq_regs(); + /* + * In case we exclude kernel IPs or are somehow not in interrupt + * context, provide the next best thing, the user IP. 
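+ * (task_pt_regs() is the user-space register state saved on kernel
+ * entry.)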
+ */ + if ((event->attr.exclude_kernel || !regs) && + !event->attr.exclude_user) + regs = task_pt_regs(current); + + if (regs) { + if (perf_event_overflow(event, 0, &data, regs)) + ret = HRTIMER_NORESTART; + } + + period = max_t(u64, 10000, event->hw.sample_period); + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); + + return ret; +} + +/* + * Software event: cpu wall time clock + */ + +static void cpu_clock_perf_event_update(struct perf_event *event) +{ + int cpu = raw_smp_processor_id(); + s64 prev; + u64 now; + + now = cpu_clock(cpu); + prev = atomic64_read(&event->hw.prev_count); + atomic64_set(&event->hw.prev_count, now); + atomic64_add(now - prev, &event->count); +} + +static int cpu_clock_perf_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int cpu = raw_smp_processor_id(); + + atomic64_set(&hwc->prev_count, cpu_clock(cpu)); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swevent_hrtimer; + if (hwc->sample_period) { + u64 period = max_t(u64, 10000, hwc->sample_period); + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(period), 0, + HRTIMER_MODE_REL, 0); + } + + return 0; +} + +static void cpu_clock_perf_event_disable(struct perf_event *event) +{ + if (event->hw.sample_period) + hrtimer_cancel(&event->hw.hrtimer); + cpu_clock_perf_event_update(event); +} + +static void cpu_clock_perf_event_read(struct perf_event *event) +{ + cpu_clock_perf_event_update(event); +} + +static const struct pmu perf_ops_cpu_clock = { + .enable = cpu_clock_perf_event_enable, + .disable = cpu_clock_perf_event_disable, + .read = cpu_clock_perf_event_read, +}; + +/* + * Software event: task time clock + */ + +static void task_clock_perf_event_update(struct perf_event *event, u64 now) +{ + u64 prev; + s64 delta; + + prev = atomic64_xchg(&event->hw.prev_count, now); + delta = now - prev; + atomic64_add(delta, &event->count); +} + +static int task_clock_perf_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 now; + + now = event->ctx->time; + + atomic64_set(&hwc->prev_count, now); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swevent_hrtimer; + if (hwc->sample_period) { + u64 period = max_t(u64, 10000, hwc->sample_period); + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(period), 0, + HRTIMER_MODE_REL, 0); + } + + return 0; +} + +static void task_clock_perf_event_disable(struct perf_event *event) +{ + if (event->hw.sample_period) + hrtimer_cancel(&event->hw.hrtimer); + task_clock_perf_event_update(event, event->ctx->time); + +} + +static void task_clock_perf_event_read(struct perf_event *event) +{ + u64 time; + + if (!in_nmi()) { + update_context_time(event->ctx); + time = event->ctx->time; + } else { + u64 now = perf_clock(); + u64 delta = now - event->ctx->timestamp; + time = event->ctx->time + delta; + } + + task_clock_perf_event_update(event, time); +} + +static const struct pmu perf_ops_task_clock = { + .enable = task_clock_perf_event_enable, + .disable = task_clock_perf_event_disable, + .read = task_clock_perf_event_read, +}; + +#ifdef CONFIG_EVENT_PROFILE +void perf_tp_event(int event_id, u64 addr, u64 count, void *record, + int entry_size) +{ + struct perf_raw_record raw = { + .size = entry_size, + .data = record, + }; + + struct perf_sample_data data = { + .addr = addr, + .raw = &raw, + }; + + struct pt_regs *regs = get_irq_regs(); + + if (!regs) + regs = task_pt_regs(current); + + 
do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, + &data, regs); +} +EXPORT_SYMBOL_GPL(perf_tp_event); + +extern int ftrace_profile_enable(int); +extern void ftrace_profile_disable(int); + +static void tp_perf_event_destroy(struct perf_event *event) +{ + ftrace_profile_disable(event->attr.config); +} + +static const struct pmu *tp_perf_event_init(struct perf_event *event) +{ + /* + * Raw tracepoint data is a severe data leak, only allow root to + * have these. + */ + if ((event->attr.sample_type & PERF_SAMPLE_RAW) && + perf_paranoid_tracepoint_raw() && + !capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + if (ftrace_profile_enable(event->attr.config)) + return NULL; + + event->destroy = tp_perf_event_destroy; + + return &perf_ops_generic; +} +#else +static const struct pmu *tp_perf_event_init(struct perf_event *event) +{ + return NULL; +} +#endif + +atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; + +static void sw_perf_event_destroy(struct perf_event *event) +{ + u64 event_id = event->attr.config; + + WARN_ON(event->parent); + + atomic_dec(&perf_swevent_enabled[event_id]); +} + +static const struct pmu *sw_perf_event_init(struct perf_event *event) +{ + const struct pmu *pmu = NULL; + u64 event_id = event->attr.config; + + /* + * Software events (currently) can't in general distinguish + * between user, kernel and hypervisor events. + * However, context switches and cpu migrations are considered + * to be kernel events, and page faults are never hypervisor + * events. + */ + switch (event_id) { + case PERF_COUNT_SW_CPU_CLOCK: + pmu = &perf_ops_cpu_clock; + + break; + case PERF_COUNT_SW_TASK_CLOCK: + /* + * If the user instantiates this as a per-cpu event, + * use the cpu_clock event instead. + */ + if (event->ctx->task) + pmu = &perf_ops_task_clock; + else + pmu = &perf_ops_cpu_clock; + + break; + case PERF_COUNT_SW_PAGE_FAULTS: + case PERF_COUNT_SW_PAGE_FAULTS_MIN: + case PERF_COUNT_SW_PAGE_FAULTS_MAJ: + case PERF_COUNT_SW_CONTEXT_SWITCHES: + case PERF_COUNT_SW_CPU_MIGRATIONS: + if (!event->parent) { + atomic_inc(&perf_swevent_enabled[event_id]); + event->destroy = sw_perf_event_destroy; + } + pmu = &perf_ops_generic; + break; + } + + return pmu; +} + +/* + * Allocate and initialize a event structure + */ +static struct perf_event * +perf_event_alloc(struct perf_event_attr *attr, + int cpu, + struct perf_event_context *ctx, + struct perf_event *group_leader, + struct perf_event *parent_event, + gfp_t gfpflags) +{ + const struct pmu *pmu; + struct perf_event *event; + struct hw_perf_event *hwc; + long err; + + event = kzalloc(sizeof(*event), gfpflags); + if (!event) + return ERR_PTR(-ENOMEM); + + /* + * Single events are their own group leaders, with an + * empty sibling list: + */ + if (!group_leader) + group_leader = event; + + mutex_init(&event->child_mutex); + INIT_LIST_HEAD(&event->child_list); + + INIT_LIST_HEAD(&event->group_entry); + INIT_LIST_HEAD(&event->event_entry); + INIT_LIST_HEAD(&event->sibling_list); + init_waitqueue_head(&event->waitq); + + mutex_init(&event->mmap_mutex); + + event->cpu = cpu; + event->attr = *attr; + event->group_leader = group_leader; + event->pmu = NULL; + event->ctx = ctx; + event->oncpu = -1; + + event->parent = parent_event; + + event->ns = get_pid_ns(current->nsproxy->pid_ns); + event->id = atomic64_inc_return(&perf_event_id); + + event->state = PERF_EVENT_STATE_INACTIVE; + + if (attr->disabled) + event->state = PERF_EVENT_STATE_OFF; + + pmu = NULL; + + hwc = &event->hw; + hwc->sample_period = attr->sample_period; + if (attr->freq && 
attr->sample_freq) + hwc->sample_period = 1; + hwc->last_period = hwc->sample_period; + + atomic64_set(&hwc->period_left, hwc->sample_period); + + /* + * we currently do not support PERF_FORMAT_GROUP on inherited events + */ + if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) + goto done; + + switch (attr->type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + pmu = hw_perf_event_init(event); + break; + + case PERF_TYPE_SOFTWARE: + pmu = sw_perf_event_init(event); + break; + + case PERF_TYPE_TRACEPOINT: + pmu = tp_perf_event_init(event); + break; + + default: + break; + } +done: + err = 0; + if (!pmu) + err = -EINVAL; + else if (IS_ERR(pmu)) + err = PTR_ERR(pmu); + + if (err) { + if (event->ns) + put_pid_ns(event->ns); + kfree(event); + return ERR_PTR(err); + } + + event->pmu = pmu; + + if (!event->parent) { + atomic_inc(&nr_events); + if (event->attr.mmap) + atomic_inc(&nr_mmap_events); + if (event->attr.comm) + atomic_inc(&nr_comm_events); + if (event->attr.task) + atomic_inc(&nr_task_events); + } + + return event; +} + +static int perf_copy_attr(struct perf_event_attr __user *uattr, + struct perf_event_attr *attr) +{ + u32 size; + int ret; + + if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) + return -EFAULT; + + /* + * zero the full structure, so that a short copy will be nice. + */ + memset(attr, 0, sizeof(*attr)); + + ret = get_user(size, &uattr->size); + if (ret) + return ret; + + if (size > PAGE_SIZE) /* silly large */ + goto err_size; + + if (!size) /* abi compat */ + size = PERF_ATTR_SIZE_VER0; + + if (size < PERF_ATTR_SIZE_VER0) + goto err_size; + + /* + * If we're handed a bigger struct than we know of, + * ensure all the unknown bits are 0 - i.e. new + * user-space does not rely on any kernel feature + * extensions we dont know about yet. + */ + if (size > sizeof(*attr)) { + unsigned char __user *addr; + unsigned char __user *end; + unsigned char val; + + addr = (void __user *)uattr + sizeof(*attr); + end = (void __user *)uattr + size; + + for (; addr < end; addr++) { + ret = get_user(val, addr); + if (ret) + return ret; + if (val) + goto err_size; + } + size = sizeof(*attr); + } + + ret = copy_from_user(attr, uattr, size); + if (ret) + return -EFAULT; + + /* + * If the type exists, the corresponding creation will verify + * the attr->config. 
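+ * (For tracepoints, e.g., tp_perf_event_init() fails when
+ * ftrace_profile_enable() rejects attr->config as an unknown id.)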
+ */ + if (attr->type >= PERF_TYPE_MAX) + return -EINVAL; + + if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) + return -EINVAL; + + if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) + return -EINVAL; + + if (attr->read_format & ~(PERF_FORMAT_MAX-1)) + return -EINVAL; + +out: + return ret; + +err_size: + put_user(sizeof(*attr), &uattr->size); + ret = -E2BIG; + goto out; +} + +int perf_event_set_output(struct perf_event *event, int output_fd) +{ + struct perf_event *output_event = NULL; + struct file *output_file = NULL; + struct perf_event *old_output; + int fput_needed = 0; + int ret = -EINVAL; + + if (!output_fd) + goto set; + + output_file = fget_light(output_fd, &fput_needed); + if (!output_file) + return -EBADF; + + if (output_file->f_op != &perf_fops) + goto out; + + output_event = output_file->private_data; + + /* Don't chain output fds */ + if (output_event->output) + goto out; + + /* Don't set an output fd when we already have an output channel */ + if (event->data) + goto out; + + atomic_long_inc(&output_file->f_count); + +set: + mutex_lock(&event->mmap_mutex); + old_output = event->output; + rcu_assign_pointer(event->output, output_event); + mutex_unlock(&event->mmap_mutex); + + if (old_output) { + /* + * we need to make sure no existing perf_output_*() + * is still referencing this event. + */ + synchronize_rcu(); + fput(old_output->filp); + } + + ret = 0; +out: + fput_light(output_file, fput_needed); + return ret; +} + +/** + * sys_perf_event_open - open a performance event, associate it to a task/cpu + * + * @attr_uptr: event_id type attributes for monitoring/sampling + * @pid: target pid + * @cpu: target cpu + * @group_fd: group leader event fd + */ +SYSCALL_DEFINE5(perf_event_open, + struct perf_event_attr __user *, attr_uptr, + pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) +{ + struct perf_event *event, *group_leader; + struct perf_event_attr attr; + struct perf_event_context *ctx; + struct file *event_file = NULL; + struct file *group_file = NULL; + int fput_needed = 0; + int fput_needed2 = 0; + int err; + + /* for future expandability... 
*/ + if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) + return -EINVAL; + + err = perf_copy_attr(attr_uptr, &attr); + if (err) + return err; + + if (!attr.exclude_kernel) { + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + return -EACCES; + } + + if (attr.freq) { + if (attr.sample_freq > sysctl_perf_event_sample_rate) + return -EINVAL; + } + + /* + * Get the target context (task or percpu): + */ + ctx = find_get_context(pid, cpu); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + /* + * Look up the group leader (we will attach this event to it): + */ + group_leader = NULL; + if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { + err = -EINVAL; + group_file = fget_light(group_fd, &fput_needed); + if (!group_file) + goto err_put_context; + if (group_file->f_op != &perf_fops) + goto err_put_context; + + group_leader = group_file->private_data; + /* + * Do not allow a recursive hierarchy (this new sibling + * becoming part of another group-sibling): + */ + if (group_leader->group_leader != group_leader) + goto err_put_context; + /* + * Do not allow to attach to a group in a different + * task or CPU context: + */ + if (group_leader->ctx != ctx) + goto err_put_context; + /* + * Only a group leader can be exclusive or pinned + */ + if (attr.exclusive || attr.pinned) + goto err_put_context; + } + + event = perf_event_alloc(&attr, cpu, ctx, group_leader, + NULL, GFP_KERNEL); + err = PTR_ERR(event); + if (IS_ERR(event)) + goto err_put_context; + + err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0); + if (err < 0) + goto err_free_put_context; + + event_file = fget_light(err, &fput_needed2); + if (!event_file) + goto err_free_put_context; + + if (flags & PERF_FLAG_FD_OUTPUT) { + err = perf_event_set_output(event, group_fd); + if (err) + goto err_fput_free_put_context; + } + + event->filp = event_file; + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_install_in_context(ctx, event, cpu); + ++ctx->generation; + mutex_unlock(&ctx->mutex); + + event->owner = current; + get_task_struct(current); + mutex_lock(¤t->perf_event_mutex); + list_add_tail(&event->owner_entry, ¤t->perf_event_list); + mutex_unlock(¤t->perf_event_mutex); + +err_fput_free_put_context: + fput_light(event_file, fput_needed2); + +err_free_put_context: + if (err < 0) + kfree(event); + +err_put_context: + if (err < 0) + put_ctx(ctx); + + fput_light(group_file, fput_needed); + + return err; +} + +/* + * inherit a event from parent task to child task: + */ +static struct perf_event * +inherit_event(struct perf_event *parent_event, + struct task_struct *parent, + struct perf_event_context *parent_ctx, + struct task_struct *child, + struct perf_event *group_leader, + struct perf_event_context *child_ctx) +{ + struct perf_event *child_event; + + /* + * Instead of creating recursive hierarchies of events, + * we link inherited events back to the original parent, + * which has a filp for sure, which we use as the reference + * count: + */ + if (parent_event->parent) + parent_event = parent_event->parent; + + child_event = perf_event_alloc(&parent_event->attr, + parent_event->cpu, child_ctx, + group_leader, parent_event, + GFP_KERNEL); + if (IS_ERR(child_event)) + return child_event; + get_ctx(child_ctx); + + /* + * Make the child state follow the state of the parent event, + * not its attr.disabled bit. We hold the parent's mutex, + * so we won't race with perf_event_{en, dis}able_family. 
+ */ + if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) + child_event->state = PERF_EVENT_STATE_INACTIVE; + else + child_event->state = PERF_EVENT_STATE_OFF; + + if (parent_event->attr.freq) + child_event->hw.sample_period = parent_event->hw.sample_period; + + /* + * Link it up in the child's context: + */ + add_event_to_ctx(child_event, child_ctx); + + /* + * Get a reference to the parent filp - we will fput it + * when the child event exits. This is safe to do because + * we are in the parent and we know that the filp still + * exists and has a nonzero count: + */ + atomic_long_inc(&parent_event->filp->f_count); + + /* + * Link this into the parent event's child list + */ + WARN_ON_ONCE(parent_event->ctx->parent_ctx); + mutex_lock(&parent_event->child_mutex); + list_add_tail(&child_event->child_list, &parent_event->child_list); + mutex_unlock(&parent_event->child_mutex); + + return child_event; +} + +static int inherit_group(struct perf_event *parent_event, + struct task_struct *parent, + struct perf_event_context *parent_ctx, + struct task_struct *child, + struct perf_event_context *child_ctx) +{ + struct perf_event *leader; + struct perf_event *sub; + struct perf_event *child_ctr; + + leader = inherit_event(parent_event, parent, parent_ctx, + child, NULL, child_ctx); + if (IS_ERR(leader)) + return PTR_ERR(leader); + list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { + child_ctr = inherit_event(sub, parent, parent_ctx, + child, leader, child_ctx); + if (IS_ERR(child_ctr)) + return PTR_ERR(child_ctr); + } + return 0; +} + +static void sync_child_event(struct perf_event *child_event, + struct task_struct *child) +{ + struct perf_event *parent_event = child_event->parent; + u64 child_val; + + if (child_event->attr.inherit_stat) + perf_event_read_event(child_event, child); + + child_val = atomic64_read(&child_event->count); + + /* + * Add back the child's count to the parent's count: + */ + atomic64_add(child_val, &parent_event->count); + atomic64_add(child_event->total_time_enabled, + &parent_event->child_total_time_enabled); + atomic64_add(child_event->total_time_running, + &parent_event->child_total_time_running); + + /* + * Remove this event from the parent's list + */ + WARN_ON_ONCE(parent_event->ctx->parent_ctx); + mutex_lock(&parent_event->child_mutex); + list_del_init(&child_event->child_list); + mutex_unlock(&parent_event->child_mutex); + + /* + * Release the parent event, if this was the last + * reference to it. + */ + fput(parent_event->filp); +} + +static void +__perf_event_exit_task(struct perf_event *child_event, + struct perf_event_context *child_ctx, + struct task_struct *child) +{ + struct perf_event *parent_event; + + update_event_times(child_event); + perf_event_remove_from_context(child_event); + + parent_event = child_event->parent; + /* + * It can happen that parent exits first, and has events + * that are still around due to the child reference. These + * events need to be zapped - but otherwise linger. + */ + if (parent_event) { + sync_child_event(child_event, child); + free_event(child_event); + } +} + +/* + * When a child task exits, feed back event values to parent events. 
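+ * sync_child_event() folds the child's count and runtimes back into
+ * the parent event and drops the child's reference on the parent's
+ * filp.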
+ */
+void perf_event_exit_task(struct task_struct *child)
+{
+	struct perf_event *child_event, *tmp;
+	struct perf_event_context *child_ctx;
+	unsigned long flags;
+
+	if (likely(!child->perf_event_ctxp)) {
+		perf_event_task(child, NULL, 0);
+		return;
+	}
+
+	local_irq_save(flags);
+	/*
+	 * We can't reschedule here because interrupts are disabled,
+	 * and either child is current or it is a task that can't be
+	 * scheduled, so we are now safe from rescheduling changing
+	 * our context.
+	 */
+	child_ctx = child->perf_event_ctxp;
+	__perf_event_task_sched_out(child_ctx);
+
+	/*
+	 * Take the context lock here so that if find_get_context is
+	 * reading child->perf_event_ctxp, we wait until it has
+	 * incremented the context's refcount before we do put_ctx below.
+	 */
+	spin_lock(&child_ctx->lock);
+	child->perf_event_ctxp = NULL;
+	/*
+	 * If this context is a clone, unclone it so it can't get
+	 * swapped to another process while we're removing all
+	 * the events from it.
+	 */
+	unclone_ctx(child_ctx);
+	spin_unlock_irqrestore(&child_ctx->lock, flags);
+
+	/*
+	 * Report the task dead after unscheduling the events so that we
+	 * won't get any samples after PERF_RECORD_EXIT. We can however still
+	 * get a few PERF_RECORD_READ events.
+	 */
+	perf_event_task(child, child_ctx, 0);
+
+	/*
+	 * We can recurse on the same lock type through:
+	 *
+	 *   __perf_event_exit_task()
+	 *     sync_child_event()
+	 *       fput(parent_event->filp)
+	 *         perf_release()
+	 *           mutex_lock(&ctx->mutex)
+	 *
+	 * But since it's the parent context it won't be the same instance.
+	 */
+	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
+
+again:
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+				 group_entry)
+		__perf_event_exit_task(child_event, child_ctx, child);
+
+	/*
+	 * If the last event was a group event, it will have appended all
+	 * its siblings to the list, but we obtained 'tmp' before that which
+	 * will still point to the list head terminating the iteration.
+	 */
+	if (!list_empty(&child_ctx->group_list))
+		goto again;
+
+	mutex_unlock(&child_ctx->mutex);
+
+	put_ctx(child_ctx);
+}
+
+/*
+ * free an unexposed, unused context as created by inheritance by
+ * perf_event_init_task() below, used by fork() in case of failure.
+ */
+void perf_event_free_task(struct task_struct *task)
+{
+	struct perf_event_context *ctx = task->perf_event_ctxp;
+	struct perf_event *event, *tmp;
+
+	if (!ctx)
+		return;
+
+	mutex_lock(&ctx->mutex);
+again:
+	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
+		struct perf_event *parent = event->parent;
+
+		if (WARN_ON_ONCE(!parent))
+			continue;
+
+		mutex_lock(&parent->child_mutex);
+		list_del_init(&event->child_list);
+		mutex_unlock(&parent->child_mutex);
+
+		fput(parent->filp);
+
+		list_del_event(event, ctx);
+		free_event(event);
+	}
+
+	if (!list_empty(&ctx->group_list))
+		goto again;
+
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
+/*
+ * Initialize the perf_event context in task_struct
+ */
+int perf_event_init_task(struct task_struct *child)
+{
+	struct perf_event_context *child_ctx, *parent_ctx;
+	struct perf_event_context *cloned_ctx;
+	struct perf_event *event;
+	struct task_struct *parent = current;
+	int inherited_all = 1;
+	int ret = 0;
+
+	child->perf_event_ctxp = NULL;
+
+	mutex_init(&child->perf_event_mutex);
+	INIT_LIST_HEAD(&child->perf_event_list);
+
+	if (likely(!parent->perf_event_ctxp))
+		return 0;
+
+	/*
+	 * This is executed from the parent task context, so inherit
+	 * events that have been marked for cloning.
+ * First allocate and initialize a context for the child. + */ + + child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); + if (!child_ctx) + return -ENOMEM; + + __perf_event_init_context(child_ctx, child); + child->perf_event_ctxp = child_ctx; + get_task_struct(child); + + /* + * If the parent's context is a clone, pin it so it won't get + * swapped under us. + */ + parent_ctx = perf_pin_task_context(parent); + + /* + * No need to check if parent_ctx != NULL here; since we saw + * it non-NULL earlier, the only reason for it to become NULL + * is if we exit, and since we're currently in the middle of + * a fork we can't be exiting at the same time. + */ + + /* + * Lock the parent list. No need to lock the child - not PID + * hashed yet and not running, so nobody can access it. + */ + mutex_lock(&parent_ctx->mutex); + + /* + * We dont have to disable NMIs - we are only looking at + * the list, not manipulating it: + */ + list_for_each_entry_rcu(event, &parent_ctx->event_list, event_entry) { + if (event != event->group_leader) + continue; + + if (!event->attr.inherit) { + inherited_all = 0; + continue; + } + + ret = inherit_group(event, parent, parent_ctx, + child, child_ctx); + if (ret) { + inherited_all = 0; + break; + } + } + + if (inherited_all) { + /* + * Mark the child context as a clone of the parent + * context, or of whatever the parent is a clone of. + * Note that if the parent is a clone, it could get + * uncloned at any point, but that doesn't matter + * because the list of events and the generation + * count can't have changed since we took the mutex. + */ + cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); + if (cloned_ctx) { + child_ctx->parent_ctx = cloned_ctx; + child_ctx->parent_gen = parent_ctx->parent_gen; + } else { + child_ctx->parent_ctx = parent_ctx; + child_ctx->parent_gen = parent_ctx->generation; + } + get_ctx(child_ctx->parent_ctx); + } + + mutex_unlock(&parent_ctx->mutex); + + perf_unpin_context(parent_ctx); + + return ret; +} + +static void __cpuinit perf_event_init_cpu(int cpu) +{ + struct perf_cpu_context *cpuctx; + + cpuctx = &per_cpu(perf_cpu_context, cpu); + __perf_event_init_context(&cpuctx->ctx, NULL); + + spin_lock(&perf_resource_lock); + cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; + spin_unlock(&perf_resource_lock); + + hw_perf_event_setup(cpu); +} + +#ifdef CONFIG_HOTPLUG_CPU +static void __perf_event_exit_cpu(void *info) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_event_context *ctx = &cpuctx->ctx; + struct perf_event *event, *tmp; + + list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) + __perf_event_remove_from_context(event); +} +static void perf_event_exit_cpu(int cpu) +{ + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_event_context *ctx = &cpuctx->ctx; + + mutex_lock(&ctx->mutex); + smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); + mutex_unlock(&ctx->mutex); +} +#else +static inline void perf_event_exit_cpu(int cpu) { } +#endif + +static int __cpuinit +perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action) { + + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + perf_event_init_cpu(cpu); + break; + + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + hw_perf_event_setup_online(cpu); + break; + + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + perf_event_exit_cpu(cpu); + break; + + default: + break; + } + + return 
NOTIFY_OK;
+}
+
+/*
+ * This has to have a higher priority than migration_notifier in sched.c.
+ */
+static struct notifier_block __cpuinitdata perf_cpu_nb = {
+	.notifier_call		= perf_cpu_notify,
+	.priority		= 20,
+};
+
+void __init perf_event_init(void)
+{
+	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
+			(void *)(long)smp_processor_id());
+	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
+			(void *)(long)smp_processor_id());
+	register_cpu_notifier(&perf_cpu_nb);
+}
+
+static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
+{
+	return sprintf(buf, "%d\n", perf_reserved_percpu);
+}
+
+static ssize_t
+perf_set_reserve_percpu(struct sysdev_class *class,
+			const char *buf,
+			size_t count)
+{
+	struct perf_cpu_context *cpuctx;
+	unsigned long val;
+	int err, cpu, mpt;
+
+	err = strict_strtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val > perf_max_events)
+		return -EINVAL;
+
+	spin_lock(&perf_resource_lock);
+	perf_reserved_percpu = val;
+	for_each_online_cpu(cpu) {
+		cpuctx = &per_cpu(perf_cpu_context, cpu);
+		spin_lock_irq(&cpuctx->ctx.lock);
+		mpt = min(perf_max_events - cpuctx->ctx.nr_events,
+			  perf_max_events - perf_reserved_percpu);
+		cpuctx->max_pertask = mpt;
+		spin_unlock_irq(&cpuctx->ctx.lock);
+	}
+	spin_unlock(&perf_resource_lock);
+
+	return count;
+}
+
+static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf)
+{
+	return sprintf(buf, "%d\n", perf_overcommit);
+}
+
+static ssize_t
+perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
+{
+	unsigned long val;
+	int err;
+
+	err = strict_strtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val > 1)
+		return -EINVAL;
+
+	spin_lock(&perf_resource_lock);
+	perf_overcommit = val;
+	spin_unlock(&perf_resource_lock);
+
+	return count;
+}
+
+static SYSDEV_CLASS_ATTR(
+				reserve_percpu,
+				0644,
+				perf_show_reserve_percpu,
+				perf_set_reserve_percpu
+			);
+
+static SYSDEV_CLASS_ATTR(
+				overcommit,
+				0644,
+				perf_show_overcommit,
+				perf_set_overcommit
+			);
+
+static struct attribute *perfclass_attrs[] = {
+	&attr_reserve_percpu.attr,
+	&attr_overcommit.attr,
+	NULL
+};
+
+static struct attribute_group perfclass_attr_group = {
+	.attrs			= perfclass_attrs,
+	.name			= "perf_events",
+};
+
+static int __init perf_event_sysfs_init(void)
+{
+	return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
+				  &perfclass_attr_group);
+}
+device_initcall(perf_event_sysfs_init);
diff --git a/kernel/sched.c b/kernel/sched.c
index faf4d463bbf..291c8d213d1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -39,7 +39,7 @@
 #include
 #include
 #include
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include
 #include
 #include
@@ -2059,7 +2059,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
 #endif
-		perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
 				     1, 1, NULL, 0);
 	}
 	p->se.vruntime -= old_cfsrq->min_vruntime -
@@ -2724,7 +2724,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
-	perf_counter_task_sched_in(current, cpu_of(rq));
+	perf_event_task_sched_in(current, cpu_of(rq));
 	finish_lock_switch(rq, prev);
 	fire_sched_in_preempt_notifiers(current);
 
@@ -5199,7 +5199,7 @@ void scheduler_tick(void)
 	curr->sched_class->task_tick(rq, curr, 0);
 	spin_unlock(&rq->lock);
 
-	perf_counter_task_tick(curr, cpu);
+	perf_event_task_tick(curr, cpu);
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
@@ -5415,7 +5415,7 @@ need_resched_nonpreemptible:
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
-		perf_counter_task_sched_out(prev, next, cpu);
+		perf_event_task_sched_out(prev, next, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;
@@ -7692,7 +7692,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 /*
  * Register at high priority so that task migration (migrate_all_tasks)
  * happens before everything else.  This has to be lower priority than
- * the notifier in the perf_counter subsystem, though.
+ * the notifier in the perf_event subsystem, though.
  */
 static struct notifier_block __cpuinitdata migration_notifier = {
 	.notifier_call = migration_call,
@@ -9549,7 +9549,7 @@ void __init sched_init(void)
 	alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
-	perf_counter_init();
+	perf_event_init();
 
 	scheduler_running = 1;
 }
diff --git a/kernel/sys.c b/kernel/sys.c
index b3f1097c76f..ea5c3bcac88 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -14,7 +14,7 @@
 #include
 #include
 #include
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include
 #include
 #include
@@ -1511,11 +1511,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		case PR_SET_TSC:
 			error = SET_TSC_CTL(arg2);
 			break;
-		case PR_TASK_PERF_COUNTERS_DISABLE:
-			error = perf_counter_task_disable();
+		case PR_TASK_PERF_EVENTS_DISABLE:
+			error = perf_event_task_disable();
 			break;
-		case PR_TASK_PERF_COUNTERS_ENABLE:
-			error = perf_counter_task_enable();
+		case PR_TASK_PERF_EVENTS_ENABLE:
+			error = perf_event_task_enable();
 			break;
 		case PR_GET_TIMERSLACK:
 			error = current->timer_slack_ns;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 68320f6b07b..515bc230ac2 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -177,4 +177,4 @@ cond_syscall(sys_eventfd);
 cond_syscall(sys_eventfd2);
 
 /* performance counters: */
-cond_syscall(sys_perf_counter_open);
+cond_syscall(sys_perf_event_open);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a631ba684a..6ba49c7cb12 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -50,7 +50,7 @@
 #include
 #include
 #include
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include
 #include
 
@@ -964,28 +964,28 @@ static struct ctl_table kern_table[] = {
 		.child		= slow_work_sysctls,
 	},
 #endif
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "perf_counter_paranoid",
-		.data		= &sysctl_perf_counter_paranoid,
-		.maxlen		= sizeof(sysctl_perf_counter_paranoid),
+		.procname	= "perf_event_paranoid",
+		.data		= &sysctl_perf_event_paranoid,
+		.maxlen		= sizeof(sysctl_perf_event_paranoid),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "perf_counter_mlock_kb",
-		.data		= &sysctl_perf_counter_mlock,
-		.maxlen		= sizeof(sysctl_perf_counter_mlock),
+		.procname	= "perf_event_mlock_kb",
+		.data		= &sysctl_perf_event_mlock,
+		.maxlen		= sizeof(sysctl_perf_event_mlock),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "perf_counter_max_sample_rate",
-		.data		= &sysctl_perf_counter_sample_rate,
-		.maxlen		= sizeof(sysctl_perf_counter_sample_rate),
+		.procname	= "perf_event_max_sample_rate",
+		.data		= &sysctl_perf_event_sample_rate,
+		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
diff --git a/kernel/timer.c b/kernel/timer.c
index bbb51074680..811e5c39145 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,7 +37,7 @@
 #include
 #include
 #include
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include
 #include
 
@@ -1187,7 +1187,7 @@ static void
run_timer_softirq(struct softirq_action *h)
 {
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
-	perf_counter_do_pending();
+	perf_event_do_pending();
 
 	hrtimer_run_pending();
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8712ce3c6a0..233f3483ac8 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -2,7 +2,7 @@
 #include
 #include
 #include
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include
 #include "trace_output.h"
 
@@ -414,7 +414,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 		rec->nr = syscall_nr;
 		syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 				       (unsigned long *)&rec->args);
-		perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
+		perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
 	} while(0);
 }
 
@@ -476,7 +476,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	rec.nr = syscall_nr;
 	rec.ret = syscall_get_return_value(current, regs);
 
-	perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
+	perf_tp_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
 }
 
 int reg_prof_syscall_exit(char *name)
diff --git a/mm/mmap.c b/mm/mmap.c
index 26892e346d8..376492ed08f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -28,7 +28,7 @@
 #include
 #include
 #include
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include
 #include
 
@@ -1220,7 +1220,7 @@ munmap_back:
 	if (correct_wcount)
 		atomic_inc(&inode->i_writecount);
 out:
-	perf_counter_mmap(vma);
+	perf_event_mmap(vma);
 
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
@@ -2308,7 +2308,7 @@ int install_special_mapping(struct mm_struct *mm,
 
 	mm->total_vm += len >> PAGE_SHIFT;
 
-	perf_counter_mmap(vma);
+	perf_event_mmap(vma);
 
 	return 0;
 }
diff --git a/mm/mprotect.c b/mm/mprotect.c
index d80311baeb2..8bc969d8112 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -23,7 +23,7 @@
 #include
 #include
 #include
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include
 #include
 #include
@@ -300,7 +300,7 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
 		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
 		if (error)
 			goto out;
-		perf_counter_mmap(vma);
+		perf_event_mmap(vma);
 		nstart = tmp;
 
 		if (nstart < prev->vm_end)
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 0aba8b6e9c5..b5f1953b614 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -318,7 +318,7 @@ export PERL_PATH
 
 LIB_FILE=libperf.a
 
-LIB_H += ../../include/linux/perf_counter.h
+LIB_H += ../../include/linux/perf_event.h
 LIB_H += ../../include/linux/rbtree.h
 LIB_H += ../../include/linux/list.h
 LIB_H += util/include/linux/list.h
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 043d85b7e25..1ec74161581 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -505,7 +505,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		return -1;
 	}
 
-	if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
+	if (event->header.misc & PERF_RECORD_MISC_KERNEL) {
 		show = SHOW_KERNEL;
 		level = 'k';
 
@@ -513,7 +513,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		dump_printf(" ......
dso: %s\n", dso->name); - } else if (event->header.misc & PERF_EVENT_MISC_USER) { + } else if (event->header.misc & PERF_RECORD_MISC_USER) { show = SHOW_USER; level = '.'; @@ -565,7 +565,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->mmap.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", + dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), event->mmap.pid, @@ -575,7 +575,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) event->mmap.filename); if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_EVENT_MMAP, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); return 0; } @@ -591,14 +591,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) struct thread *thread; thread = threads__findnew(event->comm.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } total_comm++; @@ -614,7 +614,7 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->fork.pid, &threads, &last_match); parent = threads__findnew(event->fork.ppid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", + dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->fork.pid, event->fork.ppid); @@ -627,7 +627,7 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) return 0; if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); return -1; } total_fork++; @@ -639,23 +639,23 @@ static int process_event(event_t *event, unsigned long offset, unsigned long head) { switch (event->header.type) { - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return process_sample_event(event, offset, head); - case PERF_EVENT_MMAP: + case PERF_RECORD_MMAP: return process_mmap_event(event, offset, head); - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event, offset, head); - case PERF_EVENT_FORK: + case PERF_RECORD_FORK: return process_fork_event(event, offset, head); /* * We dont process them right now but they are fine: */ - case PERF_EVENT_THROTTLE: - case PERF_EVENT_UNTHROTTLE: + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: return 0; default: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2459e5a22ed..a5a050af8e7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -77,7 +77,7 @@ static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; static unsigned long mmap_read_head(struct mmap_data *md) { - struct perf_counter_mmap_page *pc = md->base; + struct perf_event_mmap_page *pc = md->base; long head; head = pc->data_head; @@ -88,7 +88,7 @@ static unsigned long mmap_read_head(struct mmap_data *md) static void mmap_write_tail(struct mmap_data 
*md, unsigned long tail) { - struct perf_counter_mmap_page *pc = md->base; + struct perf_event_mmap_page *pc = md->base; /* * ensure all reads are done before we write the tail out. @@ -233,7 +233,7 @@ static pid_t pid_synthesize_comm_event(pid_t pid, int full) } } - comm_ev.header.type = PERF_EVENT_COMM; + comm_ev.header.type = PERF_RECORD_COMM; size = ALIGN(size, sizeof(u64)); comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); @@ -288,7 +288,7 @@ static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid) while (1) { char bf[BUFSIZ], *pbf = bf; struct mmap_event mmap_ev = { - .header = { .type = PERF_EVENT_MMAP }, + .header = { .type = PERF_RECORD_MMAP }, }; int n; size_t size; @@ -355,7 +355,7 @@ static void synthesize_all(void) static int group_fd; -static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int nr) +static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr) { struct perf_header_attr *h_attr; @@ -371,7 +371,7 @@ static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int static void create_counter(int counter, int cpu, pid_t pid) { - struct perf_counter_attr *attr = attrs + counter; + struct perf_event_attr *attr = attrs + counter; struct perf_header_attr *h_attr; int track = !counter; /* only the first counter needs these */ struct { @@ -417,7 +417,7 @@ static void create_counter(int counter, int cpu, pid_t pid) attr->disabled = 1; try_again: - fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); + fd[nr_cpu][counter] = sys_perf_event_open(attr, pid, cpu, group_fd, 0); if (fd[nr_cpu][counter] < 0) { int err = errno; @@ -444,7 +444,7 @@ try_again: printf("\n"); error("perfcounter syscall returned with %d (%s)\n", fd[nr_cpu][counter], strerror(err)); - die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n"); + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); } @@ -478,7 +478,7 @@ try_again: if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { int ret; - ret = ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, multiplex_fd); + ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); assert(ret != -1); } else { event_array[nr_poll].fd = fd[nr_cpu][counter]; @@ -496,7 +496,7 @@ try_again: } } - ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE); + ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE); } static void open_counters(int cpu, pid_t pid) @@ -642,7 +642,7 @@ static int __cmd_record(int argc, const char **argv) if (done) { for (i = 0; i < nr_cpu; i++) { for (counter = 0; counter < nr_counters; counter++) - ioctl(fd[i][counter], PERF_COUNTER_IOC_DISABLE); + ioctl(fd[i][counter], PERF_EVENT_IOC_DISABLE); } } } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cdf9a8d27bb..19669c20088 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1121,7 +1121,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", + dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -1158,9 +1158,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (comm_list && !strlist__has_entry(comm_list, thread->comm)) return 0; - cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK; + cpumode = event->header.misc & 
PERF_RECORD_MISC_CPUMODE_MASK; - if (cpumode == PERF_EVENT_MISC_KERNEL) { + if (cpumode == PERF_RECORD_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -1168,7 +1168,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dump_printf(" ...... dso: %s\n", dso->name); - } else if (cpumode == PERF_EVENT_MISC_USER) { + } else if (cpumode == PERF_RECORD_MISC_USER) { show = SHOW_USER; level = '.'; @@ -1210,7 +1210,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->mmap.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n", + dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), event->mmap.pid, @@ -1221,7 +1221,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) event->mmap.filename); if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_EVENT_MMAP, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); return 0; } @@ -1238,14 +1238,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->comm.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm_adjust(thread, event->comm.comm)) { - dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } total_comm++; @@ -1262,10 +1262,10 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->fork.pid, &threads, &last_match); parent = threads__findnew(event->fork.ppid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_%s: (%d:%d):(%d:%d)\n", + dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n", (void *)(offset + head), (void *)(long)(event->header.size), - event->header.type == PERF_EVENT_FORK ? "FORK" : "EXIT", + event->header.type == PERF_RECORD_FORK ? 
"FORK" : "EXIT", event->fork.pid, event->fork.tid, event->fork.ppid, event->fork.ptid); @@ -1276,11 +1276,11 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) if (thread == parent) return 0; - if (event->header.type == PERF_EVENT_EXIT) + if (event->header.type == PERF_RECORD_EXIT) return 0; if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); return -1; } total_fork++; @@ -1291,7 +1291,7 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) static int process_lost_event(event_t *event, unsigned long offset, unsigned long head) { - dump_printf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n", + dump_printf("%p [%p]: PERF_RECORD_LOST: id:%Ld: lost:%Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->lost.id, @@ -1305,7 +1305,7 @@ process_lost_event(event_t *event, unsigned long offset, unsigned long head) static int process_read_event(event_t *event, unsigned long offset, unsigned long head) { - struct perf_counter_attr *attr; + struct perf_event_attr *attr; attr = perf_header__find_attr(event->read.id, header); @@ -1319,7 +1319,7 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head) event->read.value); } - dump_printf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", + dump_printf("%p [%p]: PERF_RECORD_READ: %d %d %s %Lu\n", (void *)(offset + head), (void *)(long)(event->header.size), event->read.pid, @@ -1337,31 +1337,31 @@ process_event(event_t *event, unsigned long offset, unsigned long head) trace_event(event); switch (event->header.type) { - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return process_sample_event(event, offset, head); - case PERF_EVENT_MMAP: + case PERF_RECORD_MMAP: return process_mmap_event(event, offset, head); - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event, offset, head); - case PERF_EVENT_FORK: - case PERF_EVENT_EXIT: + case PERF_RECORD_FORK: + case PERF_RECORD_EXIT: return process_task_event(event, offset, head); - case PERF_EVENT_LOST: + case PERF_RECORD_LOST: return process_lost_event(event, offset, head); - case PERF_EVENT_READ: + case PERF_RECORD_READ: return process_read_event(event, offset, head); /* * We dont process them right now but they are fine: */ - case PERF_EVENT_THROTTLE: - case PERF_EVENT_UNTHROTTLE: + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: return 0; default: diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 275d79c6627..ea9c15c0cdf 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1573,7 +1573,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", + dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -1589,9 +1589,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } - cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - if (cpumode == PERF_EVENT_MISC_KERNEL) { + if (cpumode == PERF_RECORD_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -1599,7 +1599,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) 
dump_printf(" ...... dso: %s\n", dso->name); - } else if (cpumode == PERF_EVENT_MISC_USER) { + } else if (cpumode == PERF_RECORD_MISC_USER) { show = SHOW_USER; level = '.'; @@ -1626,23 +1626,23 @@ process_event(event_t *event, unsigned long offset, unsigned long head) nr_events++; switch (event->header.type) { - case PERF_EVENT_MMAP: + case PERF_RECORD_MMAP: return 0; - case PERF_EVENT_LOST: + case PERF_RECORD_LOST: nr_lost_chunks++; nr_lost_events += event->lost.lost; return 0; - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event, offset, head); - case PERF_EVENT_EXIT ... PERF_EVENT_READ: + case PERF_RECORD_EXIT ... PERF_RECORD_READ: return 0; - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return process_sample_event(event, offset, head); - case PERF_EVENT_MAX: + case PERF_RECORD_MAX: default: return -1; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 61b828236c1..16af2d82e85 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -48,7 +48,7 @@ #include #include -static struct perf_counter_attr default_attrs[] = { +static struct perf_event_attr default_attrs[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, @@ -130,11 +130,11 @@ struct stats runtime_cycles_stats; attrs[counter].config == PERF_COUNT_##c) #define ERR_PERF_OPEN \ -"Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" +"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" static void create_perf_stat_counter(int counter, int pid) { - struct perf_counter_attr *attr = attrs + counter; + struct perf_event_attr *attr = attrs + counter; if (scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | @@ -144,7 +144,7 @@ static void create_perf_stat_counter(int counter, int pid) unsigned int cpu; for (cpu = 0; cpu < nr_cpus; cpu++) { - fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); + fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0); if (fd[cpu][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[cpu][counter], strerror(errno)); @@ -154,7 +154,7 @@ static void create_perf_stat_counter(int counter, int pid) attr->disabled = 1; attr->enable_on_exec = 1; - fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); + fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); if (fd[0][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[0][counter], strerror(errno)); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 60040639627..4405681b313 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -937,21 +937,21 @@ process_event(event_t *event) switch (event->header.type) { - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event); - case PERF_EVENT_FORK: + case PERF_RECORD_FORK: return process_fork_event(event); - case PERF_EVENT_EXIT: + case PERF_RECORD_EXIT: return process_exit_event(event); - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return queue_sample_event(event); /* * We dont process them right now but they are fine: */ - case PERF_EVENT_MMAP: - case PERF_EVENT_THROTTLE: - case PERF_EVENT_UNTHROTTLE: + case PERF_RECORD_MMAP: + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: return 0; default: diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4002ccb3675..1ca88896eee 100644 --- a/tools/perf/builtin-top.c +++ 
b/tools/perf/builtin-top.c @@ -901,7 +901,7 @@ struct mmap_data { static unsigned int mmap_read_head(struct mmap_data *md) { - struct perf_counter_mmap_page *pc = md->base; + struct perf_event_mmap_page *pc = md->base; int head; head = pc->data_head; @@ -977,9 +977,9 @@ static void mmap_read_counter(struct mmap_data *md) old += size; - if (event->header.type == PERF_EVENT_SAMPLE) { + if (event->header.type == PERF_RECORD_SAMPLE) { int user = - (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER; + (event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_USER; process_event(event->ip.ip, md->counter, user); } } @@ -1005,7 +1005,7 @@ int group_fd; static void start_counter(int i, int counter) { - struct perf_counter_attr *attr; + struct perf_event_attr *attr; int cpu; cpu = profile_cpu; @@ -1019,7 +1019,7 @@ static void start_counter(int i, int counter) attr->inherit = (cpu < 0) && inherit; try_again: - fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0); + fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); if (fd[i][counter] < 0) { int err = errno; @@ -1044,7 +1044,7 @@ try_again: printf("\n"); error("perfcounter syscall returned with %d (%s)\n", fd[i][counter], strerror(err)); - die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n"); + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); } assert(fd[i][counter] >= 0); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 914ab366e36..e9d256e2f47 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -35,14 +35,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->comm.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } total_comm++; @@ -82,7 +82,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", + dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -98,9 +98,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } - cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - if (cpumode == PERF_EVENT_MISC_KERNEL) { + if (cpumode == PERF_RECORD_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -108,7 +108,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dump_printf(" ...... dso: %s\n", dso->name); - } else if (cpumode == PERF_EVENT_MISC_USER) { + } else if (cpumode == PERF_RECORD_MISC_USER) { show = SHOW_USER; level = '.'; @@ -146,19 +146,19 @@ process_event(event_t *event, unsigned long offset, unsigned long head) trace_event(event); switch (event->header.type) { - case PERF_EVENT_MMAP ... PERF_EVENT_LOST: + case PERF_RECORD_MMAP ... 
PERF_RECORD_LOST: return 0; - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event, offset, head); - case PERF_EVENT_EXIT ... PERF_EVENT_READ: + case PERF_RECORD_EXIT ... PERF_RECORD_READ: return 0; - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return process_sample_event(event, offset, head); - case PERF_EVENT_MAX: + case PERF_RECORD_MAX: default: return -1; } diff --git a/tools/perf/design.txt b/tools/perf/design.txt index f71e0d245cb..f1946d107b1 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -18,10 +18,10 @@ underlying hardware counters. Performance counters are accessed via special file descriptors. There's one file descriptor per virtual counter used. -The special file descriptor is opened via the perf_counter_open() +The special file descriptor is opened via the perf_event_open() system call: - int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, + int sys_perf_event_open(struct perf_event_hw_event *hw_event_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); @@ -32,9 +32,9 @@ can be used to set the blocking mode, etc. Multiple counters can be kept open at a time, and the counters can be poll()ed. -When creating a new counter fd, 'perf_counter_hw_event' is: +When creating a new counter fd, 'perf_event_hw_event' is: -struct perf_counter_hw_event { +struct perf_event_hw_event { /* * The MSB of the config word signifies if the rest contains cpu * specific (raw) counter configuration data, if unset, the next @@ -93,7 +93,7 @@ specified by 'event_id': /* * Generalized performance counter event types, used by the hw_event.event_id - * parameter of the sys_perf_counter_open() syscall: + * parameter of the sys_perf_event_open() syscall: */ enum hw_event_ids { /* @@ -159,7 +159,7 @@ in size. * reads on the counter should return the indicated quantities, * in increasing order of bit value, after the counter value. */ -enum perf_counter_read_format { +enum perf_event_read_format { PERF_FORMAT_TOTAL_TIME_ENABLED = 1, PERF_FORMAT_TOTAL_TIME_RUNNING = 2, }; @@ -178,7 +178,7 @@ interrupt: * Bits that can be set in hw_event.record_type to request information * in the overflow packets. */ -enum perf_counter_record_format { +enum perf_event_record_format { PERF_RECORD_IP = 1U << 0, PERF_RECORD_TID = 1U << 1, PERF_RECORD_TIME = 1U << 2, @@ -228,7 +228,7 @@ these events are recorded in the ring-buffer (see below). The 'comm' bit allows tracking of process comm data on process creation. This too is recorded in the ring-buffer (see below). -The 'pid' parameter to the perf_counter_open() system call allows the +The 'pid' parameter to the perf_event_open() system call allows the counter to be specific to a task: pid == 0: if the pid parameter is zero, the counter is attached to the @@ -258,7 +258,7 @@ The 'flags' parameter is currently unused and must be zero. The 'group_fd' parameter allows counter "groups" to be set up. A counter group has one counter which is the group "leader". The leader -is created first, with group_fd = -1 in the perf_counter_open call +is created first, with group_fd = -1 in the perf_event_open call that creates it. The rest of the group members are created subsequently, with group_fd giving the fd of the group leader. (A single counter on its own is created with group_fd = -1 and is @@ -277,13 +277,13 @@ tracking are logged into a ring-buffer. This ring-buffer is created and accessed through mmap(). 
The mmap size should be 1+2^n pages, where the first page is a meta-data page -(struct perf_counter_mmap_page) that contains various bits of information such +(struct perf_event_mmap_page) that contains various bits of information such as where the ring-buffer head is. /* * Structure of the page that can be mapped via mmap */ -struct perf_counter_mmap_page { +struct perf_event_mmap_page { __u32 version; /* version number of this structure */ __u32 compat_version; /* lowest version this is compat with */ @@ -317,7 +317,7 @@ struct perf_counter_mmap_page { * Control data for the mmap() data buffer. * * User-space reading this value should issue an rmb(), on SMP capable - * platforms, after reading this value -- see perf_counter_wakeup(). + * platforms, after reading this value -- see perf_event_wakeup(). */ __u32 data_head; /* head in the data section */ }; @@ -327,9 +327,9 @@ NOTE: the hw-counter userspace bits are arch specific and are currently only The following 2^n pages are the ring-buffer which contains events of the form: -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (1 << 1) -#define PERF_EVENT_MISC_OVERFLOW (1 << 2) +#define PERF_RECORD_MISC_KERNEL (1 << 0) +#define PERF_RECORD_MISC_USER (1 << 1) +#define PERF_RECORD_MISC_OVERFLOW (1 << 2) struct perf_event_header { __u32 type; @@ -353,8 +353,8 @@ enum perf_event_type { * char filename[]; * }; */ - PERF_EVENT_MMAP = 1, - PERF_EVENT_MUNMAP = 2, + PERF_RECORD_MMAP = 1, + PERF_RECORD_MUNMAP = 2, /* * struct { @@ -364,10 +364,10 @@ enum perf_event_type { * char comm[]; * }; */ - PERF_EVENT_COMM = 3, + PERF_RECORD_COMM = 3, /* - * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field + * When header.misc & PERF_RECORD_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* * * struct { @@ -397,7 +397,7 @@ Notification of new events is possible through poll()/select()/epoll() and fcntl() managing signals. Normally a notification is generated for every page filled, however one can -additionally set perf_counter_hw_event.wakeup_events to generate one every +additionally set perf_event_hw_event.wakeup_events to generate one every so many counter overflow events. Future work will include a splice() interface to the ring-buffer. @@ -409,11 +409,11 @@ events but does continue to exist and maintain its count value. An individual counter or counter group can be enabled with - ioctl(fd, PERF_COUNTER_IOC_ENABLE); + ioctl(fd, PERF_EVENT_IOC_ENABLE); or disabled with - ioctl(fd, PERF_COUNTER_IOC_DISABLE); + ioctl(fd, PERF_EVENT_IOC_DISABLE); Enabling or disabling the leader of a group enables or disables the whole group; that is, while the group leader is disabled, none of the @@ -424,16 +424,16 @@ other counter. Additionally, non-inherited overflow counters can use - ioctl(fd, PERF_COUNTER_IOC_REFRESH, nr); + ioctl(fd, PERF_EVENT_IOC_REFRESH, nr); to enable a counter for 'nr' events, after which it gets disabled again. A process can enable or disable all the counter groups that are attached to it, using prctl: - prctl(PR_TASK_PERF_COUNTERS_ENABLE); + prctl(PR_TASK_PERF_EVENTS_ENABLE); - prctl(PR_TASK_PERF_COUNTERS_DISABLE); + prctl(PR_TASK_PERF_EVENTS_DISABLE); This applies to all counters on the current process, whether created by this process or by another, and doesn't affect any counters that @@ -447,11 +447,11 @@ Arch requirements If your architecture does not have hardware performance metrics, you can still use the generic software counters based on hrtimers for sampling. 
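For instance, a minimal sketch of opening one such software counter, using the sys_perf_event_open() wrapper from tools/perf/perf.h as renamed in this series (error handling omitted; open_task_clock() is an illustrative name only):

static int open_task_clock(void)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config	= PERF_COUNT_SW_TASK_CLOCK,
	};

	/* pid 0: this task; cpu -1: any cpu; group_fd -1: no group;
	 * flags must be zero; the wrapper fills in attr.size itself. */
	return sys_perf_event_open(&attr, 0, -1, -1, 0);
}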
-So to start with, in order to add HAVE_PERF_COUNTERS to your Kconfig, you +So to start with, in order to add HAVE_PERF_EVENTS to your Kconfig, you will need at least this: - - asm/perf_counter.h - a basic stub will suffice at first + - asm/perf_event.h - a basic stub will suffice at first - support for atomic64 types (and associated helper functions) - - set_perf_counter_pending() implemented + - set_perf_event_pending() implemented If your architecture does have hardware capabilities, you can override the -weak stub hw_perf_counter_init() to register hardware counters. +weak stub hw_perf_event_init() to register hardware counters. diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 2abeb20d0bf..8cc4623afd6 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -52,15 +52,15 @@ #include #include -#include "../../include/linux/perf_counter.h" +#include "../../include/linux/perf_event.h" #include "util/types.h" /* - * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all + * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all * counters in the current task. */ -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 +#define PR_TASK_PERF_EVENTS_DISABLE 31 +#define PR_TASK_PERF_EVENTS_ENABLE 32 #ifndef NSEC_PER_SEC # define NSEC_PER_SEC 1000000000ULL @@ -90,12 +90,12 @@ static inline unsigned long long rdclock(void) _min1 < _min2 ? _min1 : _min2; }) static inline int -sys_perf_counter_open(struct perf_counter_attr *attr, +sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { attr->size = sizeof(*attr); - return syscall(__NR_perf_counter_open, attr, pid, cpu, + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 018d414a09d..2c9c26d6ded 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -1,5 +1,5 @@ -#ifndef __PERF_EVENT_H -#define __PERF_EVENT_H +#ifndef __PERF_RECORD_H +#define __PERF_RECORD_H #include "../perf.h" #include "util.h" #include diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index bb4fca3efcc..e306857b2c2 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -9,7 +9,7 @@ /* * Create new perf.data header attribute: */ -struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr) +struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr) { struct perf_header_attr *self = malloc(sizeof(*self)); @@ -134,7 +134,7 @@ struct perf_file_section { }; struct perf_file_attr { - struct perf_counter_attr attr; + struct perf_event_attr attr; struct perf_file_section ids; }; @@ -320,7 +320,7 @@ u64 perf_header__sample_type(struct perf_header *header) return type; } -struct perf_counter_attr * +struct perf_event_attr * perf_header__find_attr(u64 id, struct perf_header *header) { int i; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 7b0e84a8717..a0761bc7863 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -1,12 +1,12 @@ #ifndef _PERF_HEADER_H #define _PERF_HEADER_H -#include "../../../include/linux/perf_counter.h" +#include "../../../include/linux/perf_event.h" #include #include "types.h" struct perf_header_attr { - struct perf_counter_attr attr; + struct perf_event_attr attr; int ids, size; u64 *id; off_t id_offset; @@ -34,11 +34,11 @@ char *perf_header__find_event(u64 id); struct perf_header_attr * -perf_header_attr__new(struct perf_counter_attr *attr); 
+perf_header_attr__new(struct perf_event_attr *attr); void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); u64 perf_header__sample_type(struct perf_header *header); -struct perf_counter_attr * +struct perf_event_attr * perf_header__find_attr(u64 id, struct perf_header *header); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 89172fd0038..13ab4b842d4 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -10,7 +10,7 @@ int nr_counters; -struct perf_counter_attr attrs[MAX_COUNTERS]; +struct perf_event_attr attrs[MAX_COUNTERS]; struct event_symbol { u8 type; @@ -48,13 +48,13 @@ static struct event_symbol event_symbols[] = { { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, }; -#define __PERF_COUNTER_FIELD(config, name) \ - ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) +#define __PERF_EVENT_FIELD(config, name) \ + ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) -#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) -#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) -#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) -#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) +#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) +#define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG) +#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) +#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) static const char *hw_event_names[] = { "cycles", @@ -352,7 +352,7 @@ static int parse_aliases(const char **str, const char *names[][MAX_ALIASES], int } static enum event_result -parse_generic_hw_event(const char **str, struct perf_counter_attr *attr) +parse_generic_hw_event(const char **str, struct perf_event_attr *attr) { const char *s = *str; int cache_type = -1, cache_op = -1, cache_result = -1; @@ -417,7 +417,7 @@ parse_single_tracepoint_event(char *sys_name, const char *evt_name, unsigned int evt_length, char *flags, - struct perf_counter_attr *attr, + struct perf_event_attr *attr, const char **strp) { char evt_path[MAXPATHLEN]; @@ -505,7 +505,7 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags) static enum event_result parse_tracepoint_event(const char **strp, - struct perf_counter_attr *attr) + struct perf_event_attr *attr) { const char *evt_name; char *flags; @@ -563,7 +563,7 @@ static int check_events(const char *str, unsigned int i) } static enum event_result -parse_symbolic_event(const char **strp, struct perf_counter_attr *attr) +parse_symbolic_event(const char **strp, struct perf_event_attr *attr) { const char *str = *strp; unsigned int i; @@ -582,7 +582,7 @@ parse_symbolic_event(const char **strp, struct perf_counter_attr *attr) } static enum event_result -parse_raw_event(const char **strp, struct perf_counter_attr *attr) +parse_raw_event(const char **strp, struct perf_event_attr *attr) { const char *str = *strp; u64 config; @@ -601,7 +601,7 @@ parse_raw_event(const char **strp, struct perf_counter_attr *attr) } static enum event_result -parse_numeric_event(const char **strp, struct perf_counter_attr *attr) +parse_numeric_event(const char **strp, struct perf_event_attr *attr) { const char *str = *strp; char *endp; @@ -623,7 +623,7 @@ parse_numeric_event(const char **strp, struct perf_counter_attr *attr) } static enum event_result -parse_event_modifier(const char **strp, struct perf_counter_attr *attr) +parse_event_modifier(const char **strp, 
struct perf_event_attr *attr) { const char *str = *strp; int eu = 1, ek = 1, eh = 1; @@ -656,7 +656,7 @@ parse_event_modifier(const char **strp, struct perf_counter_attr *attr) * Symbolic names are (almost) exactly matched. */ static enum event_result -parse_event_symbols(const char **str, struct perf_counter_attr *attr) +parse_event_symbols(const char **str, struct perf_event_attr *attr) { enum event_result ret; @@ -711,7 +711,7 @@ static void store_event_type(const char *orgname) int parse_events(const struct option *opt __used, const char *str, int unset __used) { - struct perf_counter_attr attr; + struct perf_event_attr attr; enum event_result ret; if (strchr(str, ':')) diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 60704c15961..30c60811284 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -16,7 +16,7 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config); extern int nr_counters; -extern struct perf_counter_attr attrs[MAX_COUNTERS]; +extern struct perf_event_attr attrs[MAX_COUNTERS]; extern const char *event_name(int ctr); extern const char *__event_name(int type, u64 config); diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 1fd824c1f1c..af4b0573b37 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -480,12 +480,12 @@ out: } static struct tracepoint_path * -get_tracepoints_path(struct perf_counter_attr *pattrs, int nb_counters) +get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events) { struct tracepoint_path path, *ppath = &path; int i; - for (i = 0; i < nb_counters; i++) { + for (i = 0; i < nb_events; i++) { if (pattrs[i].type != PERF_TYPE_TRACEPOINT) continue; ppath->next = tracepoint_id_to_path(pattrs[i].config); @@ -496,7 +496,7 @@ get_tracepoints_path(struct perf_counter_attr *pattrs, int nb_counters) return path.next; } -void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters) +void read_tracing_data(struct perf_event_attr *pattrs, int nb_events) { char buf[BUFSIZ]; struct tracepoint_path *tps; @@ -530,7 +530,7 @@ void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters) page_size = getpagesize(); write_or_die(&page_size, 4); - tps = get_tracepoints_path(pattrs, nb_counters); + tps = get_tracepoints_path(pattrs, nb_events); read_header_files(); read_ftrace_files(tps); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index d35ebf1e29f..693f815c942 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -240,6 +240,6 @@ unsigned long long raw_field_value(struct event *event, const char *name, void *data); void *raw_field_ptr(struct event *event, const char *name, void *data); -void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters); +void read_tracing_data(struct perf_event_attr *pattrs, int nb_events); #endif /* _TRACE_EVENTS_H */ -- cgit v1.2.3-70-g09d2 From d19f352484467a5e518639ddff0554669c10ffab Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Sep 2009 17:01:53 -0700 Subject: ksm: define MADV_MERGEABLE and MADV_UNMERGEABLE The out-of-tree KSM used ioctls on fds cloned from /dev/ksm to register a memory area for merging: we prefer now to use an madvise(2) interface. This patch just defines MADV_MERGEABLE (to tell KSM it may merge pages in this area found identical to pages in other mergeable areas) and MADV_UNMERGEABLE (to undo that). 
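In user space the interface then reduces to a plain madvise() call; a minimal sketch, assuming the constants defined below are visible to the caller (set_mergeable() is an illustrative name only):

#include <sys/mman.h>

/* Opt an anonymous region in to, or back out of, KSM merging.
 * MADV_MERGEABLE/MADV_UNMERGEABLE are the values defined below;
 * libc headers may not carry them yet. Returns 0 on success. */
static int set_mergeable(void *addr, size_t len, int merge)
{
	return madvise(addr, len, merge ? MADV_MERGEABLE : MADV_UNMERGEABLE);
}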
Most architectures use asm-generic, but alpha, mips, parisc, xtensa need their own definitions: included here for mmotm convenience, but we'll probably want to split this and feed pieces to arch maintainers. Based upon earlier patches by Chris Wright and Izik Eidus. Signed-off-by: Hugh Dickins Signed-off-by: Chris Wright Signed-off-by: Izik Eidus Cc: Michael Kerrisk Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Ralf Baechle Cc: Kyle McMartin Cc: Helge Deller Cc: Chris Zankel Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Wu Fengguang Cc: Balbir Singh Cc: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Lee Schermerhorn Cc: Avi Kivity Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/mman.h | 3 +++ arch/mips/include/asm/mman.h | 3 +++ arch/parisc/include/asm/mman.h | 3 +++ arch/xtensa/include/asm/mman.h | 3 +++ include/asm-generic/mman-common.h | 3 +++ 5 files changed, 15 insertions(+) (limited to 'arch/mips/include/asm') diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h index 90d7c35d286..c77c55756a7 100644 --- a/arch/alpha/include/asm/mman.h +++ b/arch/alpha/include/asm/mman.h @@ -48,6 +48,9 @@ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h index e4d6f1fb1cf..f15554d1518 100644 --- a/arch/mips/include/asm/mman.h +++ b/arch/mips/include/asm/mman.h @@ -71,6 +71,9 @@ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h index defe752cc99..a12d9d43f50 100644 --- a/arch/parisc/include/asm/mman.h +++ b/arch/parisc/include/asm/mman.h @@ -54,6 +54,9 @@ #define MADV_16M_PAGES 24 /* Use 16 Megabyte pages */ #define MADV_64M_PAGES 26 /* Use 64 Megabyte pages */ +#define MADV_MERGEABLE 65 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */ + /* compatibility flags */ #define MAP_FILE 0 #define MAP_VARIABLE 0 diff --git a/arch/xtensa/include/asm/mman.h b/arch/xtensa/include/asm/mman.h index 9b92620c8a1..6e55b4d1f9c 100644 --- a/arch/xtensa/include/asm/mman.h +++ b/arch/xtensa/include/asm/mman.h @@ -78,6 +78,9 @@ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h index 3b69ad34189..dd63bd38864 100644 --- a/include/asm-generic/mman-common.h +++ b/include/asm-generic/mman-common.h @@ -35,6 +35,9 @@ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + /* compatibility flags */ #define MAP_FILE 0 -- cgit v1.2.3-70-g09d2 From 62eede62dafb4a6633eae7ffbeb34c60dba5e7b1 Mon Sep 17 
00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Sep 2009 17:03:34 -0700 Subject: mm: ZERO_PAGE without PTE_SPECIAL Reinstate anonymous use of ZERO_PAGE to all architectures, not just to those which __HAVE_ARCH_PTE_SPECIAL: as suggested by Nick Piggin. Contrary to how I'd imagined it, there's nothing ugly about this, just a zero_pfn test built into one or another block of vm_normal_page(). But the MIPS ZERO_PAGE-of-many-colours case demands is_zero_pfn() and my_zero_pfn() inlines. Reinstate its mremap move_pte() shuffling of ZERO_PAGEs we did from 2.6.17 to 2.6.19? Not unless someone shouts for that: it would have to take vm_flags to weed out some cases. Signed-off-by: Hugh Dickins Cc: Rik van Riel Reviewed-by: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Mel Gorman Cc: Minchan Kim Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/asm/pgtable.h | 10 ++++++++++ mm/memory.c | 36 +++++++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 11 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 1a9f9b25755..d6eb6134abe 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -76,6 +76,16 @@ extern unsigned long zero_page_mask; #define ZERO_PAGE(vaddr) \ (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask)))) +#define is_zero_pfn is_zero_pfn +static inline int is_zero_pfn(unsigned long pfn) +{ + extern unsigned long zero_pfn; + unsigned long offset_from_zero_pfn = pfn - zero_pfn; + return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT); +} + +#define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr)) + extern void paging_init(void); /* diff --git a/mm/memory.c b/mm/memory.c index 5c694f2b9c1..9bdbd10cb41 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -108,7 +108,7 @@ static int __init disable_randmaps(char *s) } __setup("norandmaps", disable_randmaps); -static unsigned long zero_pfn __read_mostly; +unsigned long zero_pfn __read_mostly; /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() @@ -455,6 +455,20 @@ static inline int is_cow_mapping(unsigned int flags) return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } +#ifndef is_zero_pfn +static inline int is_zero_pfn(unsigned long pfn) +{ + return pfn == zero_pfn; +} +#endif + +#ifndef my_zero_pfn +static inline unsigned long my_zero_pfn(unsigned long addr) +{ + return zero_pfn; +} +#endif + /* * vm_normal_page -- This function gets the "struct page" associated with a pte. 
* @@ -512,7 +526,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, goto check_pfn; if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) return NULL; - if (pfn != zero_pfn) + if (!is_zero_pfn(pfn)) print_bad_pte(vma, addr, pte, NULL); return NULL; } @@ -534,6 +548,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, } } + if (is_zero_pfn(pfn)) + return NULL; check_pfn: if (unlikely(pfn > highest_memmap_pfn)) { print_bad_pte(vma, addr, pte, NULL); @@ -1161,7 +1177,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, page = vm_normal_page(vma, address, pte); if (unlikely(!page)) { if ((flags & FOLL_DUMP) || - pte_pfn(pte) != zero_pfn) + !is_zero_pfn(pte_pfn(pte))) goto bad_page; page = pte_page(pte); } @@ -1443,10 +1459,6 @@ struct page *get_dump_page(unsigned long addr) if (__get_user_pages(current, current->mm, addr, 1, FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1) return NULL; - if (page == ZERO_PAGE(0)) { - page_cache_release(page); - return NULL; - } flush_cache_page(vma, addr, page_to_pfn(page)); return page; } @@ -1629,7 +1641,8 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, * If we don't have pte special, then we have to use the pfn_valid() * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* * refcount the page if pfn_valid is true (hence insert_page rather - * than insert_pfn). + * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP + * without pte special, it would there be refcounted as a normal page. */ if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) { struct page *page; @@ -2097,7 +2110,7 @@ gotten: if (unlikely(anon_vma_prepare(vma))) goto oom; - if (pte_pfn(orig_pte) == zero_pfn) { + if (is_zero_pfn(pte_pfn(orig_pte))) { new_page = alloc_zeroed_user_highpage_movable(vma, address); if (!new_page) goto oom; @@ -2658,8 +2671,9 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, spinlock_t *ptl; pte_t entry; - if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) { - entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot)); + if (!(flags & FAULT_FLAG_WRITE)) { + entry = pte_mkspecial(pfn_pte(my_zero_pfn(address), + vma->vm_page_prot)); ptl = pte_lockptr(mm, pmd); spin_lock(ptl); if (!pte_none(*page_table)) -- cgit v1.2.3-70-g09d2 From 90f72aa58bbf076b68e289fbd71eb829bc505923 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 21 Sep 2009 17:03:45 -0700 Subject: mm: add MAP_HUGETLB for mmaping pseudo-anonymous huge page regions Add a flag for mmap that will be used to request a huge page region that will look like anonymous memory to user space. This is accomplished by using a file on the internal vfsmount. MAP_HUGETLB is a modifier of MAP_ANONYMOUS and so must be specified with it. The region will behave the same as a MAP_ANONYMOUS region using small pages. The patch also adds the MAP_STACK flag, which was previously defined only on some architectures but not on others. Since MAP_STACK is meant to be a hint only, architectures can define it without assigning a specific meaning to it. 
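A minimal sketch of the intended call, assuming huge pages have been reserved by the administrator and using the asm-generic flag value added below (several architectures below assign a different bit; map_huge() is an illustrative name only):

#include <sys/mman.h>

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000	/* asm-generic value; arch headers differ */
#endif

/* len should be a multiple of the huge page size; the region then
 * behaves like ordinary anonymous memory to user space. */
static void *map_huge(size_t len)
{
	return mmap(NULL, len, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
}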
Signed-off-by: Arnd Bergmann Cc: Eric B Munson Cc: Hugh Dickins Cc: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/mman.h | 2 ++ arch/arm/include/asm/mman.h | 2 ++ arch/avr32/include/asm/mman.h | 2 ++ arch/cris/include/asm/mman.h | 2 ++ arch/frv/include/asm/mman.h | 2 ++ arch/h8300/include/asm/mman.h | 2 ++ arch/ia64/include/asm/mman.h | 2 ++ arch/m32r/include/asm/mman.h | 2 ++ arch/m68k/include/asm/mman.h | 2 ++ arch/mips/include/asm/mman.h | 2 ++ arch/mn10300/include/asm/mman.h | 2 ++ arch/parisc/include/asm/mman.h | 2 ++ arch/powerpc/include/asm/mman.h | 2 ++ arch/s390/include/asm/mman.h | 2 ++ arch/sparc/include/asm/mman.h | 2 ++ arch/xtensa/include/asm/mman.h | 2 ++ include/asm-generic/mman.h | 1 + 17 files changed, 33 insertions(+) (limited to 'arch/mips/include/asm') diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h index c77c55756a7..99c56d47879 100644 --- a/arch/alpha/include/asm/mman.h +++ b/arch/alpha/include/asm/mman.h @@ -28,6 +28,8 @@ #define MAP_NORESERVE 0x10000 /* don't check for reservations */ #define MAP_POPULATE 0x20000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x40000 /* do not block on IO */ +#define MAP_STACK 0x80000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x100000 /* create a huge page mapping */ #define MS_ASYNC 1 /* sync memory asynchronously */ #define MS_SYNC 2 /* synchronous memory sync */ diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h index fc26976d8e3..6464d471bc7 100644 --- a/arch/arm/include/asm/mman.h +++ b/arch/arm/include/asm/mman.h @@ -10,6 +10,8 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/arch/avr32/include/asm/mman.h b/arch/avr32/include/asm/mman.h index 9a92b15f6a6..38cea1b597c 100644 --- a/arch/avr32/include/asm/mman.h +++ b/arch/avr32/include/asm/mman.h @@ -10,6 +10,8 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/arch/cris/include/asm/mman.h b/arch/cris/include/asm/mman.h index b7f0afba3ce..de6b903b22c 100644 --- a/arch/cris/include/asm/mman.h +++ b/arch/cris/include/asm/mman.h @@ -12,6 +12,8 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/arch/frv/include/asm/mman.h b/arch/frv/include/asm/mman.h index 58c1d11e2ac..1939343322b 100644 --- 
a/arch/frv/include/asm/mman.h +++ b/arch/frv/include/asm/mman.h @@ -10,6 +10,8 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/arch/h8300/include/asm/mman.h b/arch/h8300/include/asm/mman.h index cf35f0a6f12..eacacd04032 100644 --- a/arch/h8300/include/asm/mman.h +++ b/arch/h8300/include/asm/mman.h @@ -10,6 +10,8 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h index 48cf8b98a0b..cf55884e7f3 100644 --- a/arch/ia64/include/asm/mman.h +++ b/arch/ia64/include/asm/mman.h @@ -18,6 +18,8 @@ #define MAP_NORESERVE 0x04000 /* don't check for reservations */ #define MAP_POPULATE 0x08000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/arch/m32r/include/asm/mman.h b/arch/m32r/include/asm/mman.h index 04a5f40aa40..d191089808f 100644 --- a/arch/m32r/include/asm/mman.h +++ b/arch/m32r/include/asm/mman.h @@ -10,6 +10,8 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/arch/m68k/include/asm/mman.h b/arch/m68k/include/asm/mman.h index 9f5c4c4b3c7..c421fef55f5 100644 --- a/arch/m68k/include/asm/mman.h +++ b/arch/m68k/include/asm/mman.h @@ -10,6 +10,8 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h index f15554d1518..a2250f390a2 100644 --- a/arch/mips/include/asm/mman.h +++ b/arch/mips/include/asm/mman.h @@ -46,6 +46,8 @@ #define MAP_LOCKED 0x8000 /* pages are locked */ #define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x20000 /* do not block on IO */ +#define MAP_STACK 0x40000 /* give out an address 
that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x80000 /* create a huge page mapping */ /* * Flags for msync diff --git a/arch/mn10300/include/asm/mman.h b/arch/mn10300/include/asm/mman.h index d04fac1da5a..94611c356bb 100644 --- a/arch/mn10300/include/asm/mman.h +++ b/arch/mn10300/include/asm/mman.h @@ -21,6 +21,8 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h index a12d9d43f50..9749c8afe83 100644 --- a/arch/parisc/include/asm/mman.h +++ b/arch/parisc/include/asm/mman.h @@ -22,6 +22,8 @@ #define MAP_GROWSDOWN 0x8000 /* stack-like segment */ #define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x20000 /* do not block on IO */ +#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x80000 /* create a huge page mapping */ #define MS_SYNC 1 /* synchronous memory sync */ #define MS_ASYNC 2 /* sync memory asynchronously */ diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 7b1c49811a2..d4a7f645c5d 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -25,6 +25,8 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #ifdef __KERNEL__ #ifdef CONFIG_PPC64 diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h index f63fe7b431e..22714ca181a 100644 --- a/arch/s390/include/asm/mman.h +++ b/arch/s390/include/asm/mman.h @@ -18,6 +18,8 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h index 988192e8e95..c3029ad6619 100644 --- a/arch/sparc/include/asm/mman.h +++ b/arch/sparc/include/asm/mman.h @@ -20,6 +20,8 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/arch/xtensa/include/asm/mman.h b/arch/xtensa/include/asm/mman.h index 6e55b4d1f9c..fca4db425f6 100644 --- a/arch/xtensa/include/asm/mman.h +++ b/arch/xtensa/include/asm/mman.h @@ -53,6 +53,8 @@ #define MAP_LOCKED 0x8000 /* pages are locked */ #define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x20000 /* do not block on IO */ +#define MAP_STACK 0x40000 /* give out an 
address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x80000 /* create a huge page mapping */ /* * Flags for msync diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h index 7cab4de2bca..32c8bd6a196 100644 --- a/include/asm-generic/mman.h +++ b/include/asm-generic/mman.h @@ -11,6 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -- cgit v1.2.3-70-g09d2 From f5564b823bbe211bab98d12de7b1f7d42cfb4a87 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:25 -0600 Subject: cpumask: remove the now-obsoleted pcibus_to_cpumask(): mips cpumask_of_pcibus() is the new version. Signed-off-by: Rusty Russell --- arch/mips/include/asm/mach-ip27/topology.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 23059170700..697244a7d39 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -29,7 +29,6 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS]; struct pci_bus; extern int pcibus_to_node(struct pci_bus *); -#define pcibus_to_cpumask(bus) (cpu_online_map) #define cpumask_of_pcibus(bus) (cpu_online_mask) extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; -- cgit v1.2.3-70-g09d2 From 29c337a034b5526e80a785409d15d3b7c7edecf4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:26 -0600 Subject: cpumask: remove obsolete node_to_cpumask now everyone uses cpumask_of_node Signed-off-by: Rusty Russell --- arch/alpha/include/asm/topology.h | 17 ----------------- arch/ia64/include/asm/topology.h | 1 - arch/mips/include/asm/mach-ip27/topology.h | 1 - arch/mips/sgi-ip27/ip27-memory.c | 2 +- arch/powerpc/include/asm/topology.h | 5 ----- arch/sh/include/asm/topology.h | 1 - arch/sparc/include/asm/topology_64.h | 14 -------------- include/asm-generic/topology.h | 17 ----------------- 8 files changed, 1 insertion(+), 57 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h index f5bd6cd4b3b..36b3a30ba0e 100644 --- a/arch/alpha/include/asm/topology.h +++ b/arch/alpha/include/asm/topology.h @@ -22,23 +22,6 @@ static inline int cpu_to_node(int cpu) return node; } -static inline cpumask_t node_to_cpumask(int node) -{ - cpumask_t node_cpu_mask = CPU_MASK_NONE; - int cpu; - - for_each_online_cpu(cpu) { - if (cpu_to_node(cpu) == node) - cpu_set(cpu, node_cpu_mask); - } - -#ifdef DEBUG_NUMA - printk("node %d: cpu_mask: %016lx\n", node, node_cpu_mask); -#endif - - return node_cpu_mask; -} - extern struct cpumask node_to_cpumask_map[]; /* FIXME: This is dumb, recalculating every time. But simple. */ static const struct cpumask *cpumask_of_node(int node) diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index d0141fbf51d..e85da7f1db5 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -33,7 +33,6 @@ /* * Returns a bitmask of CPUs on Node 'node'. 
*/ -#define node_to_cpumask(node) (node_to_cpu_mask[node]) #define cpumask_of_node(node) (&node_to_cpu_mask[node]) /* diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 697244a7d39..f6837422fe6 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -24,7 +24,6 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS]; #define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid) #define parent_node(node) (node) -#define node_to_cpumask(node) (hub_data(node)->h_cpus) #define cpumask_of_node(node) (&hub_data(node)->h_cpus) struct pci_bus; extern int pcibus_to_node(struct pci_bus *); diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 060d853d7b3..f61c164d1e6 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -421,7 +421,7 @@ static void __init node_mem_init(cnodeid_t node) /* * A node with nothing. We use it to avoid any special casing in - * node_to_cpumask + * cpumask_of_node */ static struct node_data null_node = { .hub = { diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 9a3300d6a27..829bf3c9b68 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -17,11 +17,6 @@ static inline int cpu_to_node(int cpu) #define parent_node(node) (node) -static inline cpumask_t node_to_cpumask(int node) -{ - return numa_cpumask_lookup_table[node]; -} - #define cpumask_of_node(node) (&numa_cpumask_lookup_table[node]) int of_node_to_nid(struct device_node *device); diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index f8c40cc6505..65e7bd2f224 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -31,7 +31,6 @@ #define cpu_to_node(cpu) ((void)(cpu),0) #define parent_node(node) ((void)(node),0) -#define node_to_cpumask(node) ((void)node, cpu_online_map) #define cpumask_of_node(node) ((void)node, cpu_online_mask) #define pcibus_to_node(bus) ((void)(bus), -1) diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 26cd25c0839..75752e106f4 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -12,22 +12,8 @@ static inline int cpu_to_node(int cpu) #define parent_node(node) (node) -static inline cpumask_t node_to_cpumask(int node) -{ - return numa_cpumask_lookup_table[node]; -} #define cpumask_of_node(node) (&numa_cpumask_lookup_table[node]) -/* - * Returns a pointer to the cpumask of CPUs on Node 'node'. 
- Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" - */ -#define node_to_cpumask_ptr(v, node) \ - cpumask_t *v = &(numa_cpumask_lookup_table[node]) - -#define node_to_cpumask_ptr_next(v, node) \ - v = &(numa_cpumask_lookup_table[node]) - struct pci_bus; #ifdef CONFIG_PCI extern int pcibus_to_node(struct pci_bus *pbus); diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index 88bada2ebc4..510df36dd5d 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -37,9 +37,6 @@ #ifndef parent_node #define parent_node(node) ((void)(node),0) #endif -#ifndef node_to_cpumask -#define node_to_cpumask(node) ((void)node, cpu_online_map) -#endif #ifndef cpumask_of_node #define cpumask_of_node(node) ((void)node, cpu_online_mask) #endif @@ -55,18 +52,4 @@ #endif /* CONFIG_NUMA */ -/* - * returns pointer to cpumask for specified node - * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" - */ -#ifndef node_to_cpumask_ptr - -#define node_to_cpumask_ptr(v, node) \ - cpumask_t _##v = node_to_cpumask(node); \ - const cpumask_t *v = &_##v - -#define node_to_cpumask_ptr_next(v, node) \ - _##v = node_to_cpumask(node) -#endif - #endif /* _ASM_GENERIC_TOPOLOGY_H */ -- cgit v1.2.3-70-g09d2 From 48a048fed82a8e5fdd8618574f6d3de1a0d67a50 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:44 -0600 Subject: cpumask: arch_send_call_function_ipi_mask: mips We're weaning the core code off handing cpumasks around on-stack. This introduces arch_send_call_function_ipi_mask(); once an architecture defines it, the core code stops falling back on the deprecated arch_send_call_function_ipi() shim. We also take the chance to wean the implementations off the obsolescent for_each_cpu_mask(): making send_ipi_mask take the pointer seemed the most natural way to ensure all implementations used for_each_cpu.
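Every platform conversion below follows the same shape; schematically, with the xxx_ prefix standing in for each platform's own:

static void xxx_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
	unsigned int i;

	/* was: for_each_cpu_mask(i, mask) over a by-value cpumask_t */
	for_each_cpu(i, mask)
		xxx_send_ipi_single(i, action);
}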
Signed-off-by: Rusty Russell --- arch/mips/include/asm/smp-ops.h | 2 +- arch/mips/include/asm/smp.h | 3 ++- arch/mips/kernel/smp-cmp.c | 4 ++-- arch/mips/kernel/smp-mt.c | 4 ++-- arch/mips/kernel/smp-up.c | 3 ++- arch/mips/kernel/smp.c | 2 +- arch/mips/mipssim/sim_smtc.c | 5 +++-- arch/mips/mti-malta/malta-smtc.c | 4 ++-- arch/mips/pmc-sierra/yosemite/smp.c | 4 ++-- arch/mips/sgi-ip27/ip27-smp.c | 4 ++-- arch/mips/sibyte/bcm1480/smp.c | 5 +++-- arch/mips/sibyte/sb1250/smp.c | 5 +++-- 12 files changed, 25 insertions(+), 20 deletions(-) (limited to 'arch/mips/include/asm') diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index fd545547b8a..9e09af34c8a 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -19,7 +19,7 @@ struct task_struct; struct plat_smp_ops { void (*send_ipi_single)(int cpu, unsigned int action); - void (*send_ipi_mask)(cpumask_t mask, unsigned int action); + void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action); void (*init_secondary)(void); void (*smp_finish)(void); void (*cpus_done)(void); diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index aaa2d4ab26d..48c1967961a 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -78,6 +78,7 @@ extern void play_dead(void); extern asmlinkage void smp_call_function_interrupt(void); extern void arch_send_call_function_single_ipi(int cpu); -extern void arch_send_call_function_ipi(cpumask_t mask); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask #endif /* __ASM_SMP_H */ diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index ad0ff5dc4d5..e5cf5b88fc2 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -80,11 +80,11 @@ void cmp_send_ipi_single(int cpu, unsigned int action) local_irq_restore(flags); } -static void cmp_send_ipi_mask(cpumask_t mask, unsigned int action) +static void cmp_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; - for_each_cpu_mask(i, mask) + for_each_cpu(i, mask) cmp_send_ipi_single(i, action); } diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 6f7ee5ac46e..9538ca42e00 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -141,11 +141,11 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action) local_irq_restore(flags); } -static void vsmp_send_ipi_mask(cpumask_t mask, unsigned int action) +static void vsmp_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; - for_each_cpu_mask(i, mask) + for_each_cpu(i, mask) vsmp_send_ipi_single(i, action); } diff --git a/arch/mips/kernel/smp-up.c b/arch/mips/kernel/smp-up.c index 2508d55d68f..00500fea275 100644 --- a/arch/mips/kernel/smp-up.c +++ b/arch/mips/kernel/smp-up.c @@ -18,7 +18,8 @@ static void up_send_ipi_single(int cpu, unsigned int action) panic(KERN_ERR "%s called", __func__); } -static inline void up_send_ipi_mask(cpumask_t mask, unsigned int action) +static inline void up_send_ipi_mask(const struct cpumask *mask, + unsigned int action) { panic(KERN_ERR "%s called", __func__); } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 64668a93248..df2ace9558b 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -128,7 +128,7 @@ asmlinkage __cpuinit void start_secondary(void) cpu_idle(); } -void arch_send_call_function_ipi(cpumask_t mask) +void 
arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	mp_ops->send_ipi_mask(mask, SMP_CALL_FUNCTION);
}

diff --git a/arch/mips/mipssim/sim_smtc.c b/arch/mips/mipssim/sim_smtc.c
index d6e4f656ad1..5da30b6a65b 100644
--- a/arch/mips/mipssim/sim_smtc.c
+++ b/arch/mips/mipssim/sim_smtc.c
@@ -43,11 +43,12 @@ static void ssmtc_send_ipi_single(int cpu, unsigned int action)
 	/* "CPU" may be TC of same VPE, VPE of same CPU, or different CPU */
 }
 
-static inline void ssmtc_send_ipi_mask(cpumask_t mask, unsigned int action)
+static inline void ssmtc_send_ipi_mask(const struct cpumask *mask,
+				       unsigned int action)
 {
 	unsigned int i;
 
-	for_each_cpu_mask(i, mask)
+	for_each_cpu(i, mask)
 		ssmtc_send_ipi_single(i, action);
 }
 
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 499ffe5475d..192cfd2a539 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -21,11 +21,11 @@ static void msmtc_send_ipi_single(int cpu, unsigned int action)
 	smtc_send_ipi(cpu, LINUX_SMP_IPI, action);
 }
 
-static void msmtc_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void msmtc_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 {
 	unsigned int i;
 
-	for_each_cpu_mask(i, mask)
+	for_each_cpu(i, mask)
 		msmtc_send_ipi_single(i, action);
 }
 
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index 8ace2771623..326fe7a392e 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -97,11 +97,11 @@ static void yos_send_ipi_single(int cpu, unsigned int action)
 	}
 }
 
-static void yos_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void yos_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 {
 	unsigned int i;
 
-	for_each_cpu_mask(i, mask)
+	for_each_cpu(i, mask)
 		yos_send_ipi_single(i, action);
 }
 
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index cbcd7eb83bd..9aa8f2951df 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -165,11 +165,11 @@ static void ip27_send_ipi_single(int destid, unsigned int action)
 	REMOTE_HUB_SEND_INTR(COMPACT_TO_NASID_NODEID(cpu_to_node(destid)), irq);
 }
 
-static void ip27_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void ip27_send_ipi(const struct cpumask *mask, unsigned int action)
 {
 	unsigned int i;
 
-	for_each_cpu_mask(i, mask)
+	for_each_cpu(i, mask)
 		ip27_send_ipi_single(i, action);
 }
 
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index 314691648c9..47b347c992e 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -82,11 +82,12 @@ static void bcm1480_send_ipi_single(int cpu, unsigned int action)
 	__raw_writeq((((u64)action)<< 48), mailbox_0_set_regs[cpu]);
 }
 
-static void bcm1480_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void bcm1480_send_ipi_mask(const struct cpumask *mask,
+				  unsigned int action)
 {
 	unsigned int i;
 
-	for_each_cpu_mask(i, mask)
+	for_each_cpu(i, mask)
 		bcm1480_send_ipi_single(i, action);
 }
 
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index cad14003b84..c00a5cb1128 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -70,11 +70,12 @@ static void sb1250_send_ipi_single(int cpu, unsigned int action)
 	__raw_writeq((((u64)action) << 48), mailbox_set_regs[cpu]);
 }
 
-static inline void sb1250_send_ipi_mask(cpumask_t mask, unsigned int action)
+static inline void sb1250_send_ipi_mask(const struct cpumask *mask,
+					unsigned int action)
 {
 	unsigned int i;
 
-	for_each_cpu_mask(i, mask)
+	for_each_cpu(i, mask)
 		sb1250_send_ipi_single(i, action);
 }
-- cgit v1.2.3-70-g09d2

From 0748bd01773395003208996c4c0b3f80caf80976 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 24 Sep 2009 09:34:46 -0600
Subject: cpumask: remove arch_send_call_function_ipi

Now everyone is converted to arch_send_call_function_ipi_mask,
remove the shim and the #defines.

Signed-off-by: Rusty Russell
---
 arch/alpha/include/asm/smp.h    | 1 -
 arch/arm/include/asm/smp.h      | 1 -
 arch/ia64/include/asm/smp.h     | 1 -
 arch/m32r/include/asm/smp.h     | 1 -
 arch/mips/include/asm/smp.h     | 1 -
 arch/parisc/include/asm/smp.h   | 1 -
 arch/powerpc/include/asm/smp.h  | 1 -
 arch/s390/include/asm/smp.h     | 1 -
 arch/sh/include/asm/smp.h       | 1 -
 arch/sparc/include/asm/smp_64.h | 1 -
 arch/x86/include/asm/smp.h      | 1 -
 kernel/smp.c                    | 7 -------
 12 files changed, 18 deletions(-)

(limited to 'arch/mips/include/asm')

diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h
index 8818a1bcdc8..3f390e8cc0b 100644
--- a/arch/alpha/include/asm/smp.h
+++ b/arch/alpha/include/asm/smp.h
@@ -48,7 +48,6 @@ extern int smp_num_cpus;
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #else /* CONFIG_SMP */
 
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index a06e735b262..e0d763be184 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -93,7 +93,6 @@ extern void platform_cpu_enable(unsigned int cpu);
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 /*
  * show local interrupt info
diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h
index d217d1d4e05..0b3b3997dec 100644
--- a/arch/ia64/include/asm/smp.h
+++ b/arch/ia64/include/asm/smp.h
@@ -127,7 +127,6 @@ extern int is_multithreading_enabled(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #else /* CONFIG_SMP */
 
diff --git a/arch/m32r/include/asm/smp.h b/arch/m32r/include/asm/smp.h
index c2be49d408a..e67ded1aab9 100644
--- a/arch/m32r/include/asm/smp.h
+++ b/arch/m32r/include/asm/smp.h
@@ -89,7 +89,6 @@ extern unsigned long send_IPI_mask_phys(cpumask_t, int, int);
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #endif /* not __ASSEMBLY__ */
 
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 48c1967961a..e15f11a0931 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -79,6 +79,5 @@ extern asmlinkage void smp_call_function_interrupt(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #endif /* __ASM_SMP_H */
diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h
index 21eb45a5262..2e73623feb6 100644
--- a/arch/parisc/include/asm/smp.h
+++ b/arch/parisc/include/asm/smp.h
@@ -30,7 +30,6 @@ extern void smp_send_all_nop(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #endif /* !ASSEMBLY */
 
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 1491bfe822d..d9ea8d39c34 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -147,7 +147,6 @@ extern struct smp_ops_t *smp_ops;
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 /* Definitions relative to the secondary CPU spin loop
  * and entry point. Not all of them exist on both 32 and
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 6de62189a48..a868b272c25 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -63,7 +63,6 @@ extern int smp_cpu_polarization[];
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #endif
 
diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index ca64f43abe6..53ef26ced75 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -44,7 +44,6 @@ void plat_send_ipi(unsigned int cpu, unsigned int message);
 
 void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #else
 
diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
index becb6bf353a..f49e11cd4de 100644
--- a/arch/sparc/include/asm/smp_64.h
+++ b/arch/sparc/include/asm/smp_64.h
@@ -36,7 +36,6 @@ extern int sparc64_multi_core;
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 /*
  * General functions that each host system must provide.
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 6a84ed166ae..1e796782cd7 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -121,7 +121,6 @@ static inline void arch_send_call_function_single_ipi(int cpu)
 	smp_ops.send_call_func_single_ipi(cpu);
 }
 
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
 	smp_ops.send_call_func_ipi(mask);
diff --git a/kernel/smp.c b/kernel/smp.c
index fd47a256a24..c9d1c7835c2 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -347,13 +347,6 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
 	generic_exec_single(cpu, data, wait);
 }
 
-/* Deprecated: shim for archs using old arch_send_call_function_ipi API. */
-
-#ifndef arch_send_call_function_ipi_mask
-# define arch_send_call_function_ipi_mask(maskp) \
-	arch_send_call_function_ipi(*(maskp))
-#endif
-
 /**
  * smp_call_function_many(): Run a function on a set of other CPUs.
  * @mask: The set of cpus to run on (only runs on online subset).
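
The shape of the conversion these two patches complete is easier to see outside of diff context. Below is a minimal self-contained userspace sketch -- not kernel code: the toy struct cpumask, the next_cpu() helper, and the demo_* names are all invented for illustration -- of the new calling convention, where the mask travels as a const struct cpumask * and is walked with a for_each_cpu()-style iterator instead of being copied by value as a cpumask_t:

/*
 * Toy userspace model of the IPI-mask conversion -- NOT kernel code.
 * struct cpumask here is a stand-in; next_cpu() and demo_* are invented.
 */
#include <stdio.h>

#define NR_CPUS 8

struct cpumask { unsigned long bits; };	/* toy stand-in for the kernel type */

static int next_cpu(int cpu, const struct cpumask *m)
{
	for (cpu++; cpu < NR_CPUS; cpu++)
		if (m->bits & (1UL << cpu))
			return cpu;
	return NR_CPUS;
}

/* Pointer-based iterator, shaped like the kernel's for_each_cpu(). */
#define for_each_cpu(cpu, mask)			\
	for ((cpu) = next_cpu(-1, (mask));	\
	     (cpu) < NR_CPUS;			\
	     (cpu) = next_cpu((cpu), (mask)))

static void demo_send_ipi_single(int cpu, unsigned int action)
{
	printf("action %u -> cpu %d\n", action, cpu);
}

/* New-style signature: the mask arrives by pointer, not by value. */
static void demo_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
	int i;

	for_each_cpu(i, mask)
		demo_send_ipi_single(i, action);
}

int main(void)
{
	struct cpumask mask = { .bits = 0x2a };	/* cpus 1, 3 and 5 */

	demo_send_ipi_mask(&mask, 2);
	return 0;
}

The design point is size: a cpumask_t passed by value is copied onto the stack at every call, which stops scaling once NR_CPUS reaches the thousands, while the pointer form stays cheap regardless of mask width -- the same property that made off-stack cpumasks (CONFIG_CPUMASK_OFFSTACK) workable.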
-- cgit v1.2.3-70-g09d2

From 55b8cab49dd43d227f0dd49e3524406fdc46d37b Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 24 Sep 2009 09:34:50 -0600
Subject: cpumask: use mm_cpumask() wrapper: mips

Makes code futureproof against the impending change to mm->cpu_vm_mask.

It's also a chance to use the new cpumask_ ops which take a pointer
(the older ones are deprecated, but there's no hurry for arch code).

Signed-off-by: Rusty Russell
---
 arch/mips/include/asm/mmu_context.h | 10 +++++-----
 arch/mips/mm/c-octeon.c             |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/mips/include/asm')

diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index d3bea88d874..d9743536a62 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -178,8 +178,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 * Mark current->active_mm as not "active" anymore.
 	 * We don't want to mislead possible IPI tlb flush routines.
 	 */
-	cpu_clear(cpu, prev->cpu_vm_mask);
-	cpu_set(cpu, next->cpu_vm_mask);
+	cpumask_clear_cpu(cpu, mm_cpumask(prev));
+	cpumask_set_cpu(cpu, mm_cpumask(next));
 
 	local_irq_restore(flags);
 }
@@ -235,8 +235,8 @@ activate_mm(struct mm_struct *prev, struct mm_struct *next)
 	TLBMISS_HANDLER_SETUP_PGD(next->pgd);
 
 	/* mark mmu ownership change */
-	cpu_clear(cpu, prev->cpu_vm_mask);
-	cpu_set(cpu, next->cpu_vm_mask);
+	cpumask_clear_cpu(cpu, mm_cpumask(prev));
+	cpumask_set_cpu(cpu, mm_cpumask(next));
 
 	local_irq_restore(flags);
 }
@@ -258,7 +258,7 @@ drop_mmu_context(struct mm_struct *mm, unsigned cpu)
 
 	local_irq_save(flags);
 
-	if (cpu_isset(cpu, mm->cpu_vm_mask)) {
+	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
 		get_new_mmu_context(mm, cpu);
 #ifdef CONFIG_MIPS_MT_SMTC
 		/* See comments for similar code above */
diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index 10ab69f7183..94e05e5733c 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@@ -79,7 +79,7 @@ static void octeon_flush_icache_all_cores(struct vm_area_struct *vma)
 	 * cores it has been used on
 	 */
 	if (vma)
-		mask = vma->vm_mm->cpu_vm_mask;
+		mask = *mm_cpumask(vma->vm_mm);
 	else
 		mask = cpu_online_map;
 	cpu_clear(cpu, mask);
-- cgit v1.2.3-70-g09d2
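
The futureproofing argument in this last changelog can also be shown in a few lines. Here is a hedged standalone sketch -- the demo_mm type and the bit layout are invented; only the mm_cpumask()/cpumask_*_cpu() names mirror the ones used in the patch -- of why funneling every access through one accessor insulates callers from a later change in how the mask is stored:

/*
 * Toy userspace model of the mm_cpumask() accessor idea -- NOT kernel
 * code. struct demo_mm is invented; the helper names merely mirror the
 * kernel ones used in the patch above.
 */
#include <stdio.h>

struct cpumask { unsigned long bits; };	/* toy stand-in */

struct demo_mm {
	struct cpumask cpu_vm_mask;	/* may later become an allocated mask */
};

/* The single place that knows how the mask is reached. */
static inline struct cpumask *mm_cpumask(struct demo_mm *mm)
{
	return &mm->cpu_vm_mask;
}

static inline void cpumask_set_cpu(int cpu, struct cpumask *m)
{
	m->bits |= 1UL << cpu;
}

static inline void cpumask_clear_cpu(int cpu, struct cpumask *m)
{
	m->bits &= ~(1UL << cpu);
}

static inline int cpumask_test_cpu(int cpu, const struct cpumask *m)
{
	return (m->bits >> cpu) & 1;
}

int main(void)
{
	struct demo_mm prev = { { 1UL } };	/* cpu 0 owned prev */
	struct demo_mm next = { { 0UL } };
	int cpu = 0;

	/* Same shape as the switch_mm() hunk above, on toy types. */
	cpumask_clear_cpu(cpu, mm_cpumask(&prev));
	cpumask_set_cpu(cpu, mm_cpumask(&next));

	printf("cpu %d now in next's mask: %d\n",
	       cpu, cpumask_test_cpu(cpu, mm_cpumask(&next)));
	return 0;
}

If the mask member later changes representation -- the "impending change" the changelog anticipates -- only mm_cpumask() has to be rewritten; every converted call site such as cpumask_set_cpu(cpu, mm_cpumask(next)) keeps compiling unchanged.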