From 4949314c7283ea4f9ade182ca599583b89f7edd6 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 16 Jan 2012 16:57:08 -0800 Subject: target: Allow control CDBs with data > 1 page We need to handle >1 page control cdbs, so extend the code to do a vmap if bigger than 1 page. It seems like kmap() is still preferable if just a page, fewer TLB shootdowns(?), so keep using that when possible. Rename function pair for their new scope. Signed-off-by: Andy Grover Cc: Signed-off-by: Nicholas Bellinger --- include/target/target_core_backend.h | 4 ++-- include/target/target_core_base.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 4866499bdee..e5e6ff98f0f 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -59,7 +59,7 @@ int transport_set_vpd_ident_type(struct t10_vpd *, unsigned char *); int transport_set_vpd_ident(struct t10_vpd *, unsigned char *); /* core helpers also used by command snooping in pscsi */ -void *transport_kmap_first_data_page(struct se_cmd *); -void transport_kunmap_first_data_page(struct se_cmd *); +void *transport_kmap_data_sg(struct se_cmd *); +void transport_kunmap_data_sg(struct se_cmd *); #endif /* TARGET_CORE_BACKEND_H */ diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index daf532bc721..dc4e345a016 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -582,6 +582,7 @@ struct se_cmd { struct scatterlist *t_data_sg; unsigned int t_data_nents; + void *t_data_vmap; struct scatterlist *t_bidi_data_sg; unsigned int t_bidi_data_nents; -- cgit v1.2.3-70-g09d2 From c49d005b6cc8491fad5b24f82805be2d6bcbd3dd Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 17 Jan 2012 11:09:57 +0200 Subject: OMAPDSS: HDMI: PHY burnout fix A hardware bug in the OMAP4 HDMI PHY causes physical damage to the board if the HDMI PHY is kept powered on when the cable is not connected. This patch solves the problem by adding hot-plug-detection into the HDMI IP driver. This is not a real HPD support in the sense that nobody else than the IP driver gets to know about the HPD events, but is only meant to fix the HW bug. The strategy is simple: If the display device is turned off by the user, the PHY power is set to OFF. When the display device is turned on by the user, the PHY power is set either to LDOON or TXON, depending on whether the HDMI cable is connected. The reason to avoid PHY OFF when the display device is on, but the cable is disconnected, is that when the PHY is turned OFF, the HDMI IP is not "ticking" and thus the DISPC does not receive pixel clock from the HDMI IP. This would, for example, prevent any VSYNCs from happening, and would thus affect the users of omapdss. By using LDOON when the cable is disconnected we'll avoid the HW bug, but keep the HDMI working as usual from the user's point of view. Signed-off-by: Tomi Valkeinen --- arch/arm/mach-omap2/board-4430sdp.c | 5 +++ arch/arm/mach-omap2/board-omap4panda.c | 5 +++ drivers/video/omap2/dss/hdmi.c | 3 ++ drivers/video/omap2/dss/ti_hdmi.h | 4 ++ drivers/video/omap2/dss/ti_hdmi_4xxx_ip.c | 68 +++++++++++++++++++++++++++++-- include/video/omapdss.h | 5 +++ 6 files changed, 86 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c index cd6ec517a92..0ce758edaad 100644 --- a/arch/arm/mach-omap2/board-4430sdp.c +++ b/arch/arm/mach-omap2/board-4430sdp.c @@ -732,6 +732,10 @@ static void sdp4430_lcd_init(void) pr_err("%s: Could not get lcd2_reset_gpio\n", __func__); } +static struct omap_dss_hdmi_data sdp4430_hdmi_data = { + .hpd_gpio = HDMI_GPIO_HPD, +}; + static struct omap_dss_device sdp4430_hdmi_device = { .name = "hdmi", .driver_name = "hdmi_panel", @@ -739,6 +743,7 @@ static struct omap_dss_device sdp4430_hdmi_device = { .platform_enable = sdp4430_panel_enable_hdmi, .platform_disable = sdp4430_panel_disable_hdmi, .channel = OMAP_DSS_CHANNEL_DIGIT, + .data = &sdp4430_hdmi_data, }; static struct picodlp_panel_data sdp4430_picodlp_pdata = { diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c index e1b196361f9..370c4b42888 100644 --- a/arch/arm/mach-omap2/board-omap4panda.c +++ b/arch/arm/mach-omap2/board-omap4panda.c @@ -502,6 +502,10 @@ static void omap4_panda_panel_disable_hdmi(struct omap_dss_device *dssdev) gpio_free_array(panda_hdmi_gpios, ARRAY_SIZE(panda_hdmi_gpios)); } +static struct omap_dss_hdmi_data omap4_panda_hdmi_data = { + .hpd_gpio = HDMI_GPIO_HPD, +}; + static struct omap_dss_device omap4_panda_hdmi_device = { .name = "hdmi", .driver_name = "hdmi_panel", @@ -509,6 +513,7 @@ static struct omap_dss_device omap4_panda_hdmi_device = { .platform_enable = omap4_panda_panel_enable_hdmi, .platform_disable = omap4_panda_panel_disable_hdmi, .channel = OMAP_DSS_CHANNEL_DIGIT, + .data = &omap4_panda_hdmi_data, }; static struct omap_dss_device *omap4_panda_dss_devices[] = { diff --git a/drivers/video/omap2/dss/hdmi.c b/drivers/video/omap2/dss/hdmi.c index c39f9c3a92c..d7aa3b05652 100644 --- a/drivers/video/omap2/dss/hdmi.c +++ b/drivers/video/omap2/dss/hdmi.c @@ -497,6 +497,7 @@ bool omapdss_hdmi_detect(void) int omapdss_hdmi_display_enable(struct omap_dss_device *dssdev) { + struct omap_dss_hdmi_data *priv = dssdev->data; int r = 0; DSSDBG("ENTER hdmi_display_enable\n"); @@ -509,6 +510,8 @@ int omapdss_hdmi_display_enable(struct omap_dss_device *dssdev) goto err0; } + hdmi.ip_data.hpd_gpio = priv->hpd_gpio; + r = omap_dss_start_device(dssdev); if (r) { DSSERR("failed to start device\n"); diff --git a/drivers/video/omap2/dss/ti_hdmi.h b/drivers/video/omap2/dss/ti_hdmi.h index 7503f7f619a..50dadba5070 100644 --- a/drivers/video/omap2/dss/ti_hdmi.h +++ b/drivers/video/omap2/dss/ti_hdmi.h @@ -126,6 +126,10 @@ struct hdmi_ip_data { const struct ti_hdmi_ip_ops *ops; struct hdmi_config cfg; struct hdmi_pll_info pll_data; + + /* ti_hdmi_4xxx_ip private data. These should be in a separate struct */ + int hpd_gpio; + bool phy_tx_enabled; }; int ti_hdmi_4xxx_phy_enable(struct hdmi_ip_data *ip_data); void ti_hdmi_4xxx_phy_disable(struct hdmi_ip_data *ip_data); diff --git a/drivers/video/omap2/dss/ti_hdmi_4xxx_ip.c b/drivers/video/omap2/dss/ti_hdmi_4xxx_ip.c index 9af81f18f16..2d72334ca3d 100644 --- a/drivers/video/omap2/dss/ti_hdmi_4xxx_ip.c +++ b/drivers/video/omap2/dss/ti_hdmi_4xxx_ip.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "ti_hdmi_4xxx_ip.h" #include "dss.h" @@ -223,6 +224,49 @@ void ti_hdmi_4xxx_pll_disable(struct hdmi_ip_data *ip_data) hdmi_set_pll_pwr(ip_data, HDMI_PLLPWRCMD_ALLOFF); } +static int hdmi_check_hpd_state(struct hdmi_ip_data *ip_data) +{ + unsigned long flags; + bool hpd; + int r; + /* this should be in ti_hdmi_4xxx_ip private data */ + static DEFINE_SPINLOCK(phy_tx_lock); + + spin_lock_irqsave(&phy_tx_lock, flags); + + hpd = gpio_get_value(ip_data->hpd_gpio); + + if (hpd == ip_data->phy_tx_enabled) { + spin_unlock_irqrestore(&phy_tx_lock, flags); + return 0; + } + + if (hpd) + r = hdmi_set_phy_pwr(ip_data, HDMI_PHYPWRCMD_TXON); + else + r = hdmi_set_phy_pwr(ip_data, HDMI_PHYPWRCMD_LDOON); + + if (r) { + DSSERR("Failed to %s PHY TX power\n", + hpd ? "enable" : "disable"); + goto err; + } + + ip_data->phy_tx_enabled = hpd; +err: + spin_unlock_irqrestore(&phy_tx_lock, flags); + return r; +} + +static irqreturn_t hpd_irq_handler(int irq, void *data) +{ + struct hdmi_ip_data *ip_data = data; + + hdmi_check_hpd_state(ip_data); + + return IRQ_HANDLED; +} + int ti_hdmi_4xxx_phy_enable(struct hdmi_ip_data *ip_data) { u16 r = 0; @@ -232,10 +276,6 @@ int ti_hdmi_4xxx_phy_enable(struct hdmi_ip_data *ip_data) if (r) return r; - r = hdmi_set_phy_pwr(ip_data, HDMI_PHYPWRCMD_TXON); - if (r) - return r; - /* * Read address 0 in order to get the SCP reset done completed * Dummy access performed to make sure reset is done @@ -257,12 +297,32 @@ int ti_hdmi_4xxx_phy_enable(struct hdmi_ip_data *ip_data) /* Write to phy address 3 to change the polarity control */ REG_FLD_MOD(phy_base, HDMI_TXPHY_PAD_CFG_CTRL, 0x1, 27, 27); + r = request_threaded_irq(gpio_to_irq(ip_data->hpd_gpio), + NULL, hpd_irq_handler, + IRQF_DISABLED | IRQF_TRIGGER_RISING | + IRQF_TRIGGER_FALLING, "hpd", ip_data); + if (r) { + DSSERR("HPD IRQ request failed\n"); + hdmi_set_phy_pwr(ip_data, HDMI_PHYPWRCMD_OFF); + return r; + } + + r = hdmi_check_hpd_state(ip_data); + if (r) { + free_irq(gpio_to_irq(ip_data->hpd_gpio), ip_data); + hdmi_set_phy_pwr(ip_data, HDMI_PHYPWRCMD_OFF); + return r; + } + return 0; } void ti_hdmi_4xxx_phy_disable(struct hdmi_ip_data *ip_data) { + free_irq(gpio_to_irq(ip_data->hpd_gpio), ip_data); + hdmi_set_phy_pwr(ip_data, HDMI_PHYPWRCMD_OFF); + ip_data->phy_tx_enabled = false; } static int hdmi_core_ddc_init(struct hdmi_ip_data *ip_data) diff --git a/include/video/omapdss.h b/include/video/omapdss.h index 062b3b24ff1..483f67caa7a 100644 --- a/include/video/omapdss.h +++ b/include/video/omapdss.h @@ -590,6 +590,11 @@ struct omap_dss_device { int (*get_backlight)(struct omap_dss_device *dssdev); }; +struct omap_dss_hdmi_data +{ + int hpd_gpio; +}; + struct omap_dss_driver { struct device_driver driver; -- cgit v1.2.3-70-g09d2 From e050e3f0a71bf7dc2c148b35caff0234decc8198 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 26 Jan 2012 17:03:19 +0100 Subject: perf: Fix broken interrupt rate throttling This patch fixes the sampling interrupt throttling mechanism. It was broken in v3.2. Events were not being unthrottled. The unthrottling mechanism required that events be checked at each timer tick. This patch solves this problem and also separates: - unthrottling - multiplexing - frequency-mode period adjustments Not all of them need to be executed at each timer tick. This third version of the patch is based on my original patch + PeterZ proposal (https://lkml.org/lkml/2012/1/7/87). At each timer tick, for each context: - if the current CPU has throttled events, we unthrottle events - if context has frequency-based events, we adjust sampling periods - if we have reached the jiffies interval, we multiplex (rotate) We decoupled rotation (multiplexing) from frequency-mode sampling period adjustments. They should not necessarily happen at the same rate. Multiplexing is subject to jiffies_interval (currently at 1 but could be higher once the tunable is exposed via sysfs). We have grouped frequency-mode adjustment and unthrottling into the same routine to minimize code duplication. When throttled while in frequency mode, we scan the events only once. We have fixed the threshold enforcement code in __perf_event_overflow(). There was a bug whereby it would allow more than the authorized rate because an increment of hwc->interrupts was not executed at the right place. The patch was tested with low sampling limit (2000) and fixed periods, frequency mode, overcommitted PMU. On a 2.1GHz AMD CPU: $ cat /proc/sys/kernel/perf_event_max_sample_rate 2000 We set a rate of 3000 samples/sec (2.1GHz/3000 = 700000): $ perf record -e cycles,cycles -c 700000 noploop 10 $ perf report -D | tail -21 Aggregated stats: TOTAL events: 80086 MMAP events: 88 COMM events: 2 EXIT events: 4 THROTTLE events: 19996 UNTHROTTLE events: 19996 SAMPLE events: 40000 cycles stats: TOTAL events: 40006 MMAP events: 5 COMM events: 1 EXIT events: 4 THROTTLE events: 9998 UNTHROTTLE events: 9998 SAMPLE events: 20000 cycles stats: TOTAL events: 39996 THROTTLE events: 9998 UNTHROTTLE events: 9998 SAMPLE events: 20000 For 10s, the cap is 2x2000x10 = 40000 samples. We get exactly that: 20000 samples/event. Signed-off-by: Stephane Eranian Cc: # v3.2+ Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120126160319.GA5655@quad Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + kernel/events/core.c | 104 ++++++++++++++++++++++++++++----------------- 2 files changed, 67 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 08855613ceb..abb2776be1b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -587,6 +587,7 @@ struct hw_perf_event { u64 sample_period; u64 last_period; local64_t period_left; + u64 interrupts_seq; u64 interrupts; u64 freq_time_stamp; diff --git a/kernel/events/core.c b/kernel/events/core.c index 32b48c88971..ba36013cfb2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2300,6 +2300,9 @@ do { \ return div64_u64(dividend, divisor); } +static DEFINE_PER_CPU(int, perf_throttled_count); +static DEFINE_PER_CPU(u64, perf_throttled_seq); + static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) { struct hw_perf_event *hwc = &event->hw; @@ -2325,16 +2328,29 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) } } -static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) +/* + * combine freq adjustment with unthrottling to avoid two passes over the + * events. At the same time, make sure, having freq events does not change + * the rate of unthrottling as that would introduce bias. + */ +static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, + int needs_unthr) { struct perf_event *event; struct hw_perf_event *hwc; - u64 interrupts, now; + u64 now, period = TICK_NSEC; s64 delta; - if (!ctx->nr_freq) + /* + * only need to iterate over all events iff: + * - context have events in frequency mode (needs freq adjust) + * - there are events to unthrottle on this cpu + */ + if (!(ctx->nr_freq || needs_unthr)) return; + raw_spin_lock(&ctx->lock); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->state != PERF_EVENT_STATE_ACTIVE) continue; @@ -2344,13 +2360,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) hwc = &event->hw; - interrupts = hwc->interrupts; - hwc->interrupts = 0; - - /* - * unthrottle events on the tick - */ - if (interrupts == MAX_INTERRUPTS) { + if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) { + hwc->interrupts = 0; perf_log_throttle(event, 1); event->pmu->start(event, 0); } @@ -2358,14 +2369,26 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) if (!event->attr.freq || !event->attr.sample_freq) continue; - event->pmu->read(event); + /* + * stop the event and update event->count + */ + event->pmu->stop(event, PERF_EF_UPDATE); + now = local64_read(&event->count); delta = now - hwc->freq_count_stamp; hwc->freq_count_stamp = now; + /* + * restart the event + * reload only if value has changed + */ if (delta > 0) perf_adjust_period(event, period, delta); + + event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0); } + + raw_spin_unlock(&ctx->lock); } /* @@ -2388,16 +2411,13 @@ static void rotate_ctx(struct perf_event_context *ctx) */ static void perf_rotate_context(struct perf_cpu_context *cpuctx) { - u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC; struct perf_event_context *ctx = NULL; - int rotate = 0, remove = 1, freq = 0; + int rotate = 0, remove = 1; if (cpuctx->ctx.nr_events) { remove = 0; if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) rotate = 1; - if (cpuctx->ctx.nr_freq) - freq = 1; } ctx = cpuctx->task_ctx; @@ -2405,37 +2425,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) remove = 0; if (ctx->nr_events != ctx->nr_active) rotate = 1; - if (ctx->nr_freq) - freq = 1; } - if (!rotate && !freq) + if (!rotate) goto done; perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(cpuctx->ctx.pmu); - if (freq) { - perf_ctx_adjust_freq(&cpuctx->ctx, interval); - if (ctx) - perf_ctx_adjust_freq(ctx, interval); - } - - if (rotate) { - cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - if (ctx) - ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); + cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); + if (ctx) + ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); - rotate_ctx(&cpuctx->ctx); - if (ctx) - rotate_ctx(ctx); + rotate_ctx(&cpuctx->ctx); + if (ctx) + rotate_ctx(ctx); - perf_event_sched_in(cpuctx, ctx, current); - } + perf_event_sched_in(cpuctx, ctx, current); perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); - done: if (remove) list_del_init(&cpuctx->rotation_list); @@ -2445,10 +2454,22 @@ void perf_event_task_tick(void) { struct list_head *head = &__get_cpu_var(rotation_list); struct perf_cpu_context *cpuctx, *tmp; + struct perf_event_context *ctx; + int throttled; WARN_ON(!irqs_disabled()); + __this_cpu_inc(perf_throttled_seq); + throttled = __this_cpu_xchg(perf_throttled_count, 0); + list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { + ctx = &cpuctx->ctx; + perf_adjust_freq_unthr_context(ctx, throttled); + + ctx = cpuctx->task_ctx; + if (ctx) + perf_adjust_freq_unthr_context(ctx, throttled); + if (cpuctx->jiffies_interval == 1 || !(jiffies % cpuctx->jiffies_interval)) perf_rotate_context(cpuctx); @@ -4509,6 +4530,7 @@ static int __perf_event_overflow(struct perf_event *event, { int events = atomic_read(&event->event_limit); struct hw_perf_event *hwc = &event->hw; + u64 seq; int ret = 0; /* @@ -4518,14 +4540,20 @@ static int __perf_event_overflow(struct perf_event *event, if (unlikely(!is_sampling_event(event))) return 0; - if (unlikely(hwc->interrupts >= max_samples_per_tick)) { - if (throttle) { + seq = __this_cpu_read(perf_throttled_seq); + if (seq != hwc->interrupts_seq) { + hwc->interrupts_seq = seq; + hwc->interrupts = 1; + } else { + hwc->interrupts++; + if (unlikely(throttle + && hwc->interrupts >= max_samples_per_tick)) { + __this_cpu_inc(perf_throttled_count); hwc->interrupts = MAX_INTERRUPTS; perf_log_throttle(event, 0); ret = 1; } - } else - hwc->interrupts++; + } if (event->attr.freq) { u64 now = perf_clock(); -- cgit v1.2.3-70-g09d2 From 8422fa110334cea79ab16c474902edb21a8b3168 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 30 Jan 2012 17:10:58 +0800 Subject: ALSA: Add #ifdef CONFIG_PCI guard for snd_pci_quirk_* functions This fixes below build warning when CONFIG_PCI is not set. CC sound/sound_core.o In file included from sound/sound_core.c:15: include/sound/core.h:454: warning: 'struct pci_dev' declared inside parameter list include/sound/core.h:454: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: Axel Lin Signed-off-by: Takashi Iwai --- include/sound/core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 5ab255f196c..cea1b5426df 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -417,6 +417,7 @@ static inline int __snd_bug_on(int cond) #define gameport_get_port_data(gp) (gp)->port_data #endif +#ifdef CONFIG_PCI /* PCI quirk list helper */ struct snd_pci_quirk { unsigned short subvendor; /* PCI subvendor ID */ @@ -456,5 +457,6 @@ snd_pci_quirk_lookup(struct pci_dev *pci, const struct snd_pci_quirk *list); const struct snd_pci_quirk * snd_pci_quirk_lookup_id(u16 vendor, u16 device, const struct snd_pci_quirk *list); +#endif #endif /* __SOUND_CORE_H */ -- cgit v1.2.3-70-g09d2 From 1a30871fe635d3e92972e6b93e39ff65bb57e52d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 16 Jan 2012 11:07:16 +0200 Subject: mtd: fix MTD suspend Commits 3fe4bae88460869a8e553397cd9057a4ee7ca341 and 079c985e7a6f4ce60f931cebfdd5ee3c3 broke MTD suspend in 2 ways: 1. When the '->suspend' method is not present, we return -EOPNOTSUPP, but the callers of 'mtd_suspend()' expects 0 instead. 2. Checking of the 'mtd' parameter against NULL has been incorrectly removed in 'mtd_cls_suspend()'. This patch fixes the breakages. This has been found, analyzed, reported and tested by Rafael J. Wysocki . Note, this patch is not needed in the stable tree because it causes a regression introduced during the v3.3 merge window. Reported-by: Rafael J. Wysocki Tested-by: Rafael J. Wysocki Tested-by: Russell King Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdcore.c | 2 +- include/linux/mtd/mtd.h | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 6ae9ca01388..9a9ce71a71f 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -119,7 +119,7 @@ static int mtd_cls_suspend(struct device *dev, pm_message_t state) { struct mtd_info *mtd = dev_get_drvdata(dev); - return mtd_suspend(mtd); + return mtd ? mtd_suspend(mtd) : 0; } static int mtd_cls_resume(struct device *dev) diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 1a81fde8f33..d8c7aad7331 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -427,9 +427,7 @@ static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) static inline int mtd_suspend(struct mtd_info *mtd) { - if (!mtd->suspend) - return -EOPNOTSUPP; - return mtd->suspend(mtd); + return mtd->suspend ? mtd->suspend(mtd) : 0; } static inline void mtd_resume(struct mtd_info *mtd) -- cgit v1.2.3-70-g09d2 From b923650b84068b74b6df838aa8f9b2a350171de6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 30 Jan 2012 00:20:48 +0200 Subject: lib: add NO_GENERIC_PCI_IOPORT_MAP Some architectures need to override the way IO port mapping is done on PCI devices. Supply a generic macro that calls ioport_map, and make it possible for architectures to override. Signed-off-by: Michael S. Tsirkin Acked-by: Arnd Bergmann --- include/asm-generic/pci_iomap.h | 10 ++++++++++ lib/Kconfig | 3 +++ lib/pci_iomap.c | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/pci_iomap.h b/include/asm-generic/pci_iomap.h index 8de4b73e19e..e58fcf89137 100644 --- a/include/asm-generic/pci_iomap.h +++ b/include/asm-generic/pci_iomap.h @@ -15,6 +15,16 @@ struct pci_dev; #ifdef CONFIG_PCI /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); +/* Create a virtual mapping cookie for a port on a given PCI device. + * Do not call this directly, it exists to make it easier for architectures + * to override */ +#ifdef CONFIG_NO_GENERIC_PCI_IOPORT_MAP +extern void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port, + unsigned int nr); +#else +#define __pci_ioport_map(dev, port, nr) ioport_map((port), (nr)) +#endif + #else static inline void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) { diff --git a/lib/Kconfig b/lib/Kconfig index 169eb7c598e..1df13883481 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -19,6 +19,9 @@ config RATIONAL config GENERIC_FIND_FIRST_BIT bool +config NO_GENERIC_PCI_IOPORT_MAP + bool + config GENERIC_PCI_IOMAP bool diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index 4b0fdc22e68..0d83ea8a960 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -34,7 +34,7 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) if (maxlen && len > maxlen) len = maxlen; if (flags & IORESOURCE_IO) - return ioport_map(start, len); + return __pci_ioport_map(dev, start, len); if (flags & IORESOURCE_MEM) { if (flags & IORESOURCE_CACHEABLE) return ioremap(start, len); -- cgit v1.2.3-70-g09d2 From 15eb77a07c714ac80201abd0a9568888bcee6276 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 17 Jan 2012 11:18:56 -0600 Subject: writeback: fix NULL bdi->dev in trace writeback_single_inode bdi_prune_sb() resets sb->s_bdi to default_backing_dev_info when the tearing down the original bdi. Fix trace_writeback_single_inode to use sb->s_bdi=default_backing_dev_info rather than bdi->dev=NULL for a teared down bdi. Cc: Reported-by: Rabin Vincent Tested-by: Rabin Vincent Signed-off-by: Wu Fengguang --- fs/fs-writeback.c | 16 ++++++++-------- include/trace/events/writeback.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f855916657b..5b4a9362d5a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -52,14 +52,6 @@ struct wb_writeback_work { struct completion *done; /* set if the caller waits */ }; -/* - * Include the creation of the trace points after defining the - * wb_writeback_work structure so that the definition remains local to this - * file. - */ -#define CREATE_TRACE_POINTS -#include - /* * We don't actually have pdflush, but this one is exported though /proc... */ @@ -92,6 +84,14 @@ static inline struct inode *wb_inode(struct list_head *head) return list_entry(head, struct inode, i_wb_list); } +/* + * Include the creation of the trace points after defining the + * wb_writeback_work structure and inline functions so that the definition + * remains local to this file. + */ +#define CREATE_TRACE_POINTS +#include + /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ static void bdi_wakeup_flusher(struct backing_dev_info *bdi) { diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 8588a891802..06d302ebcb7 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -426,7 +426,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_fast_assign( strncpy(__entry->name, - dev_name(inode->i_mapping->backing_dev_info->dev), 32); + dev_name(inode_to_bdi(inode)->dev), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; -- cgit v1.2.3-70-g09d2 From 3310225dfc71a35a2cc9340c15c0e08b14b3c754 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Mon, 9 Jan 2012 11:53:50 -0600 Subject: lib: proportion: lower PROP_MAX_SHIFT to 32 on 64-bit kernel PROP_MAX_SHIFT should be set to <=32 on 64-bit box. This fixes two bugs in the below lines of bdi_dirty_limit(): bdi_dirty *= numerator; do_div(bdi_dirty, denominator); 1) divide error: do_div() only uses the lower 32 bit of the denominator, which may trimmed to be 0 when PROP_MAX_SHIFT > 32. 2) overflow: (bdi_dirty * numerator) could easily overflow if numerator used up to 48 bits, leaving only 16 bits to bdi_dirty Cc: Cc: Peter Zijlstra Reported-by: Ilya Tumaykin Tested-by: Ilya Tumaykin Signed-off-by: Wu Fengguang --- include/linux/proportions.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/proportions.h b/include/linux/proportions.h index ef35bb73f69..26a8a4ed9b0 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -81,7 +81,11 @@ void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) * Limit the time part in order to ensure there are some bits left for the * cycle counter and fraction multiply. */ +#if BITS_PER_LONG == 32 #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) +#else +#define PROP_MAX_SHIFT (BITS_PER_LONG/2) +#endif #define PROP_FRAC_SHIFT (BITS_PER_LONG - PROP_MAX_SHIFT - 1) #define PROP_FRAC_BASE (1UL << PROP_FRAC_SHIFT) -- cgit v1.2.3-70-g09d2 From 3cccd1543ab623a5065335bf08350e06ffc788ab Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:16 +0200 Subject: lib/mpi: replaced MPI_NULL with normal NULL MPI_NULL is replaced with normal NULL. Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/mpi.h | 2 -- lib/mpi/mpicoder.c | 8 ++++---- lib/mpi/mpiutil.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 06f88994cca..d02cca6cc8c 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -57,8 +57,6 @@ struct gcry_mpi { typedef struct gcry_mpi *MPI; -#define MPI_NULL NULL - #define mpi_get_nlimbs(a) ((a)->nlimbs) #define mpi_is_neg(a) ((a)->sign) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 6116fc4990d..d7684aa7f65 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -34,7 +34,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits) uint8_t *frame, *fr_pt; int i = 0, n; size_t asnlen = DIM(asn); - MPI a = MPI_NULL; + MPI a = NULL; if (SHA1_DIGEST_LENGTH + asnlen + 4 > nframe) pr_info("MPI: can't encode a %d bit MD into a %d bits frame\n", @@ -48,7 +48,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits) */ frame = kmalloc(nframe, GFP_KERNEL); if (!frame) - return MPI_NULL; + return NULL; n = 0; frame[n++] = 0; frame[n++] = 1; /* block type */ @@ -92,7 +92,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) int i, j; unsigned nbits, nbytes, nlimbs, nread = 0; mpi_limb_t a; - MPI val = MPI_NULL; + MPI val = NULL; if (*ret_nread < 2) goto leave; @@ -109,7 +109,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; val = mpi_alloc(nlimbs); if (!val) - return MPI_NULL; + return NULL; i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; i %= BYTES_PER_MPI_LIMB; val->nbits = nbits; diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index eefc55d6b7f..6bfc41f62b8 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -135,7 +135,7 @@ int mpi_copy(MPI *copied, const MPI a) size_t i; MPI b; - *copied = MPI_NULL; + *copied = NULL; if (a) { b = mpi_alloc(a->nlimbs); -- cgit v1.2.3-70-g09d2 From e9c8d7a03e69093e4c33c5056a45c1233a42e8a4 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 18 Jan 2012 10:14:25 +0100 Subject: dma: sh_dma: not all SH DMAC implementations support MEMCPY Add a flag to allow platforms to specify, whether a DMAC instance supports the MEMCPY operation. To avoid regressions, preserve the current default. Signed-off-by: Guennadi Liakhovetski Acked-by: Paul Mundt Signed-off-by: Vinod Koul --- drivers/dma/shdma.c | 3 ++- include/linux/sh_dma.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 54043cd831c..812fd76e9c1 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -1262,7 +1262,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev) INIT_LIST_HEAD(&shdev->common.channels); - dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask); + if (!pdata->slave_only) + dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask); if (pdata->slave && pdata->slave_num) dma_cap_set(DMA_SLAVE, shdev->common.cap_mask); diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h index 8cd7fe59cf1..425450b980b 100644 --- a/include/linux/sh_dma.h +++ b/include/linux/sh_dma.h @@ -70,6 +70,7 @@ struct sh_dmae_pdata { unsigned int needs_tend_set:1; unsigned int no_dmars:1; unsigned int chclr_present:1; + unsigned int slave_only:1; }; /* DMA register */ -- cgit v1.2.3-70-g09d2 From b18db3d91234c03ad080d317878c7c77672ba326 Mon Sep 17 00:00:00 2001 From: Heiko Stübner Date: Wed, 1 Feb 2012 09:12:24 -0800 Subject: Input: gpio_keys - fix struct device declared inside parameter list A struct device parameter is used in the enable and disable callbacks to distinguish between different gpio_keys devices. Platforms that don't use these callbacks may not include struct device at all, as seen on arch/arm/mach-s3c2410/mach-n30.c Signed-off-by: Heiko Stuebner Signed-off-by: Dmitry Torokhov --- include/linux/gpio_keys.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index b5ca4b2c08e..004ff33ab38 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -1,6 +1,8 @@ #ifndef _GPIO_KEYS_H #define _GPIO_KEYS_H +struct device; + struct gpio_keys_button { /* Configuration parameters */ unsigned int code; /* input event code (KEY_*, SW_*) */ -- cgit v1.2.3-70-g09d2 From 7d731019218e49a9811f6d0adec4b1cfcb752bed Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 1 Feb 2012 11:10:24 -0800 Subject: mtd: fix merge conflict resolution breakage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes merge conflict resolution breakage introduced by merge d3712b9dfcf4 ("Merge tag 'for-linus' of git://github.com/prasad-joshi/logfs_upstream"). The commit changed 'mtd_can_have_bb()' function and made it always return zero, which is incorrect. Instead, we need it to return whether the underlying flash device can have bad eraseblocks or not. UBI needs this information because it affects how it handles the underlying flash. E.g., if the underlying flash is NOR, it cannot have bad blocks and any write or erase error is fatal, and all we can do is to switch to R/O mode. We do not need to reserve a pool of good eraseblocks for bad eraseblocks handling, and so on. This patch also removes 'mtd_can_have_bb()' invocations from Logfs to ensure correct Logfs behavior. I've tested that with this patch UBI works on top of NOR and NAND flashes emulated by mtdram and nandsim correspondingly. This patch is based on patch from Linus Torvalds. Signed-off-by: Artem Bityutskiy Acked-by: Jörn Engel Acked-by: Prasad Joshi Acked-by: Brian Norris Signed-off-by: Linus Torvalds --- fs/logfs/dev_mtd.c | 6 ------ include/linux/mtd/mtd.h | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index e97404d611e..9c501449450 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -152,9 +152,6 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = 0; while (mtd_block_isbad(mtd, *ofs)) { *ofs += mtd->erasesize; @@ -172,9 +169,6 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = mtd->size - mtd->erasesize; while (mtd_block_isbad(mtd, *ofs)) { *ofs -= mtd->erasesize; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 221295208fd..887ebe318c7 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -489,7 +489,7 @@ static inline int mtd_has_oob(const struct mtd_info *mtd) static inline int mtd_can_have_bb(const struct mtd_info *mtd) { - return 0; + return !!mtd->block_isbad; } /* Kernel-side ioctl definitions */ -- cgit v1.2.3-70-g09d2 From efcdbf24fd5daa88060869e51ed49f68b7ac8708 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Mon, 30 Jan 2012 14:16:06 -0800 Subject: net: Disambiguate kernel message Some of our machines were reporting: TCP: too many of orphaned sockets even when the number of orphaned sockets was well below the limit. We print a different message depending on whether we're out of TCP memory or there are too many orphaned sockets. Also move the check out of line and cleanup the messages that were printed. Signed-off-by: Arun Sharma Suggested-by: Mohan Srinivasan Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: David Miller Cc: Glauber Costa Cc: Ingo Molnar Cc: Joe Perches Signed-off-by: David S. Miller --- include/net/tcp.h | 14 ++++++++++---- net/ipv4/tcp.c | 19 +++++++++++++++---- net/ipv4/tcp_timer.c | 5 +---- 3 files changed, 26 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index d49db0113a0..42c29bfbcee 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -273,6 +273,14 @@ static inline int between(__u32 seq1, __u32 seq2, __u32 seq3) return seq3 - seq2 >= seq1 - seq2; } +static inline bool tcp_out_of_memory(struct sock *sk) +{ + if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && + sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2)) + return true; + return false; +} + static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { struct percpu_counter *ocp = sk->sk_prot->orphan_count; @@ -283,13 +291,11 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift) if (orphans << shift > sysctl_tcp_max_orphans) return true; } - - if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2)) - return true; return false; } +extern bool tcp_check_oom(struct sock *sk, int shift); + /* syncookies: remember time of last synqueue overflow */ static inline void tcp_synq_overflow(struct sock *sk) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 06373b4a449..a34f5cfdd44 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1876,6 +1876,20 @@ void tcp_shutdown(struct sock *sk, int how) } EXPORT_SYMBOL(tcp_shutdown); +bool tcp_check_oom(struct sock *sk, int shift) +{ + bool too_many_orphans, out_of_socket_memory; + + too_many_orphans = tcp_too_many_orphans(sk, shift); + out_of_socket_memory = tcp_out_of_memory(sk); + + if (too_many_orphans && net_ratelimit()) + pr_info("TCP: too many orphaned sockets\n"); + if (out_of_socket_memory && net_ratelimit()) + pr_info("TCP: out of memory -- consider tuning tcp_mem\n"); + return too_many_orphans || out_of_socket_memory; +} + void tcp_close(struct sock *sk, long timeout) { struct sk_buff *skb; @@ -2015,10 +2029,7 @@ adjudge_to_death: } if (sk->sk_state != TCP_CLOSE) { sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, 0)) { - if (net_ratelimit()) - printk(KERN_INFO "TCP: too many of orphaned " - "sockets\n"); + if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); NET_INC_STATS_BH(sock_net(sk), diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index a516d1e399d..cd2e0723266 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -77,10 +77,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) if (sk->sk_err_soft) shift++; - if (tcp_too_many_orphans(sk, shift)) { - if (net_ratelimit()) - printk(KERN_INFO "Out of socket memory\n"); - + if (tcp_check_oom(sk, shift)) { /* Catch exceptional cases, when connection requires reset. * 1. Last segment was sent recently. */ if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || -- cgit v1.2.3-70-g09d2 From 504b61630ab65296b6c9113cce834574e8cc01de Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 31 Jan 2012 16:43:50 -0800 Subject: usb: ch9.h: usb_endpoint_maxp() uses __le16_to_cpu() The usb/ch9.h will be installed to /usr/include/linux, and be used from user space. But le16_to_cpu() is only defined for kernel code. Without this patch, user space compile will be broken. Special thanks to Stefan Becker Reported-by: Stefan Becker Signed-off-by: Kuninori Morimoto Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ch9.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 61b29057b05..3b6f628880f 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -589,7 +589,7 @@ static inline int usb_endpoint_is_isoc_out( */ static inline int usb_endpoint_maxp(const struct usb_endpoint_descriptor *epd) { - return le16_to_cpu(epd->wMaxPacketSize); + return __le16_to_cpu(epd->wMaxPacketSize); } /*-------------------------------------------------------------------------*/ -- cgit v1.2.3-70-g09d2 From c31c151b1c4a29da4dc92212aa8648fb4f8557b9 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Thu, 2 Feb 2012 07:18:00 +0000 Subject: net/hyperv: Fix the page buffer when an RNDIS message goes beyond page boundary There is a possible data corruption if an RNDIS message goes beyond page boundary in the sending code path. This patch fixes the problem. Signed-off-by: Haiyang Zhang Signed-off-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 8 ++++---- drivers/net/hyperv/rndis_filter.c | 13 +++++++++++++ include/linux/hyperv.h | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 69193fcb764..466c58a7353 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -151,10 +151,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) int ret; unsigned int i, num_pages, npg_data; - /* Add multipage for skb->data and additional one for RNDIS */ + /* Add multipages for skb->data and additional 2 for RNDIS */ npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1) >> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1; - num_pages = skb_shinfo(skb)->nr_frags + npg_data + 1; + num_pages = skb_shinfo(skb)->nr_frags + npg_data + 2; /* Allocate a netvsc packet based on # of frags. */ packet = kzalloc(sizeof(struct hv_netvsc_packet) + @@ -173,8 +173,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) sizeof(struct hv_netvsc_packet) + (num_pages * sizeof(struct hv_page_buffer)); - /* Setup the rndis header */ - packet->page_buf_cnt = num_pages; + /* If the rndis msg goes beyond 1 page, we will add 1 later */ + packet->page_buf_cnt = num_pages - 1; /* Initialize it from the skb */ packet->total_data_buflen = skb->len; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index dc2e3849573..133b7fbf859 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -797,6 +797,19 @@ int rndis_filter_send(struct hv_device *dev, (unsigned long)rndisMessage & (PAGE_SIZE-1); pkt->page_buf[0].len = rndisMessageSize; + /* Add one page_buf if the rndis msg goes beyond page boundary */ + if (pkt->page_buf[0].offset + rndisMessageSize > PAGE_SIZE) { + int i; + for (i = pkt->page_buf_cnt; i > 1; i--) + pkt->page_buf[i] = pkt->page_buf[i-1]; + pkt->page_buf_cnt++; + pkt->page_buf[0].len = PAGE_SIZE - pkt->page_buf[0].offset; + pkt->page_buf[1].pfn = virt_to_phys((void *)((ulong) + rndisMessage + pkt->page_buf[0].len)) >> PAGE_SHIFT; + pkt->page_buf[1].offset = 0; + pkt->page_buf[1].len = rndisMessageSize - pkt->page_buf[0].len; + } + /* Save the packet send completion and context */ filterPacket->completion = pkt->completion.send.send_completion; filterPacket->completion_ctx = diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 62b908e0e59..0ae065a5fcb 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -35,7 +35,7 @@ #include -#define MAX_PAGE_BUFFER_COUNT 18 +#define MAX_PAGE_BUFFER_COUNT 19 #define MAX_MULTIPAGE_BUFFER_COUNT 32 /* 128K */ #pragma pack(push, 1) -- cgit v1.2.3-70-g09d2 From 8cdb878dcb359fd1137e9abdee9322f5e9bcfdf8 Mon Sep 17 00:00:00 2001 From: Christopher Yeoh Date: Thu, 2 Feb 2012 11:34:09 +1030 Subject: Fix race in process_vm_rw_core This fixes the race in process_vm_core found by Oleg (see http://article.gmane.org/gmane.linux.kernel/1235667/ for details). This has been updated since I last sent it as the creation of the new mm_access() function did almost exactly the same thing as parts of the previous version of this patch did. In order to use mm_access() even when /proc isn't enabled, we move it to kernel/fork.c where other related process mm access functions already are. Signed-off-by: Chris Yeoh Signed-off-by: Linus Torvalds --- fs/proc/base.c | 20 -------------------- include/linux/sched.h | 6 ++++++ kernel/fork.c | 20 ++++++++++++++++++++ mm/process_vm_access.c | 23 +++++++++-------------- 4 files changed, 35 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/fs/proc/base.c b/fs/proc/base.c index d9512bd03e6..d4548dd49b0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) -{ - struct mm_struct *mm; - int err; - - err = mutex_lock_killable(&task->signal->cred_guard_mutex); - if (err) - return ERR_PTR(err); - - mm = get_task_mm(task); - if (mm && mm != current->mm && - !ptrace_may_access(task, mode)) { - mmput(mm); - mm = ERR_PTR(-EACCES); - } - mutex_unlock(&task->signal->cred_guard_mutex); - - return mm; -} - struct mm_struct *mm_for_maps(struct task_struct *task) { return mm_access(task, PTRACE_MODE_READ); diff --git a/include/linux/sched.h b/include/linux/sched.h index 2234985a5e6..7d379a6bfd8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2259,6 +2259,12 @@ static inline void mmdrop(struct mm_struct * mm) extern void mmput(struct mm_struct *); /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); +/* + * Grab a reference to a task's mm, if it is not already going away + * and ptrace_may_access with the mode parameter passed to it + * succeeds. + */ +extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); /* Allocate a new mm structure and copy contents from tsk->mm */ diff --git a/kernel/fork.c b/kernel/fork.c index 051f090d40c..1b2ef3c23ae 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -647,6 +647,26 @@ struct mm_struct *get_task_mm(struct task_struct *task) } EXPORT_SYMBOL_GPL(get_task_mm); +struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) +{ + struct mm_struct *mm; + int err; + + err = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (err) + return ERR_PTR(err); + + mm = get_task_mm(task); + if (mm && mm != current->mm && + !ptrace_may_access(task, mode)) { + mmput(mm); + mm = ERR_PTR(-EACCES); + } + mutex_unlock(&task->signal->cred_guard_mutex); + + return mm; +} + /* Please note the differences between mmput and mm_release. * mmput is called whenever we stop holding onto a mm_struct, * error success whatever. diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index e920aa3ce10..c20ff48994c 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -298,23 +298,18 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec, goto free_proc_pages; } - task_lock(task); - if (__ptrace_may_access(task, PTRACE_MODE_ATTACH)) { - task_unlock(task); - rc = -EPERM; - goto put_task_struct; - } - mm = task->mm; - - if (!mm || (task->flags & PF_KTHREAD)) { - task_unlock(task); - rc = -EINVAL; + mm = mm_access(task, PTRACE_MODE_ATTACH); + if (!mm || IS_ERR(mm)) { + rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; + /* + * Explicitly map EACCES to EPERM as EPERM is a more a + * appropriate error code for process_vw_readv/writev + */ + if (rc == -EACCES) + rc = -EPERM; goto put_task_struct; } - atomic_inc(&mm->mm_users); - task_unlock(task); - for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) { rc = process_vm_rw_single_vec( (unsigned long)rvec[i].iov_base, rvec[i].iov_len, -- cgit v1.2.3-70-g09d2 From ff05f603c3238010769787f3ba54c48c290ed3e5 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 2 Feb 2012 15:29:08 -0800 Subject: include/linux/lp8727.h: Remove executable bit Signed-off-by: Josh Triplett Signed-off-by: Linus Torvalds --- include/linux/lp8727.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 include/linux/lp8727.h (limited to 'include') diff --git a/include/linux/lp8727.h b/include/linux/lp8727.h old mode 100755 new mode 100644 -- cgit v1.2.3-70-g09d2 From f8447d6c213273b444c81eaa2449f55510229d4f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sat, 14 Jan 2012 20:58:43 +0100 Subject: mfd: Store twl6040-codec mclk configuration Store the last used mclk configuration for the PLL. Signed-off-by: Peter Ujfalusi Signed-off-by: Samuel Ortiz --- drivers/mfd/twl6040-core.c | 3 +++ include/linux/mfd/twl6040.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c index dda86293dc9..c2088f4c454 100644 --- a/drivers/mfd/twl6040-core.c +++ b/drivers/mfd/twl6040-core.c @@ -282,6 +282,7 @@ int twl6040_power(struct twl6040 *twl6040, int on) /* Default PLL configuration after power up */ twl6040->pll = TWL6040_SYSCLK_SEL_LPPLL; twl6040->sysclk = 19200000; + twl6040->mclk = 32768; } else { /* already powered-down */ if (!twl6040->power_count) { @@ -305,6 +306,7 @@ int twl6040_power(struct twl6040 *twl6040, int on) twl6040_power_down(twl6040); } twl6040->sysclk = 0; + twl6040->mclk = 0; } out: @@ -421,6 +423,7 @@ int twl6040_set_pll(struct twl6040 *twl6040, int pll_id, } twl6040->sysclk = freq_out; + twl6040->mclk = freq_in; twl6040->pll = pll_id; pll_out: diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h index 2463c261959..9bc9ac651da 100644 --- a/include/linux/mfd/twl6040.h +++ b/include/linux/mfd/twl6040.h @@ -187,8 +187,10 @@ struct twl6040 { int rev; u8 vibra_ctrl_cache[2]; + /* PLL configuration */ int pll; unsigned int sysclk; + unsigned int mclk; unsigned int irq; unsigned int irq_base; -- cgit v1.2.3-70-g09d2 From 331818f1c468a24e581aedcbe52af799366a9dfe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Feb 2012 18:30:53 -0500 Subject: NFSv4: Fix an Oops in the NFSv4 getacl code Commit bf118a342f10dafe44b14451a1392c3254629a1f (NFSv4: include bitmap in nfsv4 get acl data) introduces the 'acl_scratch' page for the case where we may need to decode multi-page data. However it fails to take into account the fact that the variable may be NULL (for the case where we're not doing multi-page decode), and it also attaches it to the encoding xdr_stream rather than the decoding one. The immediate result is an Oops in nfs4_xdr_enc_getacl due to the call to page_address() with a NULL page pointer. Signed-off-by: Trond Myklebust Cc: Andy Adamson Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 8 ++++---- fs/nfs/nfs4xdr.c | 5 ++++- include/linux/nfs_xdr.h | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f0c849c98fe..d202e04aca9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3575,8 +3575,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu } if (npages > 1) { /* for decoding across pages */ - args.acl_scratch = alloc_page(GFP_KERNEL); - if (!args.acl_scratch) + res.acl_scratch = alloc_page(GFP_KERNEL); + if (!res.acl_scratch) goto out_free; } args.acl_len = npages * PAGE_SIZE; @@ -3612,8 +3612,8 @@ out_free: for (i = 0; i < npages; i++) if (pages[i]) __free_page(pages[i]); - if (args.acl_scratch) - __free_page(args.acl_scratch); + if (res.acl_scratch) + __free_page(res.acl_scratch); return ret; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 95e92e43840..33bd8d0f745 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2522,7 +2522,6 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, args->acl_pgbase, args->acl_len); - xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE); encode_nops(&hdr); } @@ -6032,6 +6031,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, struct compound_hdr hdr; int status; + if (res->acl_scratch != NULL) { + void *p = page_address(res->acl_scratch); + xdr_set_scratch_buffer(xdr, p, PAGE_SIZE); + } status = decode_compound_hdr(xdr, &hdr); if (status) goto out; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a764cef06b7..d6ba9a12591 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -614,7 +614,6 @@ struct nfs_getaclargs { size_t acl_len; unsigned int acl_pgbase; struct page ** acl_pages; - struct page * acl_scratch; struct nfs4_sequence_args seq_args; }; @@ -624,6 +623,7 @@ struct nfs_getaclres { size_t acl_len; size_t acl_data_offset; int acl_flags; + struct page * acl_scratch; struct nfs4_sequence_res seq_res; }; -- cgit v1.2.3-70-g09d2 From d020283dc694c9ec31b410f522252f7a8397e67d Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 3 Feb 2012 22:22:25 +0100 Subject: PM / QoS: CPU C-state breakage with PM Qos change Looks like change "PM QoS: Move and rename the implementation files" merged during the 3.2 development cycle made PM QoS depend on CONFIG_PM which depends on (PM_SLEEP || PM_RUNTIME). That breaks CPU C-states with kernels not having these CONFIGs, causing CPUs to spend time in Polling loop idle instead of going into deep C-states, consuming way way more power. This is with either acpi idle or intel idle enabled. Either CONFIG_PM should be enabled with any pm_qos users or the !CONFIG_PM pm_qos_request() should return sane defaults not to break the existing users. Here's is the patch for the latter option. [rjw: Modified the changelog slightly.] Signed-off-by: Venkatesh Pallipadi Signed-off-by: Rafael J. Wysocki Cc: stable@vger.kernel.org --- include/linux/pm_qos.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index e5bbcbaa6f5..4d99e4e6ef8 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -110,7 +110,19 @@ static inline void pm_qos_remove_request(struct pm_qos_request *req) { return; } static inline int pm_qos_request(int pm_qos_class) - { return 0; } +{ + switch (pm_qos_class) { + case PM_QOS_CPU_DMA_LATENCY: + return PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE; + case PM_QOS_NETWORK_LATENCY: + return PM_QOS_NETWORK_LAT_DEFAULT_VALUE; + case PM_QOS_NETWORK_THROUGHPUT: + return PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE; + default: + return PM_QOS_DEFAULT_VALUE; + } +} + static inline int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) { return 0; } -- cgit v1.2.3-70-g09d2 From e6b45241c57a83197e5de9166b3b0d32ac562609 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 4 Feb 2012 13:04:46 +0000 Subject: ipv4: reset flowi parameters on route connect Eric Dumazet found that commit 813b3b5db83 (ipv4: Use caller's on-stack flowi as-is in output route lookups.) that comes in 3.0 added a regression. The problem appears to be that resulting flowi4_oif is used incorrectly as input parameter to some routing lookups. The result is that when connecting to local port without listener if the IP address that is used is not on a loopback interface we incorrectly assign RTN_UNICAST to the output route because no route is matched by oif=lo. The RST packet can not be sent immediately by tcp_v4_send_reset because it expects RTN_LOCAL. So, change ip_route_connect and ip_route_newports to update the flowi4 fields that are input parameters because we do not want unnecessary binding to oif. To make it clear what are the input parameters that can be modified during lookup and to show which fields of floiw4 are reused add a new function to update the flowi4 structure: flowi4_update_output. Thanks to Yurij M. Plotnikov for providing a bug report including a program to reproduce the problem. Thanks to Eric Dumazet for tracking the problem down to tcp_v4_send_reset and providing initial fix. Reported-by: Yurij M. Plotnikov Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/flow.h | 10 ++++++++++ include/net/route.h | 4 ++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 9b582437fbe..6c469dbdb91 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -93,6 +93,16 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->fl4_dport = dport; fl4->fl4_sport = sport; } + +/* Reset some input parameters after previous lookup */ +static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos, + __be32 daddr, __be32 saddr) +{ + fl4->flowi4_oif = oif; + fl4->flowi4_tos = tos; + fl4->daddr = daddr; + fl4->saddr = saddr; +} struct flowi6 { diff --git a/include/net/route.h b/include/net/route.h index 91855d185b5..b1c0d5b564c 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -270,6 +270,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, if (IS_ERR(rt)) return rt; ip_rt_put(rt); + flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr); } security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -284,6 +285,9 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable fl4->fl4_dport = dport; fl4->fl4_sport = sport; ip_rt_put(rt); + flowi4_update_output(fl4, sk->sk_bound_dev_if, + RT_CONN_FLAGS(sk), fl4->daddr, + fl4->saddr); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(sock_net(sk), fl4, sk); } -- cgit v1.2.3-70-g09d2 From 977b7e3a52a7421ad33a393a38ece59f3d41c2fa Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sat, 4 Feb 2012 20:54:03 -0600 Subject: writeback: fix dereferencing NULL bdi->dev on trace_writeback_queue When a SD card is hot removed without umount, del_gendisk() will call bdi_unregister() without destroying/freeing it. This leaves the bdi in the bdi->dev = NULL, bdi->wb.task = NULL, bdi->bdi_list removed state. When sync(2) gets the bdi before bdi_unregister() and calls bdi_queue_work() after the unregister, trace_writeback_queue will be dereferencing the NULL bdi->dev. Fix it with a simple test for NULL. LKML-reference: http://lkml.org/lkml/2012/1/18/346 Cc: stable@kernel.org Reported-by: Rabin Vincent Tested-by: Namjae Jeon Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 06d302ebcb7..5973410e8f8 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -47,7 +47,10 @@ DECLARE_EVENT_CLASS(writeback_work_class, __field(int, reason) ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + struct device *dev = bdi->dev; + if (!dev) + dev = default_backing_dev_info.dev; + strncpy(__entry->name, dev_name(dev), 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; -- cgit v1.2.3-70-g09d2 From 96e02d1586782eadf051fa3d6bc4132d2447ac2c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 4 Feb 2012 10:47:10 +0100 Subject: exec: fix use-after-free bug in setup_new_exec() Setting the task name is done within setup_new_exec() by accessing bprm->filename. However this happens after flush_old_exec(). This may result in a use after free bug, flush_old_exec() may "complete" vfork_done, which will wake up the parent which in turn may free the passed in filename. To fix this add a new tcomm field in struct linux_binprm which contains the now early generated task name until it is used. Fixes this bug on s390: Unable to handle kernel pointer dereference at virtual kernel address 0000000039768000 Process kworker/u:3 (pid: 245, task: 000000003a3dc840, ksp: 0000000039453818) Krnl PSW : 0704000180000000 0000000000282e94 (setup_new_exec+0xa0/0x374) Call Trace: ([<0000000000282e2c>] setup_new_exec+0x38/0x374) [<00000000002dd12e>] load_elf_binary+0x402/0x1bf4 [<0000000000280a42>] search_binary_handler+0x38e/0x5bc [<0000000000282b6c>] do_execve_common+0x410/0x514 [<0000000000282cb6>] do_execve+0x46/0x58 [<00000000005bce58>] kernel_execve+0x28/0x70 [<000000000014ba2e>] ____call_usermodehelper+0x102/0x140 [<00000000005bc8da>] kernel_thread_starter+0x6/0xc [<00000000005bc8d4>] kernel_thread_starter+0x0/0xc Last Breaking-Event-Address: [<00000000002830f0>] setup_new_exec+0x2fc/0x374 Kernel panic - not syncing: Fatal exception: panic_on_oops Reported-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Linus Torvalds --- fs/exec.c | 33 +++++++++++++++++---------------- include/linux/binfmts.h | 3 ++- 2 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index aeb135c7ff5..92ce83a11e9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1071,6 +1071,21 @@ void set_task_comm(struct task_struct *tsk, char *buf) perf_event_comm(tsk); } +static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len) +{ + int i, ch; + + /* Copies the binary name from after last slash */ + for (i = 0; (ch = *(fn++)) != '\0';) { + if (ch == '/') + i = 0; /* overwrite what we wrote */ + else + if (i < len - 1) + tcomm[i++] = ch; + } + tcomm[i] = '\0'; +} + int flush_old_exec(struct linux_binprm * bprm) { int retval; @@ -1085,6 +1100,7 @@ int flush_old_exec(struct linux_binprm * bprm) set_mm_exe_file(bprm->mm, bprm->file); + filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm)); /* * Release all of the old mmap stuff */ @@ -1116,10 +1132,6 @@ EXPORT_SYMBOL(would_dump); void setup_new_exec(struct linux_binprm * bprm) { - int i, ch; - const char *name; - char tcomm[sizeof(current->comm)]; - arch_pick_mmap_layout(current->mm); /* This is the point of no return */ @@ -1130,18 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); - name = bprm->filename; - - /* Copies the binary name from after last slash */ - for (i=0; (ch = *(name++)) != '\0';) { - if (ch == '/') - i = 0; /* overwrite what we wrote */ - else - if (i < (sizeof(tcomm) - 1)) - tcomm[i++] = ch; - } - tcomm[i] = '\0'; - set_task_comm(current, tcomm); + set_task_comm(current, bprm->tcomm); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index fd88a3945aa..0092102db2d 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -18,7 +18,7 @@ struct pt_regs; #define BINPRM_BUF_SIZE 128 #ifdef __KERNEL__ -#include +#include #define CORENAME_MAX_SIZE 128 @@ -58,6 +58,7 @@ struct linux_binprm { unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; + char tcomm[TASK_COMM_LEN]; }; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 -- cgit v1.2.3-70-g09d2 From 1edcdb497ef418122cd4f98e157660cf594b345a Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 19 Jan 2012 13:39:23 -0800 Subject: target: Change target_submit_cmd() to return void Retval not very useful, and may even be harmful. Once submitted, fabrics should expect a sense error if anything goes wrong. All fabrics checking of this retval are useless or broken: fc checks it just to emit more debug output. ib_srpt trickles retval up, then it is ignored. qla2xxx trickles it up, which then causes a bug because the abort goto in qla_target.c thinks cmd hasn't been sent to target. Just returning nothing is best. Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 5 ++--- drivers/target/tcm_fc/tfc_cmd.c | 9 ++------- include/target/target_core_fabric.h | 2 +- 3 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index cf996d81cfc..aace7ee141f 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1657,7 +1657,7 @@ EXPORT_SYMBOL(transport_handle_cdb_direct); * This may only be called from process context, and also currently * assumes internal allocation of fabric payload buffer by target-core. **/ -int target_submit_cmd(struct se_cmd *se_cmd, struct se_session *se_sess, +void target_submit_cmd(struct se_cmd *se_cmd, struct se_session *se_sess, unsigned char *cdb, unsigned char *sense, u32 unpacked_lun, u32 data_length, int task_attr, int data_dir, int flags) { @@ -1706,12 +1706,11 @@ int target_submit_cmd(struct se_cmd *se_cmd, struct se_session *se_sess, * when fabric has filled the incoming buffer. */ transport_handle_cdb_direct(se_cmd); - return 0; + return; out_check_cond: transport_send_check_condition_and_sense(se_cmd, se_cmd->scsi_sense_reason, 0); - return 0; } EXPORT_SYMBOL(target_submit_cmd); diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index addc18f727e..9e7e26c74c7 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -540,7 +540,6 @@ static void ft_send_work(struct work_struct *work) int data_dir = 0; u32 data_len; int task_attr; - int ret; fcp = fc_frame_payload_get(cmd->req_frame, sizeof(*fcp)); if (!fcp) @@ -603,14 +602,10 @@ static void ft_send_work(struct work_struct *work) * Use a single se_cmd->cmd_kref as we expect to release se_cmd * directly from ft_check_stop_free callback in response path. */ - ret = target_submit_cmd(&cmd->se_cmd, cmd->sess->se_sess, cmd->cdb, + target_submit_cmd(&cmd->se_cmd, cmd->sess->se_sess, cmd->cdb, &cmd->ft_sense_buffer[0], cmd->lun, data_len, task_attr, data_dir, 0); - pr_debug("r_ctl %x alloc target_submit_cmd %d\n", fh->fh_r_ctl, ret); - if (ret < 0) { - ft_dump_cmd(cmd, __func__); - return; - } + pr_debug("r_ctl %x alloc target_submit_cmd\n", fh->fh_r_ctl); return; err: diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 523e8bc104d..d36fad317e7 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -114,7 +114,7 @@ void transport_init_se_cmd(struct se_cmd *, struct target_core_fabric_ops *, struct se_session *, u32, int, int, unsigned char *); int transport_lookup_cmd_lun(struct se_cmd *, u32); int transport_generic_allocate_tasks(struct se_cmd *, unsigned char *); -int target_submit_cmd(struct se_cmd *, struct se_session *, unsigned char *, +void target_submit_cmd(struct se_cmd *, struct se_session *, unsigned char *, unsigned char *, u32, u32, int, int, int); int transport_handle_cdb_direct(struct se_cmd *); int transport_generic_handle_cdb_map(struct se_cmd *); -- cgit v1.2.3-70-g09d2 From 11a3122f6cf2d988a77eb8883d0fc49cd013a6d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 7 Feb 2012 07:51:30 +0100 Subject: block: strip out locking optimization in put_io_context() put_io_context() performed a complex trylock dancing to avoid deferring ioc release to workqueue. It was also broken on UP because trylock was always assumed to succeed which resulted in unbalanced preemption count. While there are ways to fix the UP breakage, even the most pathological microbench (forced ioc allocation and tight fork/exit loop) fails to show any appreciable performance benefit of the optimization. Strip it out. If there turns out to be workloads which are affected by this change, simpler optimization from the discussion thread can be applied later. Signed-off-by: Tejun Heo LKML-Reference: <1328514611.21268.66.camel@sli10-conroe> Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 +- block/blk-core.c | 2 +- block/blk-ioc.c | 92 ++++++----------------------------------------- block/cfq-iosched.c | 2 +- fs/ioprio.c | 2 +- include/linux/blkdev.h | 3 -- include/linux/iocontext.h | 5 ++- kernel/fork.c | 2 +- 8 files changed, 18 insertions(+), 92 deletions(-) (limited to 'include') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index fa8f2630944..75642a352a8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1659,7 +1659,7 @@ static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); if (ioc) { ioc_cgroup_changed(ioc); - put_io_context(ioc, NULL); + put_io_context(ioc); } } } diff --git a/block/blk-core.c b/block/blk-core.c index 63670257511..532b3a21b38 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -642,7 +642,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) if (rq->cmd_flags & REQ_ELVPRIV) { elv_put_request(q, rq); if (rq->elv.icq) - put_io_context(rq->elv.icq->ioc, q); + put_io_context(rq->elv.icq->ioc); } mempool_free(rq, q->rq.rq_pool); diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 7490b6da245..9884fd7427f 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -29,21 +29,6 @@ void get_io_context(struct io_context *ioc) } EXPORT_SYMBOL(get_io_context); -/* - * Releasing ioc may nest into another put_io_context() leading to nested - * fast path release. As the ioc's can't be the same, this is okay but - * makes lockdep whine. Keep track of nesting and use it as subclass. - */ -#ifdef CONFIG_LOCKDEP -#define ioc_release_depth(q) ((q) ? (q)->ioc_release_depth : 0) -#define ioc_release_depth_inc(q) (q)->ioc_release_depth++ -#define ioc_release_depth_dec(q) (q)->ioc_release_depth-- -#else -#define ioc_release_depth(q) 0 -#define ioc_release_depth_inc(q) do { } while (0) -#define ioc_release_depth_dec(q) do { } while (0) -#endif - static void icq_free_icq_rcu(struct rcu_head *head) { struct io_cq *icq = container_of(head, struct io_cq, __rcu_head); @@ -75,11 +60,8 @@ static void ioc_exit_icq(struct io_cq *icq) if (rcu_dereference_raw(ioc->icq_hint) == icq) rcu_assign_pointer(ioc->icq_hint, NULL); - if (et->ops.elevator_exit_icq_fn) { - ioc_release_depth_inc(q); + if (et->ops.elevator_exit_icq_fn) et->ops.elevator_exit_icq_fn(icq); - ioc_release_depth_dec(q); - } /* * @icq->q might have gone away by the time RCU callback runs @@ -149,81 +131,29 @@ static void ioc_release_fn(struct work_struct *work) /** * put_io_context - put a reference of io_context * @ioc: io_context to put - * @locked_q: request_queue the caller is holding queue_lock of (hint) * * Decrement reference count of @ioc and release it if the count reaches - * zero. If the caller is holding queue_lock of a queue, it can indicate - * that with @locked_q. This is an optimization hint and the caller is - * allowed to pass in %NULL even when it's holding a queue_lock. + * zero. */ -void put_io_context(struct io_context *ioc, struct request_queue *locked_q) +void put_io_context(struct io_context *ioc) { - struct request_queue *last_q = locked_q; unsigned long flags; if (ioc == NULL) return; BUG_ON(atomic_long_read(&ioc->refcount) <= 0); - if (locked_q) - lockdep_assert_held(locked_q->queue_lock); - - if (!atomic_long_dec_and_test(&ioc->refcount)) - return; /* - * Destroy @ioc. This is a bit messy because icq's are chained - * from both ioc and queue, and ioc->lock nests inside queue_lock. - * The inner ioc->lock should be held to walk our icq_list and then - * for each icq the outer matching queue_lock should be grabbed. - * ie. We need to do reverse-order double lock dancing. - * - * Another twist is that we are often called with one of the - * matching queue_locks held as indicated by @locked_q, which - * prevents performing double-lock dance for other queues. - * - * So, we do it in two stages. The fast path uses the queue_lock - * the caller is holding and, if other queues need to be accessed, - * uses trylock to avoid introducing locking dependency. This can - * handle most cases, especially if @ioc was performing IO on only - * single device. - * - * If trylock doesn't cut it, we defer to @ioc->release_work which - * can do all the double-locking dancing. + * Releasing ioc requires reverse order double locking and we may + * already be holding a queue_lock. Do it asynchronously from wq. */ - spin_lock_irqsave_nested(&ioc->lock, flags, - ioc_release_depth(locked_q)); - - while (!hlist_empty(&ioc->icq_list)) { - struct io_cq *icq = hlist_entry(ioc->icq_list.first, - struct io_cq, ioc_node); - struct request_queue *this_q = icq->q; - - if (this_q != last_q) { - if (last_q && last_q != locked_q) - spin_unlock(last_q->queue_lock); - last_q = NULL; - - /* spin_trylock() always successes in UP case */ - if (this_q != locked_q && - !spin_trylock(this_q->queue_lock)) - break; - last_q = this_q; - continue; - } - ioc_exit_icq(icq); + if (atomic_long_dec_and_test(&ioc->refcount)) { + spin_lock_irqsave(&ioc->lock, flags); + if (!hlist_empty(&ioc->icq_list)) + schedule_work(&ioc->release_work); + spin_unlock_irqrestore(&ioc->lock, flags); } - - if (last_q && last_q != locked_q) - spin_unlock(last_q->queue_lock); - - spin_unlock_irqrestore(&ioc->lock, flags); - - /* if no icq is left, we're done; otherwise, kick release_work */ - if (hlist_empty(&ioc->icq_list)) - kmem_cache_free(iocontext_cachep, ioc); - else - schedule_work(&ioc->release_work); } EXPORT_SYMBOL(put_io_context); @@ -238,7 +168,7 @@ void exit_io_context(struct task_struct *task) task_unlock(task); atomic_dec(&ioc->nr_tasks); - put_io_context(ioc, NULL); + put_io_context(ioc); } /** diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index da21c24dbed..5684df6848b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1794,7 +1794,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqd->active_queue = NULL; if (cfqd->active_cic) { - put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue); + put_io_context(cfqd->active_cic->icq.ioc); cfqd->active_cic = NULL; } } diff --git a/fs/ioprio.c b/fs/ioprio.c index f84b380d65e..0f1b9515213 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); if (ioc) { ioc_ioprio_changed(ioc, ioprio); - put_io_context(ioc, NULL); + put_io_context(ioc); } return err; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6c6a1f00806..606cf339bb5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -399,9 +399,6 @@ struct request_queue { /* Throttle data */ struct throtl_data *td; #endif -#ifdef CONFIG_LOCKDEP - int ioc_release_depth; -#endif }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 7e1371c4bcc..119773eebe3 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -133,7 +133,7 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) struct task_struct; #ifdef CONFIG_BLOCK -void put_io_context(struct io_context *ioc, struct request_queue *locked_q); +void put_io_context(struct io_context *ioc); void exit_io_context(struct task_struct *task); struct io_context *get_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node); @@ -141,8 +141,7 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio); void ioc_cgroup_changed(struct io_context *ioc); #else struct io_context; -static inline void put_io_context(struct io_context *ioc, - struct request_queue *locked_q) { } +static inline void put_io_context(struct io_context *ioc) { } static inline void exit_io_context(struct task_struct *task) { } #endif diff --git a/kernel/fork.c b/kernel/fork.c index 051f090d40c..c574aefa8d1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -890,7 +890,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk) return -ENOMEM; new_ioc->ioprio = ioc->ioprio; - put_io_context(new_ioc, NULL); + put_io_context(new_ioc); } #endif return 0; -- cgit v1.2.3-70-g09d2 From 050c8ea80e3e90019d9e981c6a117ef614e882ed Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 8 Feb 2012 09:19:38 +0100 Subject: block: separate out blk_rq_merge_ok() and blk_try_merge() from elevator functions blk_rq_merge_ok() is the elevator-neutral part of merge eligibility test. blk_try_merge() determines merge direction and expects the caller to have tested elv_rq_merge_ok() previously. elv_rq_merge_ok() now wraps blk_rq_merge_ok() and then calls elv_iosched_allow_merge(). elv_try_merge() is removed and the two callers are updated to call elv_rq_merge_ok() explicitly followed by blk_try_merge(). While at it, make rq_merge_ok() functions return bool. This is to prepare for plug merge update and doesn't introduce any behavior change. This is based on Jens' patch to skip elevator_allow_merge_fn() from plug merge. Signed-off-by: Tejun Heo LKML-Reference: <4F16F3CA.90904@kernel.dk> Original-patch-by: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++-- block/blk-merge.c | 37 ++++++++++++++++++++++++++++++++ block/blk.h | 2 ++ block/elevator.c | 55 ++++-------------------------------------------- include/linux/elevator.h | 3 +-- 5 files changed, 46 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 532b3a21b38..fa697bf691e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1282,10 +1282,10 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, (*request_count)++; - if (rq->q != q) + if (rq->q != q || !elv_rq_merge_ok(rq, bio)) continue; - el_ret = elv_try_merge(rq, bio); + el_ret = blk_try_merge(rq, bio); if (el_ret == ELEVATOR_BACK_MERGE) { ret = bio_attempt_back_merge(q, rq, bio); if (ret) diff --git a/block/blk-merge.c b/block/blk-merge.c index cfcc37cb222..160035f5488 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -471,3 +471,40 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, { return attempt_merge(q, rq, next); } + +bool blk_rq_merge_ok(struct request *rq, struct bio *bio) +{ + if (!rq_mergeable(rq)) + return false; + + /* don't merge file system requests and discard requests */ + if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) + return false; + + /* don't merge discard requests and secure discard requests */ + if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE)) + return false; + + /* different data direction or already started, don't merge */ + if (bio_data_dir(bio) != rq_data_dir(rq)) + return false; + + /* must be same device and not a special request */ + if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) + return false; + + /* only merge integrity protected bio into ditto rq */ + if (bio_integrity(bio) != blk_integrity_rq(rq)) + return false; + + return true; +} + +int blk_try_merge(struct request *rq, struct bio *bio) +{ + if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector) + return ELEVATOR_BACK_MERGE; + else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector) + return ELEVATOR_FRONT_MERGE; + return ELEVATOR_NO_MERGE; +} diff --git a/block/blk.h b/block/blk.h index 7efd772336d..9c12f80882b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -137,6 +137,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next); void blk_recalc_rq_segments(struct request *rq); void blk_rq_set_mixed_merge(struct request *rq); +bool blk_rq_merge_ok(struct request *rq, struct bio *bio); +int blk_try_merge(struct request *rq, struct bio *bio); void blk_queue_congestion_threshold(struct request_queue *q); diff --git a/block/elevator.c b/block/elevator.c index 91e18f8af9b..f016855a46b 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -70,39 +70,9 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) /* * can we safely merge with this request? */ -int elv_rq_merge_ok(struct request *rq, struct bio *bio) +bool elv_rq_merge_ok(struct request *rq, struct bio *bio) { - if (!rq_mergeable(rq)) - return 0; - - /* - * Don't merge file system requests and discard requests - */ - if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) - return 0; - - /* - * Don't merge discard requests and secure discard requests - */ - if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE)) - return 0; - - /* - * different data direction or already started, don't merge - */ - if (bio_data_dir(bio) != rq_data_dir(rq)) - return 0; - - /* - * must be same device and not a special request - */ - if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) - return 0; - - /* - * only merge integrity protected bio into ditto rq - */ - if (bio_integrity(bio) != blk_integrity_rq(rq)) + if (!blk_rq_merge_ok(rq, bio)) return 0; if (!elv_iosched_allow_merge(rq, bio)) @@ -112,23 +82,6 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) } EXPORT_SYMBOL(elv_rq_merge_ok); -int elv_try_merge(struct request *__rq, struct bio *bio) -{ - int ret = ELEVATOR_NO_MERGE; - - /* - * we can merge and sequence is ok, check if it's possible - */ - if (elv_rq_merge_ok(__rq, bio)) { - if (blk_rq_pos(__rq) + blk_rq_sectors(__rq) == bio->bi_sector) - ret = ELEVATOR_BACK_MERGE; - else if (blk_rq_pos(__rq) - bio_sectors(bio) == bio->bi_sector) - ret = ELEVATOR_FRONT_MERGE; - } - - return ret; -} - static struct elevator_type *elevator_find(const char *name) { struct elevator_type *e; @@ -478,8 +431,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) /* * First try one-hit cache. */ - if (q->last_merge) { - ret = elv_try_merge(q->last_merge, bio); + if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) { + ret = blk_try_merge(q->last_merge, bio); if (ret != ELEVATOR_NO_MERGE) { *req = q->last_merge; return ret; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index c24f3d7fbf1..d6dfb65c888 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -122,7 +122,6 @@ extern void elv_dispatch_add_tail(struct request_queue *, struct request *); extern void elv_add_request(struct request_queue *, struct request *, int); extern void __elv_add_request(struct request_queue *, struct request *, int); extern int elv_merge(struct request_queue *, struct request **, struct bio *); -extern int elv_try_merge(struct request *, struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); extern void elv_merged_request(struct request_queue *, struct request *, int); @@ -155,7 +154,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct elevator_queue *); extern int elevator_change(struct request_queue *, const char *); -extern int elv_rq_merge_ok(struct request *, struct bio *); +extern bool elv_rq_merge_ok(struct request *, struct bio *); /* * Helper functions. -- cgit v1.2.3-70-g09d2 From 07c2bd37350c9b1af71b35d05f16e300a6602948 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 8 Feb 2012 09:19:42 +0100 Subject: block: don't call elevator callbacks for plug merges Plug merge calls two elevator callbacks outside queue lock - elevator_allow_merge_fn() and elevator_bio_merged_fn(). Although attempt_plug_merge() suggests that elevator is guaranteed to be there through the existing request on the plug list, nothing prevents plug merge from calling into dying or initializing elevator. For regular merges, bypass ensures elvpriv count to reach zero, which in turn prevents merges as all !ELVPRIV requests get REQ_SOFTBARRIER from forced back insertion. Plug merge doesn't check ELVPRIV, and, as the requests haven't gone through elevator insertion yet, it doesn't have SOFTBARRIER set allowing merges on a bypassed queue. This, for example, leads to the following crash during elevator switch. BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] cfq_allow_merge+0x49/0xa0 PGD 112cbc067 PUD 115d5c067 PMD 0 Oops: 0000 [#1] PREEMPT SMP CPU 1 Modules linked in: deadline_iosched Pid: 819, comm: dd Not tainted 3.3.0-rc2-work+ #76 Bochs Bochs RIP: 0010:[] [] cfq_allow_merge+0x49/0xa0 RSP: 0018:ffff8801143a38f8 EFLAGS: 00010297 RAX: 0000000000000000 RBX: ffff88011817ce28 RCX: ffff880116eb6cc0 RDX: 0000000000000000 RSI: ffff880118056e20 RDI: ffff8801199512f8 RBP: ffff8801143a3908 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff880118195708 R13: ffff880118052aa0 R14: ffff8801143a3d50 R15: ffff880118195708 FS: 00007f19f82cb700(0000) GS:ffff88011fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000008 CR3: 0000000112c6a000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process dd (pid: 819, threadinfo ffff8801143a2000, task ffff880116eb6cc0) Stack: ffff88011817ce28 ffff880118195708 ffff8801143a3928 ffffffff81391bba ffff88011817ce28 ffff880118195708 ffff8801143a3948 ffffffff81391bf1 ffff88011817ce28 0000000000000000 ffff8801143a39a8 ffffffff81398e3e Call Trace: [] elv_rq_merge_ok+0x4a/0x60 [] elv_try_merge+0x21/0x40 [] blk_queue_bio+0x8e/0x390 [] generic_make_request+0xca/0x100 [] submit_bio+0x74/0x100 [] __blockdev_direct_IO+0x1ce2/0x3450 [] blkdev_direct_IO+0x57/0x60 [] generic_file_aio_read+0x6d5/0x760 [] do_sync_read+0xe2/0x120 [] vfs_read+0xc5/0x180 [] sys_read+0x51/0x90 [] system_call_fastpath+0x16/0x1b There are multiple ways to fix this including making plug merge check ELVPRIV; however, * Calling into elevator outside queue lock is confusing and error-prone. * Requests on plug list aren't known to the elevator. They aren't on the elevator yet, so there's no elevator specific state to update. * Given the nature of plug merges - collecting bio's for the same purpose from the same issuer - elevator specific restrictions aren't applicable. So, simply don't call into elevator methods from plug merge by moving elv_bio_merged() from bio_attempt_*_merge() to blk_queue_bio(), and using blk_try_merge() in attempt_plug_merge(). This is based on Jens' patch to skip elevator_allow_merge_fn() from plug merge. Note that this makes per-cgroup merged stats skip plug merging. Signed-off-by: Tejun Heo LKML-Reference: <4F16F3CA.90904@kernel.dk> Original-patch-by: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-core.c | 19 +++++++++---------- block/cfq-iosched.c | 15 ++++----------- include/linux/elevator.h | 6 ------ 3 files changed, 13 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index fa697bf691e..3a78b00edd7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1212,7 +1212,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); drive_stat_acct(req, 0); - elv_bio_merged(q, req, bio); return true; } @@ -1243,7 +1242,6 @@ static bool bio_attempt_front_merge(struct request_queue *q, req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); drive_stat_acct(req, 0); - elv_bio_merged(q, req, bio); return true; } @@ -1257,13 +1255,12 @@ static bool bio_attempt_front_merge(struct request_queue *q, * on %current's plugged list. Returns %true if merge was successful, * otherwise %false. * - * This function is called without @q->queue_lock; however, elevator is - * accessed iff there already are requests on the plugged list which in - * turn guarantees validity of the elevator. - * - * Note that, on successful merge, elevator operation - * elevator_bio_merged_fn() will be called without queue lock. Elevator - * must be ready for this. + * Plugging coalesces IOs from the same issuer for the same purpose without + * going through @q->queue_lock. As such it's more of an issuing mechanism + * than scheduling, and the request, while may have elvpriv data, is not + * added on the elevator at this point. In addition, we don't have + * reliable access to the elevator outside queue lock. Only check basic + * merging parameters without querying the elevator. */ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int *request_count) @@ -1282,7 +1279,7 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, (*request_count)++; - if (rq->q != q || !elv_rq_merge_ok(rq, bio)) + if (rq->q != q || !blk_rq_merge_ok(rq, bio)) continue; el_ret = blk_try_merge(rq, bio); @@ -1347,12 +1344,14 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) el_ret = elv_merge(q, &req, bio); if (el_ret == ELEVATOR_BACK_MERGE) { if (bio_attempt_back_merge(q, req, bio)) { + elv_bio_merged(q, req, bio); if (!attempt_back_merge(q, req)) elv_merged_request(q, req, el_ret); goto out_unlock; } } else if (el_ret == ELEVATOR_FRONT_MERGE) { if (bio_attempt_front_merge(q, req, bio)) { + elv_bio_merged(q, req, bio); if (!attempt_front_merge(q, req)) elv_merged_request(q, req, el_ret); goto out_unlock; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5684df6848b..d0ba5053366 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1699,18 +1699,11 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, /* * Lookup the cfqq that this bio will be queued with and allow - * merge only if rq is queued there. This function can be called - * from plug merge without queue_lock. In such cases, ioc of @rq - * and %current are guaranteed to be equal. Avoid lookup which - * requires queue_lock by using @rq's cic. + * merge only if rq is queued there. */ - if (current->io_context == RQ_CIC(rq)->icq.ioc) { - cic = RQ_CIC(rq); - } else { - cic = cfq_cic_lookup(cfqd, current->io_context); - if (!cic) - return false; - } + cic = cfq_cic_lookup(cfqd, current->io_context); + if (!cic) + return false; cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); return cfqq == RQ_CFQQ(rq); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index d6dfb65c888..7d4e0356f32 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -42,12 +42,6 @@ struct elevator_ops elevator_merged_fn *elevator_merged_fn; elevator_merge_req_fn *elevator_merge_req_fn; elevator_allow_merge_fn *elevator_allow_merge_fn; - - /* - * Used for both plugged list and elevator merging and in the - * former case called without queue_lock. Read comment on top of - * attempt_plug_merge() for details. - */ elevator_bio_merged_fn *elevator_bio_merged_fn; elevator_dispatch_fn *elevator_dispatch_fn; -- cgit v1.2.3-70-g09d2 From cdccaa9467b982d57b139818d15e1e994feca372 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Feb 2012 20:03:14 +0100 Subject: cdrom: move shared static to cdrom_device_info The keeplocked variable in the cdrom driver is shared across multiple drives, but set in per-device ioctls. Move it to the per-device struct, avoiding that the setting on one drive affects the driver's behavior when closing another. [ Impact: limit udev's confusion to one drive when a CD burning program unlocks the CD door at the end of burning. ] Signed-off-by: Paolo Bonzini Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 12 +++++------- include/linux/cdrom.h | 3 ++- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 8fefe59f52a..d620b449574 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -286,8 +286,6 @@ /* used to tell the module to turn on full debugging messages */ static bool debug; -/* used to keep tray locked at all times */ -static int keeplocked; /* default compatibility mode */ static bool autoclose=1; static bool autoeject; @@ -1204,7 +1202,7 @@ void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode) cdinfo(CD_CLOSE, "Use count for \"/dev/%s\" now zero\n", cdi->name); cdrom_dvd_rw_close_write(cdi); - if ((cdo->capability & CDC_LOCK) && !keeplocked) { + if ((cdo->capability & CDC_LOCK) && !cdi->keeplocked) { cdinfo(CD_CLOSE, "Unlocking door!\n"); cdo->lock_door(cdi, 0); } @@ -1371,7 +1369,7 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot) curslot = info->hdr.curslot; kfree(info); - if (cdi->use_count > 1 || keeplocked) { + if (cdi->use_count > 1 || cdi->keeplocked) { if (slot == CDSL_CURRENT) { return curslot; } else { @@ -2289,7 +2287,7 @@ static int cdrom_ioctl_eject(struct cdrom_device_info *cdi) if (!CDROM_CAN(CDC_OPEN_TRAY)) return -ENOSYS; - if (cdi->use_count != 1 || keeplocked) + if (cdi->use_count != 1 || cdi->keeplocked) return -EBUSY; if (CDROM_CAN(CDC_LOCK)) { int ret = cdi->ops->lock_door(cdi, 0); @@ -2316,7 +2314,7 @@ static int cdrom_ioctl_eject_sw(struct cdrom_device_info *cdi, if (!CDROM_CAN(CDC_OPEN_TRAY)) return -ENOSYS; - if (keeplocked) + if (cdi->keeplocked) return -EBUSY; cdi->options &= ~(CDO_AUTO_CLOSE | CDO_AUTO_EJECT); @@ -2447,7 +2445,7 @@ static int cdrom_ioctl_lock_door(struct cdrom_device_info *cdi, if (!CDROM_CAN(CDC_LOCK)) return -EDRIVE_CANT_DO_THIS; - keeplocked = arg ? 1 : 0; + cdi->keeplocked = arg ? 1 : 0; /* * Don't unlock the door on multiple opens by default, but allow diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 35eae4b6750..7c48029dffe 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -952,7 +952,8 @@ struct cdrom_device_info { char name[20]; /* name of the device type */ /* per-device flags */ __u8 sanyo_slot : 2; /* Sanyo 3 CD changer support */ - __u8 reserved : 6; /* not used yet */ + __u8 keeplocked : 1; /* CDROM_LOCKDOOR status */ + __u8 reserved : 5; /* not used yet */ int cdda_method; /* see flags */ __u8 last_sense; __u8 media_written; /* dirty flag, DVD+RW bookkeeping */ -- cgit v1.2.3-70-g09d2 From 16bda13d90c8d5da243e2cfa1677e62ecce26860 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Feb 2012 15:14:37 -0500 Subject: net: Make qdisc_skb_cb upper size bound explicit. Just like skb->cb[], so that qdisc_skb_cb can be encapsulated inside of other data structures. This is intended to be used by IPoIB so that it can remember addressing information stored at hard_header_ops->create() time that it can fetch when the packet gets to the transmit routine. Signed-off-by: David S. Miller --- include/net/sch_generic.h | 9 ++++++++- net/sched/sch_choke.c | 3 +-- net/sched/sch_netem.c | 3 +-- net/sched/sch_sfb.c | 3 +-- net/sched/sch_sfq.c | 5 ++--- 5 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f6bb08b73ca..55ce96b53b0 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -220,9 +220,16 @@ struct tcf_proto { struct qdisc_skb_cb { unsigned int pkt_len; - long data[]; + unsigned char data[24]; }; +static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) +{ + struct qdisc_skb_cb *qcb; + BUILD_BUG_ON(sizeof(skb->cb) < sizeof(unsigned int) + sz); + BUILD_BUG_ON(sizeof(qcb->data) < sz); +} + static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index e465064d39a..7e267d7b9c7 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -148,8 +148,7 @@ struct choke_skb_cb { static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < - sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb)); + qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb)); return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 2776012132e..e83d61ca78c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -130,8 +130,7 @@ struct netem_skb_cb { static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < - sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); + qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb)); return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 96e42cae4c7..d7eea99333e 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -94,8 +94,7 @@ struct sfb_skb_cb { static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < - sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb)); + qdisc_cb_private_validate(skb, sizeof(struct sfb_skb_cb)); return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 67494aef9ac..60d47180f04 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -166,9 +166,8 @@ struct sfq_skb_cb { static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < - sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb)); - return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data; + qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb)); + return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data; } static unsigned int sfq_hash(const struct sfq_sched_data *q, -- cgit v1.2.3-70-g09d2 From 2b73bc65e2771372c818db7955709c8caedbf8b9 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 10 Feb 2012 05:43:38 +0000 Subject: netprio_cgroup: fix wrong memory access when NETPRIO_CGROUP=m When the netprio_cgroup module is not loaded, net_prio_subsys_id is -1, and so sock_update_prioidx() accesses cgroup_subsys array with negative index subsys[-1]. Make the code resembles cls_cgroup code, which is bug free. Origionally-authored-by: Li Zefan Signed-off-by: Li Zefan Signed-off-by: Neil Horman CC: "David S. Miller" Signed-off-by: David S. Miller --- include/net/netprio_cgroup.h | 48 ++++++++++++++++++++++++++++++++++++-------- net/core/sock.c | 7 ++----- 2 files changed, 42 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index 7b2d43139c8..d58fdec4759 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -37,19 +37,51 @@ extern int net_prio_subsys_id; extern void sock_update_netprioidx(struct sock *sk); -static inline struct cgroup_netprio_state - *task_netprio_state(struct task_struct *p) +#if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) + +static inline u32 task_netprioidx(struct task_struct *p) { -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) - return container_of(task_subsys_state(p, net_prio_subsys_id), - struct cgroup_netprio_state, css); -#else - return NULL; -#endif + struct cgroup_netprio_state *state; + u32 idx; + + rcu_read_lock(); + state = container_of(task_subsys_state(p, net_prio_subsys_id), + struct cgroup_netprio_state, css); + idx = state->prioidx; + rcu_read_unlock(); + return idx; +} + +#elif IS_MODULE(CONFIG_NETPRIO_CGROUP) + +static inline u32 task_netprioidx(struct task_struct *p) +{ + struct cgroup_netprio_state *state; + int subsys_id; + u32 idx = 0; + + rcu_read_lock(); + subsys_id = rcu_dereference_index_check(net_prio_subsys_id, + rcu_read_lock_held()); + if (subsys_id >= 0) { + state = container_of(task_subsys_state(p, subsys_id), + struct cgroup_netprio_state, css); + idx = state->prioidx; + } + rcu_read_unlock(); + return idx; } #else +static inline u32 task_netprioidx(struct task_struct *p) +{ + return 0; +} + +#endif /* CONFIG_NETPRIO_CGROUP */ + +#else #define sock_update_netprioidx(sk) #endif diff --git a/net/core/sock.c b/net/core/sock.c index 3e81fd2e3c7..02f8dfe320b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1171,13 +1171,10 @@ EXPORT_SYMBOL(sock_update_classid); void sock_update_netprioidx(struct sock *sk) { - struct cgroup_netprio_state *state; if (in_interrupt()) return; - rcu_read_lock(); - state = task_netprio_state(current); - sk->sk_cgrp_prioidx = state ? state->prioidx : 0; - rcu_read_unlock(); + + sk->sk_cgrp_prioidx = task_netprioidx(current); } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif -- cgit v1.2.3-70-g09d2 From d9f5343e35d9138432657202afa8e3ddb2ade360 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Thu, 5 Jan 2012 16:28:54 -0800 Subject: USB: Remove duplicate USB 3.0 hub feature #defines. Somehow we ended up with duplicate hub feature #defines in ch11.h. Tatyana Brokhman first created the USB 3.0 hub feature macros in 2.6.38 with commit 0eadcc09203349b11ca477ec367079b23d32ab91 "usb: USB3.0 ch11 definitions". In 2.6.39, I modified a patch from John Youn that added similar macros in a different place in the same file, and committed dbe79bbe9dcb22cb3651c46f18943477141ca452 "USB 3.0 Hub Changes". Some of the #defines used different names for the same values. Others used exactly the same names with the same values, like these gems: #define USB_PORT_FEAT_BH_PORT_RESET 28 ... #define USB_PORT_FEAT_BH_PORT_RESET 28 According to my very geeky husband (who looked it up in the C99 spec), it is allowed to have object-like macros with duplicate names as long as the replacement list is exactly the same. However, he recalled that some compilers will give warnings when they find duplicate macros. It's probably best to remove the duplicates in the stable tree, so that the code compiles for everyone. The macros are now fixed to move the feature requests that are specific to USB 3.0 hubs into a new section (out of the USB 2.0 hub feature section), and use the most common macro name. This patch should be backported to 2.6.39. Signed-off-by: Sarah Sharp Cc: Tatyana Brokhman Cc: John Youn Cc: Jamey Sharp Cc: stable@vger.kernel.org --- include/linux/usb/ch11.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h index 31fdb4c6ee3..0b83acd3360 100644 --- a/include/linux/usb/ch11.h +++ b/include/linux/usb/ch11.h @@ -61,12 +61,6 @@ #define USB_PORT_FEAT_TEST 21 #define USB_PORT_FEAT_INDICATOR 22 #define USB_PORT_FEAT_C_PORT_L1 23 -#define USB_PORT_FEAT_C_PORT_LINK_STATE 25 -#define USB_PORT_FEAT_C_PORT_CONFIG_ERROR 26 -#define USB_PORT_FEAT_PORT_REMOTE_WAKE_MASK 27 -#define USB_PORT_FEAT_BH_PORT_RESET 28 -#define USB_PORT_FEAT_C_BH_PORT_RESET 29 -#define USB_PORT_FEAT_FORCE_LINKPM_ACCEPT 30 /* * Port feature selectors added by USB 3.0 spec. @@ -75,8 +69,8 @@ #define USB_PORT_FEAT_LINK_STATE 5 #define USB_PORT_FEAT_U1_TIMEOUT 23 #define USB_PORT_FEAT_U2_TIMEOUT 24 -#define USB_PORT_FEAT_C_LINK_STATE 25 -#define USB_PORT_FEAT_C_CONFIG_ERR 26 +#define USB_PORT_FEAT_C_PORT_LINK_STATE 25 +#define USB_PORT_FEAT_C_PORT_CONFIG_ERROR 26 #define USB_PORT_FEAT_REMOTE_WAKE_MASK 27 #define USB_PORT_FEAT_BH_PORT_RESET 28 #define USB_PORT_FEAT_C_BH_PORT_RESET 29 -- cgit v1.2.3-70-g09d2 From 2c4967f741e87cdd63de7271b97807041dccbf3b Mon Sep 17 00:00:00 2001 From: Sujit Reddy Thumma Date: Sat, 4 Feb 2012 16:14:50 -0500 Subject: mmc: core: Ensure clocks are always enabled before host interaction Ensure clocks are always enabled before any interaction with the host controller driver. This makes sure that there is no race between host execution and the core layer turning off clocks in different context with clock gating framework. Signed-off-by: Sujit Reddy Thumma Acked-by: Linus Walleij Acked-by: Per Forlin Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 19 ++++++++++++++++--- drivers/mmc/core/host.h | 21 --------------------- drivers/mmc/core/sd.c | 22 ++++++++++++++++++---- drivers/mmc/core/sdio_irq.c | 10 ++++++++-- include/linux/mmc/host.h | 19 +++++++++++++++++++ 5 files changed, 61 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index f545a3e6eb8..b3063b741df 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -290,8 +290,11 @@ static void mmc_wait_for_req_done(struct mmc_host *host, static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq, bool is_first_req) { - if (host->ops->pre_req) + if (host->ops->pre_req) { + mmc_host_clk_hold(host); host->ops->pre_req(host, mrq, is_first_req); + mmc_host_clk_release(host); + } } /** @@ -306,8 +309,11 @@ static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq, static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq, int err) { - if (host->ops->post_req) + if (host->ops->post_req) { + mmc_host_clk_hold(host); host->ops->post_req(host, mrq, err); + mmc_host_clk_release(host); + } } /** @@ -620,7 +626,9 @@ int mmc_host_enable(struct mmc_host *host) int err; host->en_dis_recurs = 1; + mmc_host_clk_hold(host); err = host->ops->enable(host); + mmc_host_clk_release(host); host->en_dis_recurs = 0; if (err) { @@ -640,7 +648,9 @@ static int mmc_host_do_disable(struct mmc_host *host, int lazy) int err; host->en_dis_recurs = 1; + mmc_host_clk_hold(host); err = host->ops->disable(host, lazy); + mmc_host_clk_release(host); host->en_dis_recurs = 0; if (err < 0) { @@ -1203,8 +1213,11 @@ int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage, bool cmd11 host->ios.signal_voltage = signal_voltage; - if (host->ops->start_signal_voltage_switch) + if (host->ops->start_signal_voltage_switch) { + mmc_host_clk_hold(host); err = host->ops->start_signal_voltage_switch(host, &host->ios); + mmc_host_clk_release(host); + } return err; } diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h index fb8a5cd2e4a..08a7852ade4 100644 --- a/drivers/mmc/core/host.h +++ b/drivers/mmc/core/host.h @@ -14,27 +14,6 @@ int mmc_register_host_class(void); void mmc_unregister_host_class(void); - -#ifdef CONFIG_MMC_CLKGATE -void mmc_host_clk_hold(struct mmc_host *host); -void mmc_host_clk_release(struct mmc_host *host); -unsigned int mmc_host_clk_rate(struct mmc_host *host); - -#else -static inline void mmc_host_clk_hold(struct mmc_host *host) -{ -} - -static inline void mmc_host_clk_release(struct mmc_host *host) -{ -} - -static inline unsigned int mmc_host_clk_rate(struct mmc_host *host) -{ - return host->ios.clock; -} -#endif - void mmc_host_deeper_disable(struct work_struct *work); #endif diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index c63ad03c29c..5017f9354ce 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -451,9 +451,11 @@ static int sd_select_driver_type(struct mmc_card *card, u8 *status) * information and let the hardware specific code * return what is possible given the options */ + mmc_host_clk_hold(card->host); drive_strength = card->host->ops->select_drive_strength( card->sw_caps.uhs_max_dtr, host_drv_type, card_drv_type); + mmc_host_clk_release(card->host); err = mmc_sd_switch(card, 1, 2, drive_strength, status); if (err) @@ -660,9 +662,12 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) goto out; /* SPI mode doesn't define CMD19 */ - if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning) + if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning) { + mmc_host_clk_hold(card->host); err = card->host->ops->execute_tuning(card->host, MMC_SEND_TUNING_BLOCK); + mmc_host_clk_release(card->host); + } out: kfree(status); @@ -850,8 +855,11 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card, if (!reinit) { int ro = -1; - if (host->ops->get_ro) + if (host->ops->get_ro) { + mmc_host_clk_hold(card->host); ro = host->ops->get_ro(host); + mmc_host_clk_release(card->host); + } if (ro < 0) { pr_warning("%s: host does not " @@ -967,8 +975,11 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, * Since initialization is now complete, enable preset * value registers for UHS-I cards. */ - if (host->ops->enable_preset_value) + if (host->ops->enable_preset_value) { + mmc_host_clk_hold(card->host); host->ops->enable_preset_value(host, true); + mmc_host_clk_release(card->host); + } } else { /* * Attempt to change to high-speed (if supported) @@ -1151,8 +1162,11 @@ int mmc_attach_sd(struct mmc_host *host) return err; /* Disable preset value enable if already set since last time */ - if (host->ops->enable_preset_value) + if (host->ops->enable_preset_value) { + mmc_host_clk_hold(host); host->ops->enable_preset_value(host, false); + mmc_host_clk_release(host); + } err = mmc_send_app_op_cond(host, 0, &ocr); if (err) diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c index 68f81b9ee0f..f573e7f9f74 100644 --- a/drivers/mmc/core/sdio_irq.c +++ b/drivers/mmc/core/sdio_irq.c @@ -146,15 +146,21 @@ static int sdio_irq_thread(void *_host) } set_current_state(TASK_INTERRUPTIBLE); - if (host->caps & MMC_CAP_SDIO_IRQ) + if (host->caps & MMC_CAP_SDIO_IRQ) { + mmc_host_clk_hold(host); host->ops->enable_sdio_irq(host, 1); + mmc_host_clk_release(host); + } if (!kthread_should_stop()) schedule_timeout(period); set_current_state(TASK_RUNNING); } while (!kthread_should_stop()); - if (host->caps & MMC_CAP_SDIO_IRQ) + if (host->caps & MMC_CAP_SDIO_IRQ) { + mmc_host_clk_hold(host); host->ops->enable_sdio_irq(host, 0); + mmc_host_clk_release(host); + } pr_debug("%s: IRQ thread exiting with code %d\n", mmc_hostname(host), ret); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0beba1e5e1e..73e5ee8e9ca 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -444,4 +444,23 @@ static inline int mmc_boot_partition_access(struct mmc_host *host) return !(host->caps2 & MMC_CAP2_BOOTPART_NOACC); } +#ifdef CONFIG_MMC_CLKGATE +void mmc_host_clk_hold(struct mmc_host *host); +void mmc_host_clk_release(struct mmc_host *host); +unsigned int mmc_host_clk_rate(struct mmc_host *host); + +#else +static inline void mmc_host_clk_hold(struct mmc_host *host) +{ +} + +static inline void mmc_host_clk_release(struct mmc_host *host) +{ +} + +static inline unsigned int mmc_host_clk_rate(struct mmc_host *host) +{ + return host->ios.clock; +} +#endif #endif /* LINUX_MMC_HOST_H */ -- cgit v1.2.3-70-g09d2 From 6e8201f57c9359c9c5dc8f9805c15a4392492a10 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Mon, 16 Jan 2012 17:49:01 +0900 Subject: mmc: core: add the capability for broken voltage There is an understood mismatch between the voltage the host controller is set to and the voltage supplied to the card by a fixed voltage regulator. Teaching the driver to accept the mismatch is overly complicated. Instead just accept the regulator's voltage. This patch adds MMC_CAP2_BROKEN_VOLTAGE. If the voltage didn't satisfy between min_uV and max_uV, try to change the voltage in core.c. When changing the voltage, maybe use regulator_set_voltage(). In regulator_set_voltage(), check the below condition. /* sanity check */ if (!rdev->desc->ops->set_voltage && !rdev->desc->ops->set_voltage_sel) { ret = -EINVAL; goto out; } If some board should use the fixed-regulator, always return -EINVAL. Then, eMMC didn't initialize always. So if use a fixed-regulator, we need to add the MMC_CAP2_BROKEN_VOLTAGE. Signed-off-by: Jaehoon Chung Signed-off-by: Kyungmin Park Acked-by: Adrian Hunter Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 4 ++++ include/linux/mmc/host.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index b3063b741df..18661554e79 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1131,6 +1131,10 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc, * might not allow this operation */ voltage = regulator_get_voltage(supply); + + if (mmc->caps2 & MMC_CAP2_BROKEN_VOLTAGE) + min_uV = max_uV = voltage; + if (voltage < 0) result = voltage; else if (voltage < min_uV || voltage > max_uV) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 73e5ee8e9ca..ee2b0363c04 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -257,6 +257,7 @@ struct mmc_host { #define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ #define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ MMC_CAP2_HS200_1_2V_SDR) +#define MMC_CAP2_BROKEN_VOLTAGE (1 << 7) /* Use the broken voltage */ mmc_pm_flag_t pm_caps; /* supported pm features */ unsigned int power_notify_type; -- cgit v1.2.3-70-g09d2 From 3e73c36b4dc224529d0b0c0d5d69c0dacd793c42 Mon Sep 17 00:00:00 2001 From: Girish K S Date: Tue, 31 Jan 2012 15:44:03 +0530 Subject: mmc: core: Fix PowerOff Notify suspend/resume Modified the mmc_poweroff to resume before sending the poweroff notification command. In sleep mode only AWAKE and RESET commands are allowed, so before sending the poweroff notification command resume from sleep mode and then send the notification command. PowerOff Notify is tested on a Synopsis Designware Host Controller (eMMC 4.5). The suspend to RAM and resume works fine. Signed-off-by: Girish K S Tested-by: Girish K S Reviewed-by: Saugata Das Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 26 +++++++++++++++++++------- drivers/mmc/core/mmc.c | 17 ++++++++++++----- include/linux/mmc/card.h | 4 ++++ 3 files changed, 35 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 18661554e79..690255c7d4d 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1256,6 +1256,7 @@ static void mmc_poweroff_notify(struct mmc_host *host) int err = 0; card = host->card; + mmc_claim_host(host); /* * Send power notify command only if card @@ -1286,6 +1287,7 @@ static void mmc_poweroff_notify(struct mmc_host *host) /* Set the card state to no notification after the poweroff */ card->poweroff_notify_state = MMC_NO_POWER_NOTIFICATION; } + mmc_release_host(host); } /* @@ -1344,12 +1346,28 @@ static void mmc_power_up(struct mmc_host *host) void mmc_power_off(struct mmc_host *host) { + int err = 0; mmc_host_clk_hold(host); host->ios.clock = 0; host->ios.vdd = 0; - mmc_poweroff_notify(host); + /* + * For eMMC 4.5 device send AWAKE command before + * POWER_OFF_NOTIFY command, because in sleep state + * eMMC 4.5 devices respond to only RESET and AWAKE cmd + */ + if (host->card && mmc_card_is_sleep(host->card) && + host->bus_ops->resume) { + err = host->bus_ops->resume(host); + + if (!err) + mmc_poweroff_notify(host); + else + pr_warning("%s: error %d during resume " + "(continue with poweroff sequence)\n", + mmc_hostname(host), err); + } /* * Reset ocr mask to be the highest possible voltage supported for @@ -2403,12 +2421,6 @@ int mmc_suspend_host(struct mmc_host *host) */ if (mmc_try_claim_host(host)) { if (host->bus_ops->suspend) { - /* - * For eMMC 4.5 device send notify command - * before sleep, because in sleep state eMMC 4.5 - * devices respond to only RESET and AWAKE cmd - */ - mmc_poweroff_notify(host); err = host->bus_ops->suspend(host); } mmc_do_release_host(host); diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index dd3fcac684a..9be031934e3 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1316,11 +1316,13 @@ static int mmc_suspend(struct mmc_host *host) BUG_ON(!host->card); mmc_claim_host(host); - if (mmc_card_can_sleep(host)) + if (mmc_card_can_sleep(host)) { err = mmc_card_sleep(host); - else if (!mmc_host_is_spi(host)) + if (!err) + mmc_card_set_sleep(host->card); + } else if (!mmc_host_is_spi(host)) mmc_deselect_cards(host); - host->card->state &= ~MMC_STATE_HIGHSPEED; + host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200); mmc_release_host(host); return err; @@ -1340,7 +1342,11 @@ static int mmc_resume(struct mmc_host *host) BUG_ON(!host->card); mmc_claim_host(host); - err = mmc_init_card(host, host->ocr, host->card); + if (mmc_card_is_sleep(host->card)) { + err = mmc_card_awake(host); + mmc_card_clr_sleep(host->card); + } else + err = mmc_init_card(host, host->ocr, host->card); mmc_release_host(host); return err; @@ -1350,7 +1356,8 @@ static int mmc_power_restore(struct mmc_host *host) { int ret; - host->card->state &= ~MMC_STATE_HIGHSPEED; + host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200); + mmc_card_clr_sleep(host->card); mmc_claim_host(host); ret = mmc_init_card(host, host->ocr, host->card); mmc_release_host(host); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 9f22ba572de..19a41d1737a 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -217,6 +217,7 @@ struct mmc_card { #define MMC_CARD_SDXC (1<<6) /* card is SDXC */ #define MMC_CARD_REMOVED (1<<7) /* card has been removed */ #define MMC_STATE_HIGHSPEED_200 (1<<8) /* card is in HS200 mode */ +#define MMC_STATE_SLEEP (1<<9) /* card is in sleep state */ unsigned int quirks; /* card quirks */ #define MMC_QUIRK_LENIENT_FN0 (1<<0) /* allow SDIO FN0 writes outside of the VS CCCR range */ #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1) /* use func->cur_blksize */ @@ -382,6 +383,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) #define mmc_sd_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED) #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC) #define mmc_card_removed(c) ((c) && ((c)->state & MMC_CARD_REMOVED)) +#define mmc_card_is_sleep(c) ((c)->state & MMC_STATE_SLEEP) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY) @@ -393,7 +395,9 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED) #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC) #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED) +#define mmc_card_set_sleep(c) ((c)->state |= MMC_STATE_SLEEP) +#define mmc_card_clr_sleep(c) ((c)->state &= ~MMC_STATE_SLEEP) /* * Quirk add/remove for MMC products. */ -- cgit v1.2.3-70-g09d2 From f9c2a0dc42a6938ff2a80e55ca2bbd1d5581c72e Mon Sep 17 00:00:00 2001 From: Seungwon Jeon Date: Thu, 9 Feb 2012 14:32:43 +0900 Subject: mmc: dw_mmc: Fix PIO mode with support of highmem Current PIO mode makes a kernel crash with CONFIG_HIGHMEM. Highmem pages have a NULL from sg_virt(sg). This patch fixes the following problem. Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: Oops: 817 [#1] PREEMPT SMP Modules linked in: CPU: 0 Not tainted (3.0.15-01423-gdbf465f #589) PC is at dw_mci_pull_data32+0x4c/0x9c LR is at dw_mci_read_data_pio+0x54/0x1f0 pc : [] lr : [] psr: 20000193 sp : c0619d48 ip : c0619d70 fp : c0619d6c r10: 00000000 r9 : 00000002 r8 : 00001000 r7 : 00000200 r6 : 00000000 r5 : e1dd3100 r4 : 00000000 r3 : 65622023 r2 : 0000007f r1 : eeb96000 r0 : e1dd3100 Flags: nzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment xkernel Control: 10c5387d Table: 61e2004a DAC: 00000015 Process swapper (pid: 0, stack limit = 0xc06182f0) Stack: (0xc0619d48 to 0xc061a000) 9d40: e1dd3100 e1a4f000 00000000 e1dd3100 e1a4f000 00000200 9d60: c0619da4 c0619d70 c035988c c03587e4 c0619d9c e18158f4 e1dd3100 e1dd3100 9d80: 00000020 00000000 00000000 00000020 c06e8a84 00000000 c0619e04 c0619da8 9da0: c0359b24 c0359844 e18158f4 e1dd3164 e1dd3168 e1dd3150 3d02fc79 e1dd3154 9dc0: e1dd3178 00000000 00000020 00000000 e1dd3150 00000000 c10dd7e8 e1a84900 9de0: c061e7cc 00000000 00000000 0000008d c06e8a84 c061e780 c0619e4c c0619e08 9e00: c00c4738 c0359a34 3d02fc79 00000000 c0619e4c c05a1698 c05a1670 c05a165c 9e20: c04de8b0 c061e780 c061e7cc e1a84900 ffffed68 0000008d c0618000 00000000 9e40: c0619e6c c0619e50 c00c48b4 c00c46c8 c061e780 c00423ac c061e7cc ffffed68 9e60: c0619e8c c0619e70 c00c7358 c00c487c 0000008d ffffee38 c0618000 ffffed68 9e80: c0619ea4 c0619e90 c00c4258 c00c72b0 c00423ac ffffee38 c0619ecc c0619ea8 9ea0: c004241c c00c4234 ffffffff f8810000 0000006d 00000002 00000001 7fffffff 9ec0: c0619f44 c0619ed0 c0048bc0 c00423c4 220ae7a9 00000000 386f0d30 0005d3a4 9ee0: c00423ac c10dd0b8 c06f2cd8 c0618000 c0594778 c003a674 7fffffff c0619f44 9f00: 386f0d30 c0619f18 c00a6f94 c005be3c 80000013 ffffffff 386f0d30 0005d3a4 9f20: 386f0d30 0005d2d1 c10dd0a8 c10dd0b8 c06f2cd8 c0618000 c0619f74 c0619f48 9f40: c0345858 c005be00 c00a2440 c0618000 c0618000 c00410d8 c06c1944 c00410fc 9f60: c0594778 c003a674 c0619f9c c0619f78 c004a7e8 c03457b4 c0618000 c06c18f8 9f80: 00000000 c0039c70 c06c18d4 c003a674 c0619fb4 c0619fa0 c04ceafc c004a714 9fa0: c06287b4 c06c18f8 c0619ff4 c0619fb8 c0008b68 c04cea68 c0008578 00000000 9fc0: 00000000 c003a674 00000000 10c5387d c0628658 c003aa78 c062f1c4 4000406a 9fe0: 413fc090 00000000 00000000 c0619ff8 40008044 c0008858 00000000 00000000 Backtrace: [] (dw_mci_pull_data32+0x0/0x9c) from [] (dw_mci_read_data_pio+0x54/0x1f0) r6:00000200 r5:e1a4f000 r4:e1dd3100 [] (dw_mci_read_data_pio+0x0/0x1f0) from [] (dw_mci_interrupt+0xfc/0x4a4) [] (dw_mci_interrupt+0x0/0x4a4) from [] (handle_irq_event_percpu+0x7c/0x1b4) [] (handle_irq_event_percpu+0x0/0x1b4) from [] (handle_irq_event+0x44/0x64) [] (handle_irq_event+0x0/0x64) from [] (handle_fasteoi_irq+0xb4/0x124) r7:ffffed68 r6:c061e7cc r5:c00423ac r4:c061e780 [] (handle_fasteoi_irq+0x0/0x124) from [] (generic_handle_irq+0x30/0x38) r7:ffffed68 r6:c0618000 r5:ffffee38 r4:0000008d [] (generic_handle_irq+0x0/0x38) from [] (asm_do_IRQ+0x64/0xe0) r5:ffffee38 r4:c00423ac [] (asm_do_IRQ+0x0/0xe0) from [] (__irq_svc+0x80/0x14c) Exception stack(0xc0619ed0 to 0xc0619f18) Signed-off-by: Seungwon Jeon Acked-by: Will Newton Cc: stable Signed-off-by: Chris Ball --- drivers/mmc/host/dw_mmc.c | 144 +++++++++++++++++++++++---------------------- include/linux/mmc/dw_mmc.h | 6 +- 2 files changed, 79 insertions(+), 71 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 0e342793ff1..8bec1c36b15 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -502,8 +501,14 @@ static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data) host->dir_status = DW_MCI_SEND_STATUS; if (dw_mci_submit_data_dma(host, data)) { + int flags = SG_MITER_ATOMIC; + if (host->data->flags & MMC_DATA_READ) + flags |= SG_MITER_TO_SG; + else + flags |= SG_MITER_FROM_SG; + + sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags); host->sg = data->sg; - host->pio_offset = 0; host->part_buf_start = 0; host->part_buf_count = 0; @@ -972,6 +977,7 @@ static void dw_mci_tasklet_func(unsigned long priv) * generates a block interrupt, hence setting * the scatter-gather pointer to NULL. */ + sg_miter_stop(&host->sg_miter); host->sg = NULL; ctrl = mci_readl(host, CTRL); ctrl |= SDMMC_CTRL_FIFO_RESET; @@ -1311,54 +1317,44 @@ static void dw_mci_pull_data(struct dw_mci *host, void *buf, int cnt) static void dw_mci_read_data_pio(struct dw_mci *host) { - struct scatterlist *sg = host->sg; - void *buf = sg_virt(sg); - unsigned int offset = host->pio_offset; + struct sg_mapping_iter *sg_miter = &host->sg_miter; + void *buf; + unsigned int offset; struct mmc_data *data = host->data; int shift = host->data_shift; u32 status; unsigned int nbytes = 0, len; + unsigned int remain, fcnt; do { - len = host->part_buf_count + - (SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift); - if (offset + len <= sg->length) { + if (!sg_miter_next(sg_miter)) + goto done; + + host->sg = sg_miter->__sg; + buf = sg_miter->addr; + remain = sg_miter->length; + offset = 0; + + do { + fcnt = (SDMMC_GET_FCNT(mci_readl(host, STATUS)) + << shift) + host->part_buf_count; + len = min(remain, fcnt); + if (!len) + break; dw_mci_pull_data(host, (void *)(buf + offset), len); - offset += len; nbytes += len; - - if (offset == sg->length) { - flush_dcache_page(sg_page(sg)); - host->sg = sg = sg_next(sg); - if (!sg) - goto done; - - offset = 0; - buf = sg_virt(sg); - } - } else { - unsigned int remaining = sg->length - offset; - dw_mci_pull_data(host, (void *)(buf + offset), - remaining); - nbytes += remaining; - - flush_dcache_page(sg_page(sg)); - host->sg = sg = sg_next(sg); - if (!sg) - goto done; - - offset = len - remaining; - buf = sg_virt(sg); - dw_mci_pull_data(host, buf, offset); - nbytes += offset; - } + remain -= len; + } while (remain); + sg_miter->consumed = offset; status = mci_readl(host, MINTSTS); mci_writel(host, RINTSTS, SDMMC_INT_RXDR); if (status & DW_MCI_DATA_ERROR_FLAGS) { host->data_status = status; data->bytes_xfered += nbytes; + sg_miter_stop(sg_miter); + host->sg = NULL; smp_wmb(); set_bit(EVENT_DATA_ERROR, &host->pending_events); @@ -1367,65 +1363,66 @@ static void dw_mci_read_data_pio(struct dw_mci *host) return; } } while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/ - host->pio_offset = offset; data->bytes_xfered += nbytes; + + if (!remain) { + if (!sg_miter_next(sg_miter)) + goto done; + sg_miter->consumed = 0; + } + sg_miter_stop(sg_miter); return; done: data->bytes_xfered += nbytes; + sg_miter_stop(sg_miter); + host->sg = NULL; smp_wmb(); set_bit(EVENT_XFER_COMPLETE, &host->pending_events); } static void dw_mci_write_data_pio(struct dw_mci *host) { - struct scatterlist *sg = host->sg; - void *buf = sg_virt(sg); - unsigned int offset = host->pio_offset; + struct sg_mapping_iter *sg_miter = &host->sg_miter; + void *buf; + unsigned int offset; struct mmc_data *data = host->data; int shift = host->data_shift; u32 status; unsigned int nbytes = 0, len; + unsigned int fifo_depth = host->fifo_depth; + unsigned int remain, fcnt; do { - len = ((host->fifo_depth - - SDMMC_GET_FCNT(mci_readl(host, STATUS))) << shift) - - host->part_buf_count; - if (offset + len <= sg->length) { + if (!sg_miter_next(sg_miter)) + goto done; + + host->sg = sg_miter->__sg; + buf = sg_miter->addr; + remain = sg_miter->length; + offset = 0; + + do { + fcnt = ((fifo_depth - + SDMMC_GET_FCNT(mci_readl(host, STATUS))) + << shift) - host->part_buf_count; + len = min(remain, fcnt); + if (!len) + break; host->push_data(host, (void *)(buf + offset), len); - offset += len; nbytes += len; - if (offset == sg->length) { - host->sg = sg = sg_next(sg); - if (!sg) - goto done; - - offset = 0; - buf = sg_virt(sg); - } - } else { - unsigned int remaining = sg->length - offset; - - host->push_data(host, (void *)(buf + offset), - remaining); - nbytes += remaining; - - host->sg = sg = sg_next(sg); - if (!sg) - goto done; - - offset = len - remaining; - buf = sg_virt(sg); - host->push_data(host, (void *)buf, offset); - nbytes += offset; - } + remain -= len; + } while (remain); + sg_miter->consumed = offset; status = mci_readl(host, MINTSTS); mci_writel(host, RINTSTS, SDMMC_INT_TXDR); if (status & DW_MCI_DATA_ERROR_FLAGS) { host->data_status = status; data->bytes_xfered += nbytes; + sg_miter_stop(sg_miter); + host->sg = NULL; smp_wmb(); @@ -1435,12 +1432,20 @@ static void dw_mci_write_data_pio(struct dw_mci *host) return; } } while (status & SDMMC_INT_TXDR); /* if TXDR write again */ - host->pio_offset = offset; data->bytes_xfered += nbytes; + + if (!remain) { + if (!sg_miter_next(sg_miter)) + goto done; + sg_miter->consumed = 0; + } + sg_miter_stop(sg_miter); return; done: data->bytes_xfered += nbytes; + sg_miter_stop(sg_miter); + host->sg = NULL; smp_wmb(); set_bit(EVENT_XFER_COMPLETE, &host->pending_events); } @@ -1643,6 +1648,7 @@ static void dw_mci_work_routine_card(struct work_struct *work) * block interrupt, hence setting the * scatter-gather pointer to NULL. */ + sg_miter_stop(&host->sg_miter); host->sg = NULL; ctrl = mci_readl(host, CTRL); diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index e8779c6d175..aae5d1f1bb3 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -14,6 +14,8 @@ #ifndef LINUX_MMC_DW_MMC_H #define LINUX_MMC_DW_MMC_H +#include + #define MAX_MCI_SLOTS 2 enum dw_mci_state { @@ -40,7 +42,7 @@ struct mmc_data; * @lock: Spinlock protecting the queue and associated data. * @regs: Pointer to MMIO registers. * @sg: Scatterlist entry currently being processed by PIO code, if any. - * @pio_offset: Offset into the current scatterlist entry. + * @sg_miter: PIO mapping scatterlist iterator. * @cur_slot: The slot which is currently using the controller. * @mrq: The request currently being processed on @cur_slot, * or NULL if the controller is idle. @@ -115,7 +117,7 @@ struct dw_mci { void __iomem *regs; struct scatterlist *sg; - unsigned int pio_offset; + struct sg_mapping_iter sg_miter; struct dw_mci_slot *cur_slot; struct mmc_request *mrq; -- cgit v1.2.3-70-g09d2 From 6b6dc836a195e077e76977b6c020a73de411b46d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 10 Feb 2012 11:03:00 +0100 Subject: vfs: Provide function to get superblock and wait for it to thaw In quota code we need to find a superblock corresponding to a device and wait for superblock to be unfrozen. However this waiting has to happen without s_umount semaphore because that is required for superblock to thaw. So provide a function in VFS for this to keep dances with s_umount where they belong. [AV: implementation switched to saner variant] Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/super.c | 22 ++++++++++++++++++++++ include/linux/fs.h | 1 + 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/fs/super.c b/fs/super.c index 6015c02296b..6277ec6cb60 100644 --- a/fs/super.c +++ b/fs/super.c @@ -633,6 +633,28 @@ rescan: EXPORT_SYMBOL(get_super); +/** + * get_super_thawed - get thawed superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device. The superblock is returned once it is thawed + * (or immediately if it was not frozen). %NULL is returned if no match + * is found. + */ +struct super_block *get_super_thawed(struct block_device *bdev) +{ + while (1) { + struct super_block *s = get_super(bdev); + if (!s || s->s_frozen == SB_UNFROZEN) + return s; + up_read(&s->s_umount); + vfs_check_frozen(s, SB_FREEZE_WRITE); + put_super(s); + } +} +EXPORT_SYMBOL(get_super_thawed); + /** * get_active_super - get an active reference to the superblock of a device * @bdev: device to get the superblock for diff --git a/include/linux/fs.h b/include/linux/fs.h index 386da09f229..69cd5bb640f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2496,6 +2496,7 @@ extern void get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); +extern struct super_block *get_super_thawed(struct block_device *); extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); extern void iterate_supers(void (*)(struct super_block *, void *), void *); -- cgit v1.2.3-70-g09d2 From 607c50d429371797f198ffc34afb239eadd1c655 Mon Sep 17 00:00:00 2001 From: Eun-Chul Kim Date: Tue, 14 Feb 2012 15:59:46 +0900 Subject: drm/exynos: added panel physical size. Signed-off-by: Eun-Chul Kim Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_connector.c | 16 +++++++++++----- drivers/gpu/drm/exynos/exynos_drm_drv.h | 4 ++-- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 27 ++++++++++++++------------- include/drm/exynos_drm.h | 17 +++++++++++++++-- 4 files changed, 42 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.c b/drivers/gpu/drm/exynos/exynos_drm_connector.c index d620b078425..618bd4d87d2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_connector.c +++ b/drivers/gpu/drm/exynos/exynos_drm_connector.c @@ -28,6 +28,7 @@ #include "drmP.h" #include "drm_crtc_helper.h" +#include #include "exynos_drm_drv.h" #include "exynos_drm_encoder.h" @@ -44,8 +45,9 @@ struct exynos_drm_connector { /* convert exynos_video_timings to drm_display_mode */ static inline void convert_to_display_mode(struct drm_display_mode *mode, - struct fb_videomode *timing) + struct exynos_drm_panel_info *panel) { + struct fb_videomode *timing = &panel->timing; DRM_DEBUG_KMS("%s\n", __FILE__); mode->clock = timing->pixclock / 1000; @@ -60,6 +62,8 @@ convert_to_display_mode(struct drm_display_mode *mode, mode->vsync_start = mode->vdisplay + timing->upper_margin; mode->vsync_end = mode->vsync_start + timing->vsync_len; mode->vtotal = mode->vsync_end + timing->lower_margin; + mode->width_mm = panel->width_mm; + mode->height_mm = panel->height_mm; if (timing->vmode & FB_VMODE_INTERLACED) mode->flags |= DRM_MODE_FLAG_INTERLACE; @@ -148,16 +152,18 @@ static int exynos_drm_connector_get_modes(struct drm_connector *connector) connector->display_info.raw_edid = edid; } else { struct drm_display_mode *mode = drm_mode_create(connector->dev); - struct fb_videomode *timing; + struct exynos_drm_panel_info *panel; - if (display_ops->get_timing) - timing = display_ops->get_timing(manager->dev); + if (display_ops->get_panel) + panel = display_ops->get_panel(manager->dev); else { drm_mode_destroy(connector->dev, mode); return 0; } - convert_to_display_mode(mode, timing); + convert_to_display_mode(mode, panel); + connector->display_info.width_mm = mode->width_mm; + connector->display_info.height_mm = mode->height_mm; mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; drm_mode_set_name(mode); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index e685e1e3305..13540de90bf 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -136,7 +136,7 @@ struct exynos_drm_overlay { * @type: one of EXYNOS_DISPLAY_TYPE_LCD and HDMI. * @is_connected: check for that display is connected or not. * @get_edid: get edid modes from display driver. - * @get_timing: get timing object from display driver. + * @get_panel: get panel object from display driver. * @check_timing: check if timing is valid or not. * @power_on: display device on or off. */ @@ -145,7 +145,7 @@ struct exynos_drm_display_ops { bool (*is_connected)(struct device *dev); int (*get_edid)(struct device *dev, struct drm_connector *connector, u8 *edid, int len); - void *(*get_timing)(struct device *dev); + void *(*get_panel)(struct device *dev); int (*check_timing)(struct device *dev, void *timing); int (*power_on)(struct device *dev, int mode); }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 0dbb32bb18a..360adf2bba0 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -89,7 +89,7 @@ struct fimd_context { bool suspended; struct mutex lock; - struct fb_videomode *timing; + struct exynos_drm_panel_info *panel; }; static bool fimd_display_is_connected(struct device *dev) @@ -101,13 +101,13 @@ static bool fimd_display_is_connected(struct device *dev) return true; } -static void *fimd_get_timing(struct device *dev) +static void *fimd_get_panel(struct device *dev) { struct fimd_context *ctx = get_fimd_context(dev); DRM_DEBUG_KMS("%s\n", __FILE__); - return ctx->timing; + return ctx->panel; } static int fimd_check_timing(struct device *dev, void *timing) @@ -131,7 +131,7 @@ static int fimd_display_power_on(struct device *dev, int mode) static struct exynos_drm_display_ops fimd_display_ops = { .type = EXYNOS_DISPLAY_TYPE_LCD, .is_connected = fimd_display_is_connected, - .get_timing = fimd_get_timing, + .get_panel = fimd_get_panel, .check_timing = fimd_check_timing, .power_on = fimd_display_power_on, }; @@ -193,7 +193,8 @@ static void fimd_apply(struct device *subdrv_dev) static void fimd_commit(struct device *dev) { struct fimd_context *ctx = get_fimd_context(dev); - struct fb_videomode *timing = ctx->timing; + struct exynos_drm_panel_info *panel = ctx->panel; + struct fb_videomode *timing = &panel->timing; u32 val; if (ctx->suspended) @@ -786,7 +787,7 @@ static int __devinit fimd_probe(struct platform_device *pdev) struct fimd_context *ctx; struct exynos_drm_subdrv *subdrv; struct exynos_drm_fimd_pdata *pdata; - struct fb_videomode *timing; + struct exynos_drm_panel_info *panel; struct resource *res; int win; int ret = -EINVAL; @@ -799,9 +800,9 @@ static int __devinit fimd_probe(struct platform_device *pdev) return -EINVAL; } - timing = &pdata->timing; - if (!timing) { - dev_err(dev, "timing is null.\n"); + panel = &pdata->panel; + if (!panel) { + dev_err(dev, "panel is null.\n"); return -EINVAL; } @@ -863,16 +864,16 @@ static int __devinit fimd_probe(struct platform_device *pdev) goto err_req_irq; } - ctx->clkdiv = fimd_calc_clkdiv(ctx, timing); + ctx->clkdiv = fimd_calc_clkdiv(ctx, &panel->timing); ctx->vidcon0 = pdata->vidcon0; ctx->vidcon1 = pdata->vidcon1; ctx->default_win = pdata->default_win; - ctx->timing = timing; + ctx->panel = panel; - timing->pixclock = clk_get_rate(ctx->lcd_clk) / ctx->clkdiv; + panel->timing.pixclock = clk_get_rate(ctx->lcd_clk) / ctx->clkdiv; DRM_DEBUG_KMS("pixel clock = %d, clkdiv = %d\n", - timing->pixclock, ctx->clkdiv); + panel->timing.pixclock, ctx->clkdiv); subdrv = &ctx->subdrv; diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h index 5e120f1c5cd..308575ea7c0 100644 --- a/include/drm/exynos_drm.h +++ b/include/drm/exynos_drm.h @@ -98,14 +98,27 @@ struct drm_exynos_plane_set_zpos { DRM_EXYNOS_PLANE_SET_ZPOS, struct drm_exynos_plane_set_zpos) /** - * Platform Specific Structure for DRM based FIMD. + * A structure for lcd panel information. * * @timing: default video mode for initializing + * @width_mm: physical size of lcd width. + * @height_mm: physical size of lcd height. + */ +struct exynos_drm_panel_info { + struct fb_videomode timing; + u32 width_mm; + u32 height_mm; +}; + +/** + * Platform Specific Structure for DRM based FIMD. + * + * @panel: default panel info for initializing * @default_win: default window layer number to be used for UI. * @bpp: default bit per pixel. */ struct exynos_drm_fimd_pdata { - struct fb_videomode timing; + struct exynos_drm_panel_info panel; u32 vidcon0; u32 vidcon1; unsigned int default_win; -- cgit v1.2.3-70-g09d2 From 265da78afd52b9a01d76d99556e828a6c30f1ac9 Mon Sep 17 00:00:00 2001 From: Kamil Debski Date: Wed, 15 Feb 2012 10:23:33 +0900 Subject: drm/exynos: exynos_drm.h header file fixes First of all #ifdef __KERNEL__ was added to exynos_drm.h to mark the part that should be left out of userspace. Secondly exynos_drm.h was added to include/drm/Kbuild, so it will be included when doing make headers_install. Signed-off-by: Kamil Debski Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- include/drm/Kbuild | 1 + include/drm/exynos_drm.h | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/Kbuild b/include/drm/Kbuild index a5c0e10fd47..1e38a19d68f 100644 --- a/include/drm/Kbuild +++ b/include/drm/Kbuild @@ -2,6 +2,7 @@ header-y += drm.h header-y += drm_fourcc.h header-y += drm_mode.h header-y += drm_sarea.h +header-y += exynos_drm.h header-y += i810_drm.h header-y += i915_drm.h header-y += mga_drm.h diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h index 308575ea7c0..1ed3aae893a 100644 --- a/include/drm/exynos_drm.h +++ b/include/drm/exynos_drm.h @@ -97,6 +97,8 @@ struct drm_exynos_plane_set_zpos { #define DRM_IOCTL_EXYNOS_PLANE_SET_ZPOS DRM_IOWR(DRM_COMMAND_BASE + \ DRM_EXYNOS_PLANE_SET_ZPOS, struct drm_exynos_plane_set_zpos) +#ifdef __KERNEL__ + /** * A structure for lcd panel information. * @@ -152,4 +154,5 @@ struct exynos_drm_hdmi_pdata { unsigned int bpp; }; -#endif +#endif /* __KERNEL__ */ +#endif /* _EXYNOS_DRM_H_ */ -- cgit v1.2.3-70-g09d2 From 4aa832c27edf902130f8bace1d42cf22468823fa Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 8 Jan 2012 22:51:16 +0200 Subject: Bluetooth: Remove bogus inline declaration from l2cap_chan_connect As reported by Dan Carpenter this function causes a Sparse warning and shouldn't be declared inline: include/net/bluetooth/l2cap.h:837:30 error: marked inline, but without a definition" Reported-by: Dan Carpenter Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 2 +- net/bluetooth/l2cap_core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 68f58915069..124f7cf45bd 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -834,7 +834,7 @@ int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid); struct l2cap_chan *l2cap_chan_create(struct sock *sk); void l2cap_chan_close(struct l2cap_chan *chan, int reason); void l2cap_chan_destroy(struct l2cap_chan *chan); -inline int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, +int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst); int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, u32 priority); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index faf0b11ac1d..980abdb3067 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1120,7 +1120,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr return c1; } -inline int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst) +int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst) { struct sock *sk = chan->sk; bdaddr_t *src = &bt_sk(sk)->src; -- cgit v1.2.3-70-g09d2 From 6e1da683f79a22fafaada62d547138daaa9e3456 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 4 Jan 2012 12:10:42 +0100 Subject: Bluetooth: l2cap_set_timer needs jiffies as timeout value After moving L2CAP timers to workqueues l2cap_set_timer expects timeout value to be specified in jiffies but constants defined in miliseconds are used. This makes timeouts unreliable when CONFIG_HZ is not set to 1000. __set_chan_timer macro still uses jiffies as input to avoid multiple conversions from/to jiffies for sk_sndtimeo value which is already specified in jiffies. Signed-off-by: Andrzej Kaczmarek Ackec-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/l2cap.h | 6 +++--- net/bluetooth/l2cap_core.c | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 124f7cf45bd..26486e182f5 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -626,13 +626,13 @@ static inline void l2cap_clear_timer(struct l2cap_chan *chan, #define __set_chan_timer(c, t) l2cap_set_timer(c, &c->chan_timer, (t)) #define __clear_chan_timer(c) l2cap_clear_timer(c, &c->chan_timer) #define __set_retrans_timer(c) l2cap_set_timer(c, &c->retrans_timer, \ - L2CAP_DEFAULT_RETRANS_TO); + msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO)); #define __clear_retrans_timer(c) l2cap_clear_timer(c, &c->retrans_timer) #define __set_monitor_timer(c) l2cap_set_timer(c, &c->monitor_timer, \ - L2CAP_DEFAULT_MONITOR_TO); + msecs_to_jiffies(L2CAP_DEFAULT_MONITOR_TO)); #define __clear_monitor_timer(c) l2cap_clear_timer(c, &c->monitor_timer) #define __set_ack_timer(c) l2cap_set_timer(c, &chan->ack_timer, \ - L2CAP_DEFAULT_ACK_TO); + msecs_to_jiffies(L2CAP_DEFAULT_ACK_TO)); #define __clear_ack_timer(c) l2cap_clear_timer(c, &c->ack_timer) static inline int __seq_offset(struct l2cap_chan *chan, __u16 seq1, __u16 seq2) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 980abdb3067..dcccae04ae0 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2970,7 +2970,8 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr default: sk->sk_err = ECONNRESET; - __set_chan_timer(chan, L2CAP_DISC_REJ_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_DISC_REJ_TIMEOUT)); l2cap_send_disconn_req(conn, chan, ECONNRESET); goto done; } @@ -4478,7 +4479,8 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt) if (encrypt == 0x00) { if (chan->sec_level == BT_SECURITY_MEDIUM) { __clear_chan_timer(chan); - __set_chan_timer(chan, L2CAP_ENC_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_ENC_TIMEOUT)); } else if (chan->sec_level == BT_SECURITY_HIGH) l2cap_chan_close(chan, ECONNREFUSED); } else { @@ -4546,7 +4548,8 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) L2CAP_CONN_REQ, sizeof(req), &req); } else { __clear_chan_timer(chan); - __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_DISC_TIMEOUT)); } } else if (chan->state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; @@ -4566,7 +4569,8 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } } else { l2cap_state_change(chan, BT_DISCONN); - __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_DISC_TIMEOUT)); res = L2CAP_CR_SEC_BLOCK; stat = L2CAP_CS_NO_INFO; } -- cgit v1.2.3-70-g09d2 From 331660637b4e5136602a98200a84f6b65ed8d5be Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 4 Jan 2012 11:57:17 -0300 Subject: Bluetooth: Fix using an absolute timeout on hci_conn_put() queue_delayed_work() expects a relative time for when that work should be scheduled. Signed-off-by: Vinicius Costa Gomes Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ea9231f4935..92b8fea553d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -561,7 +561,7 @@ static inline void hci_conn_put(struct hci_conn *conn) } cancel_delayed_work_sync(&conn->disc_work); queue_delayed_work(conn->hdev->workqueue, - &conn->disc_work, jiffies + timeo); + &conn->disc_work, timeo); } } -- cgit v1.2.3-70-g09d2 From b5a30dda6598af216c070165ece6068f9f00f33a Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sun, 22 Jan 2012 00:28:34 +0200 Subject: Bluetooth: silence lockdep warning Since bluetooth uses multiple protocols types, to avoid lockdep warnings, we need to use different lockdep classes (one for each protocol type). This is already done in bt_sock_create but it misses a couple of cases when new connections are created. This patch corrects that to fix the following warning: <4>[ 1864.732366] ======================================================= <4>[ 1864.733030] [ INFO: possible circular locking dependency detected ] <4>[ 1864.733544] 3.0.16-mid3-00007-gc9a0f62 #3 <4>[ 1864.733883] ------------------------------------------------------- <4>[ 1864.734408] t.android.btclc/4204 is trying to acquire lock: <4>[ 1864.734869] (rfcomm_mutex){+.+.+.}, at: [] rfcomm_dlc_close+0x15/0x30 <4>[ 1864.735541] <4>[ 1864.735549] but task is already holding lock: <4>[ 1864.736045] (sk_lock-AF_BLUETOOTH){+.+.+.}, at: [] lock_sock+0xa/0xc <4>[ 1864.736732] <4>[ 1864.736740] which lock already depends on the new lock. <4>[ 1864.736750] <4>[ 1864.737428] <4>[ 1864.737437] the existing dependency chain (in reverse order) is: <4>[ 1864.738016] <4>[ 1864.738023] -> #1 (sk_lock-AF_BLUETOOTH){+.+.+.}: <4>[ 1864.738549] [] lock_acquire+0x104/0x140 <4>[ 1864.738977] [] lock_sock_nested+0x58/0x68 <4>[ 1864.739411] [] l2cap_sock_sendmsg+0x3e/0x76 <4>[ 1864.739858] [] __sock_sendmsg+0x50/0x59 <4>[ 1864.740279] [] sock_sendmsg+0x94/0xa8 <4>[ 1864.740687] [] kernel_sendmsg+0x28/0x37 <4>[ 1864.741106] [] rfcomm_send_frame+0x30/0x38 <4>[ 1864.741542] [] rfcomm_send_ua+0x58/0x5a <4>[ 1864.741959] [] rfcomm_run+0x441/0xb52 <4>[ 1864.742365] [] kthread+0x63/0x68 <4>[ 1864.742742] [] kernel_thread_helper+0x6/0xd <4>[ 1864.743187] <4>[ 1864.743193] -> #0 (rfcomm_mutex){+.+.+.}: <4>[ 1864.743667] [] __lock_acquire+0x988/0xc00 <4>[ 1864.744100] [] lock_acquire+0x104/0x140 <4>[ 1864.744519] [] __mutex_lock_common+0x3b/0x33f <4>[ 1864.744975] [] mutex_lock_nested+0x2d/0x36 <4>[ 1864.745412] [] rfcomm_dlc_close+0x15/0x30 <4>[ 1864.745842] [] __rfcomm_sock_close+0x5f/0x6b <4>[ 1864.746288] [] rfcomm_sock_shutdown+0x2f/0x62 <4>[ 1864.746737] [] sys_socketcall+0x1db/0x422 <4>[ 1864.747165] [] syscall_call+0x7/0xb Signed-off-by: Octavian Purdila Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 2 ++ net/bluetooth/af_bluetooth.c | 12 +++++------- net/bluetooth/l2cap_sock.c | 2 ++ net/bluetooth/rfcomm/sock.c | 2 ++ 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index abaad6ed9b8..4a82ca0bb0b 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -256,4 +256,6 @@ void l2cap_exit(void); int sco_init(void); void sco_exit(void); +void bt_sock_reclassify_lock(struct sock *sk, int proto); + #endif /* __BLUETOOTH_H */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ef92864ac62..72eb187a5f6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -71,19 +71,16 @@ static const char *const bt_slock_key_strings[BT_MAX_PROTO] = { "slock-AF_BLUETOOTH-BTPROTO_AVDTP", }; -static inline void bt_sock_reclassify_lock(struct socket *sock, int proto) +void bt_sock_reclassify_lock(struct sock *sk, int proto) { - struct sock *sk = sock->sk; - - if (!sk) - return; - + BUG_ON(!sk); BUG_ON(sock_owned_by_user(sk)); sock_lock_init_class_and_name(sk, bt_slock_key_strings[proto], &bt_slock_key[proto], bt_key_strings[proto], &bt_lock_key[proto]); } +EXPORT_SYMBOL(bt_sock_reclassify_lock); int bt_sock_register(int proto, const struct net_proto_family *ops) { @@ -145,7 +142,8 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto, if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { err = bt_proto[proto]->create(net, sock, proto, kern); - bt_sock_reclassify_lock(sock, proto); + if (!err) + bt_sock_reclassify_lock(sock->sk, proto); module_put(bt_proto[proto]->owner); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index c57027f7606..401d9428ae4 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -849,6 +849,8 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(void *data) if (!sk) return NULL; + bt_sock_reclassify_lock(sk, BTPROTO_L2CAP); + l2cap_sock_init(sk, parent); return l2cap_pi(sk)->chan; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index f066678faee..22169c3f148 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -956,6 +956,8 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * if (!sk) goto done; + bt_sock_reclassify_lock(sk, BTPROTO_RFCOMM); + rfcomm_sock_init(sk, parent); bacpy(&bt_sk(sk)->src, &src); bacpy(&bt_sk(sk)->dst, &dst); -- cgit v1.2.3-70-g09d2 From a51cd2be864a3cc0272359b1995e213341dfc7e7 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 27 Jan 2012 19:42:02 -0300 Subject: Bluetooth: Fix potential deadlock We don't need to use the _sync variant in hci_conn_hold and hci_conn_put to cancel conn->disc_work delayed work. This way we avoid potential deadlocks like this one reported by lockdep. ====================================================== [ INFO: possible circular locking dependency detected ] 3.2.0+ #1 Not tainted ------------------------------------------------------- kworker/u:1/17 is trying to acquire lock: (&hdev->lock){+.+.+.}, at: [] hci_conn_timeout+0x62/0x158 [bluetooth] but task is already holding lock: ((&(&conn->disc_work)->work)){+.+...}, at: [] process_one_work+0x11a/0x2bf which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 ((&(&conn->disc_work)->work)){+.+...}: [] lock_acquire+0x8a/0xa7 [] wait_on_work+0x3d/0xaa [] __cancel_work_timer+0xac/0xef [] cancel_delayed_work_sync+0xd/0xf [] smp_chan_create+0xde/0xe6 [bluetooth] [] smp_conn_security+0xa3/0x12d [bluetooth] [] l2cap_connect_cfm+0x237/0x2e8 [bluetooth] [] hci_proto_connect_cfm+0x2d/0x6f [bluetooth] [] hci_event_packet+0x29d1/0x2d60 [bluetooth] [] hci_rx_work+0xd0/0x2e1 [bluetooth] [] process_one_work+0x178/0x2bf [] worker_thread+0xce/0x152 [] kthread+0x95/0x9d [] kernel_thread_helper+0x4/0x10 -> #1 (slock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+...}: [] lock_acquire+0x8a/0xa7 [] _raw_spin_lock_bh+0x36/0x6a [] lock_sock_nested+0x24/0x7f [] lock_sock+0xb/0xd [bluetooth] [] l2cap_chan_connect+0xa9/0x26f [bluetooth] [] l2cap_sock_connect+0xb3/0xff [bluetooth] [] sys_connect+0x69/0x8a [] system_call_fastpath+0x16/0x1b -> #0 (&hdev->lock){+.+.+.}: [] __lock_acquire+0xa80/0xd74 [] lock_acquire+0x8a/0xa7 [] __mutex_lock_common+0x48/0x38e [] mutex_lock_nested+0x2a/0x31 [] hci_conn_timeout+0x62/0x158 [bluetooth] [] process_one_work+0x178/0x2bf [] worker_thread+0xce/0x152 [] kthread+0x95/0x9d [] kernel_thread_helper+0x4/0x10 other info that might help us debug this: Chain exists of: &hdev->lock --> slock-AF_BLUETOOTH-BTPROTO_L2CAP --> (&(&conn->disc_work)->work) Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((&(&conn->disc_work)->work)); lock(slock-AF_BLUETOOTH-BTPROTO_L2CAP); lock((&(&conn->disc_work)->work)); lock(&hdev->lock); *** DEADLOCK *** 2 locks held by kworker/u:1/17: #0: (hdev->name){.+.+.+}, at: [] process_one_work+0x11a/0x2bf #1: ((&(&conn->disc_work)->work)){+.+...}, at: [] process_one_work+0x11a/0x2bf stack backtrace: Pid: 17, comm: kworker/u:1 Not tainted 3.2.0+ #1 Call Trace: [] print_circular_bug+0x1f8/0x209 [] __lock_acquire+0xa80/0xd74 [] ? arch_local_irq_restore+0x6/0xd [] ? vprintk+0x3f9/0x41e [] lock_acquire+0x8a/0xa7 [] ? hci_conn_timeout+0x62/0x158 [bluetooth] [] __mutex_lock_common+0x48/0x38e [] ? hci_conn_timeout+0x62/0x158 [bluetooth] [] ? __dynamic_pr_debug+0x6d/0x6f [] ? hci_conn_timeout+0x62/0x158 [bluetooth] [] ? trace_hardirqs_off+0xd/0xf [] mutex_lock_nested+0x2a/0x31 [] hci_conn_timeout+0x62/0x158 [bluetooth] [] process_one_work+0x178/0x2bf [] ? process_one_work+0x11a/0x2bf [] ? lock_acquired+0x1d0/0x1df [] ? hci_acl_disconn+0x65/0x65 [bluetooth] [] worker_thread+0xce/0x152 [] ? finish_task_switch+0x45/0xc5 [] ? manage_workers.isra.25+0x16a/0x16a [] kthread+0x95/0x9d [] kernel_thread_helper+0x4/0x10 [] ? retint_restore_args+0x13/0x13 [] ? __init_kthread_worker+0x55/0x55 [] ? gs_change+0x13/0x13 Signed-off-by: Andre Guedes Signed-off-by: Vinicius Costa Gomes Reviewed-by: Ulisses Furquim Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 92b8fea553d..453893b3120 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -540,7 +540,7 @@ void hci_conn_put_device(struct hci_conn *conn); static inline void hci_conn_hold(struct hci_conn *conn) { atomic_inc(&conn->refcnt); - cancel_delayed_work_sync(&conn->disc_work); + cancel_delayed_work(&conn->disc_work); } static inline void hci_conn_put(struct hci_conn *conn) @@ -559,7 +559,7 @@ static inline void hci_conn_put(struct hci_conn *conn) } else { timeo = msecs_to_jiffies(10); } - cancel_delayed_work_sync(&conn->disc_work); + cancel_delayed_work(&conn->disc_work); queue_delayed_work(conn->hdev->workqueue, &conn->disc_work, timeo); } -- cgit v1.2.3-70-g09d2 From 6de32750822d00bfa92c341166132b0714c5b559 Mon Sep 17 00:00:00 2001 From: Ulisses Furquim Date: Mon, 30 Jan 2012 18:26:28 -0200 Subject: Bluetooth: Remove usage of __cancel_delayed_work() __cancel_delayed_work() is being used in some paths where we cannot sleep waiting for the delayed work to finish. However, that function might return while the timer is running and the work will be queued again. Replace the calls with safer cancel_delayed_work() version which spins until the timer handler finishes on other CPUs and cancels the delayed work. Signed-off-by: Ulisses Furquim Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/l2cap.h | 4 ++-- net/bluetooth/l2cap_core.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 26486e182f5..b1664ed884e 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -611,7 +611,7 @@ static inline void l2cap_set_timer(struct l2cap_chan *chan, { BT_DBG("chan %p state %d timeout %ld", chan, chan->state, timeout); - if (!__cancel_delayed_work(work)) + if (!cancel_delayed_work(work)) l2cap_chan_hold(chan); schedule_delayed_work(work, timeout); } @@ -619,7 +619,7 @@ static inline void l2cap_set_timer(struct l2cap_chan *chan, static inline void l2cap_clear_timer(struct l2cap_chan *chan, struct delayed_work *work) { - if (__cancel_delayed_work(work)) + if (cancel_delayed_work(work)) l2cap_chan_put(chan); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dcccae04ae0..ec10c698b89 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2574,7 +2574,7 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hd if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && cmd->ident == conn->info_ident) { - __cancel_delayed_work(&conn->info_timer); + cancel_delayed_work(&conn->info_timer); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; @@ -3121,7 +3121,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) return 0; - __cancel_delayed_work(&conn->info_timer); + cancel_delayed_work(&conn->info_timer); if (result != L2CAP_IR_SUCCESS) { conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; @@ -4501,7 +4501,7 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) if (hcon->type == LE_LINK) { smp_distribute_keys(conn, 0); - __cancel_delayed_work(&conn->security_timer); + cancel_delayed_work(&conn->security_timer); } rcu_read_lock(); -- cgit v1.2.3-70-g09d2 From f2ea0f5f04c97b48c88edccba52b0682fbe45087 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 14 Jan 2012 21:44:49 +0300 Subject: crypto: sha512 - use standard ror64() Use standard ror64() instead of hand-written. There is no standard ror64, so create it. The difference is shift value being "unsigned int" instead of uint64_t (for which there is no reason). gcc starts to emit native ROR instructions which it doesn't do for some reason currently. This should make the code faster. Patch survives in-tree crypto test and ping flood with hmac(sha512) on. Signed-off-by: Alexey Dobriyan Signed-off-by: Herbert Xu --- crypto/sha512_generic.c | 13 ++++--------- include/linux/bitops.h | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index f04af931a68..107f6f7be5e 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -31,11 +31,6 @@ static inline u64 Maj(u64 x, u64 y, u64 z) return (x & y) | (z & (x | y)); } -static inline u64 RORu64(u64 x, u64 y) -{ - return (x >> y) | (x << (64 - y)); -} - static const u64 sha512_K[80] = { 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, @@ -66,10 +61,10 @@ static const u64 sha512_K[80] = { 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL, }; -#define e0(x) (RORu64(x,28) ^ RORu64(x,34) ^ RORu64(x,39)) -#define e1(x) (RORu64(x,14) ^ RORu64(x,18) ^ RORu64(x,41)) -#define s0(x) (RORu64(x, 1) ^ RORu64(x, 8) ^ (x >> 7)) -#define s1(x) (RORu64(x,19) ^ RORu64(x,61) ^ (x >> 6)) +#define e0(x) (ror64(x,28) ^ ror64(x,34) ^ ror64(x,39)) +#define e1(x) (ror64(x,14) ^ ror64(x,18) ^ ror64(x,41)) +#define s0(x) (ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7)) +#define s1(x) (ror64(x,19) ^ ror64(x,61) ^ (x >> 6)) static inline void LOAD_OP(int I, u64 *W, const u8 *input) { diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a3ef66a2a08..fc8a3ffce32 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -49,6 +49,26 @@ static inline unsigned long hweight_long(unsigned long w) return sizeof(w) == 4 ? hweight32(w) : hweight64(w); } +/** + * rol64 - rotate a 64-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u64 rol64(__u64 word, unsigned int shift) +{ + return (word << shift) | (word >> (64 - shift)); +} + +/** + * ror64 - rotate a 64-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u64 ror64(__u64 word, unsigned int shift) +{ + return (word >> shift) | (word << (64 - shift)); +} + /** * rol32 - rotate a 32-bit value left * @word: value to rotate -- cgit v1.2.3-70-g09d2 From 59cca653a601372e9b4a430d867377a3e4a36d76 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 2 Feb 2012 10:46:49 +0200 Subject: digsig: changed type of the timestamp time_t was used in the signature and key packet headers, which is typedef of long and is different on 32 and 64 bit architectures. Signature and key format should be independent of architecture. Similar to GPG, I have changed the type to uint32_t. Signed-off-by: Dmitry Kasatkin Signed-off-by: James Morris --- include/linux/digsig.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/digsig.h b/include/linux/digsig.h index b01558b1581..6f85a070bb4 100644 --- a/include/linux/digsig.h +++ b/include/linux/digsig.h @@ -30,7 +30,7 @@ enum digest_algo { struct pubkey_hdr { uint8_t version; /* key format version */ - time_t timestamp; /* key made, always 0 for now */ + uint32_t timestamp; /* key made, always 0 for now */ uint8_t algo; uint8_t nmpi; char mpi[0]; @@ -38,7 +38,7 @@ struct pubkey_hdr { struct signature_hdr { uint8_t version; /* signature format version */ - time_t timestamp; /* signature made */ + uint32_t timestamp; /* signature made */ uint8_t algo; uint8_t hash; uint8_t keyid[8]; -- cgit v1.2.3-70-g09d2 From 88ba136d6635b262f77cc418d536115fb8e4d4ab Mon Sep 17 00:00:00 2001 From: Joerg Willmann Date: Tue, 21 Feb 2012 13:26:14 +0100 Subject: netfilter: ebtables: fix alignment problem in ppc ebt_among extension of ebtables uses __alignof__(_xt_align) while the corresponding kernel module uses __alignof__(ebt_replace) to determine the alignment in EBT_ALIGN(). These are the results of these values on different platforms: x86 x86_64 ppc __alignof__(_xt_align) 4 8 8 __alignof__(ebt_replace) 4 8 4 ebtables fails to add rules which use the among extension. I'm using kernel 2.6.33 and ebtables 2.0.10-4 According to Bart De Schuymer, userspace alignment was changed to _xt_align to fix an alignment issue on a userspace32-kernel64 system (he thinks it was for an ARM device). So userspace must be right. The kernel alignment macro needs to change so it also uses _xt_align instead of ebt_replace. The userspace changes date back from June 29, 2009. Signed-off-by: Joerg Willmann Signed-off by: Bart De Schuymer Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 8797ed16feb..4dd5bd6994a 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -285,8 +285,8 @@ struct ebt_table { struct module *me; }; -#define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ - ~(__alignof__(struct ebt_replace)-1)) +#define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \ + ~(__alignof__(struct _xt_align)-1)) extern struct ebt_table *ebt_register_table(struct net *net, const struct ebt_table *table); extern void ebt_unregister_table(struct net *net, struct ebt_table *table); -- cgit v1.2.3-70-g09d2 From 115c9b81928360d769a76c632bae62d15206a94a Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 21 Feb 2012 16:54:48 -0500 Subject: rtnetlink: Fix problem with buffer allocation Implement a new netlink attribute type IFLA_EXT_MASK. The mask is a 32 bit value that can be used to indicate to the kernel that certain extended ifinfo values are requested by the user application. At this time the only mask value defined is RTEXT_FILTER_VF to indicate that the user wants the ifinfo dump to send information about the VFs belonging to the interface. This patch fixes a bug in which certain applications do not have large enough buffers to accommodate the extra information returned by the kernel with large numbers of SR-IOV virtual functions. Those applications will not send the new netlink attribute with the interface info dump request netlink messages so they will not get unexpectedly large request buffers returned by the kernel. Modifies the rtnl_calcit function to traverse the list of net devices and compute the minimum buffer size that can hold the info dumps of all matching devices based upon the filter passed in via the new netlink attribute filter mask. If no filter mask is sent then the buffer allocation defaults to NLMSG_GOODSIZE. With this change it is possible to add yet to be defined netlink attributes to the dump request which should make it fairly extensible in the future. Signed-off-by: Greg Rose Signed-off-by: David S. Miller --- include/linux/if_link.h | 1 + include/linux/rtnetlink.h | 3 ++ include/net/rtnetlink.h | 2 +- net/core/rtnetlink.c | 78 ++++++++++++++++++++++++++++++++++------------- 4 files changed, 62 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index c52d4b5f872..4b24ff453ae 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -137,6 +137,7 @@ enum { IFLA_AF_SPEC, IFLA_GROUP, /* Group the device belongs to */ IFLA_NET_NS_FD, + IFLA_EXT_MASK, /* Extended info mask, VFs, etc */ __IFLA_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8e872ead88b..577592ea0ea 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -602,6 +602,9 @@ struct tcamsg { #define TCA_ACT_TAB 1 /* attr type must be >=1 */ #define TCAA_MAX 1 +/* New extended info filters for IFLA_EXT_MASK */ +#define RTEXT_FILTER_VF (1 << 0) + /* End of information exported to user level */ #ifdef __KERNEL__ diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 678f1ffaf84..37029390197 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -6,7 +6,7 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); -typedef u16 (*rtnl_calcit_func)(struct sk_buff *); +typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); extern int __rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 65aebd45002..606a6e8f367 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -60,7 +60,6 @@ struct rtnl_link { }; static DEFINE_MUTEX(rtnl_mutex); -static u16 min_ifinfo_dump_size; void rtnl_lock(void) { @@ -724,10 +723,11 @@ static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) } /* All VF info */ -static inline int rtnl_vfinfo_size(const struct net_device *dev) +static inline int rtnl_vfinfo_size(const struct net_device *dev, + u32 ext_filter_mask) { - if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { - + if (dev->dev.parent && dev_is_pci(dev->dev.parent) && + (ext_filter_mask & RTEXT_FILTER_VF)) { int num_vfs = dev_num_vf(dev->dev.parent); size_t size = nla_total_size(sizeof(struct nlattr)); size += nla_total_size(num_vfs * sizeof(struct nlattr)); @@ -766,7 +766,8 @@ static size_t rtnl_port_size(const struct net_device *dev) return port_self_size; } -static noinline size_t if_nlmsg_size(const struct net_device *dev) +static noinline size_t if_nlmsg_size(const struct net_device *dev, + u32 ext_filter_mask) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -784,8 +785,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ - + nla_total_size(4) /* IFLA_NUM_VF */ - + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + + nla_total_size(ext_filter_mask + & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ @@ -868,7 +870,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, - unsigned int flags) + unsigned int flags, u32 ext_filter_mask) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -941,10 +943,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; copy_rtnl_link_stats64(nla_data(attr), stats); - if (dev->dev.parent) + if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); - if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { + if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent + && (ext_filter_mask & RTEXT_FILTER_VF)) { int i; struct nlattr *vfinfo, *vf; @@ -1048,6 +1051,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct hlist_head *head; struct hlist_node *node; + struct nlattr *tb[IFLA_MAX+1]; + u32 ext_filter_mask = 0; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -1055,6 +1060,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; + nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + ifla_policy); + + if (tb[IFLA_EXT_MASK]) + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; @@ -1064,7 +1075,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI) <= 0) + NLM_F_MULTI, + ext_filter_mask) <= 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -1100,6 +1112,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, [IFLA_AF_SPEC] = { .type = NLA_NESTED }, + [IFLA_EXT_MASK] = { .type = NLA_U32 }, }; EXPORT_SYMBOL(ifla_policy); @@ -1509,8 +1522,6 @@ errout: if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - min_ifinfo_dump_size = max_t(u16, if_nlmsg_size(dev), - min_ifinfo_dump_size); return err; } @@ -1842,6 +1853,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct net_device *dev = NULL; struct sk_buff *nskb; int err; + u32 ext_filter_mask = 0; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) @@ -1850,6 +1862,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + if (tb[IFLA_EXT_MASK]) + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); @@ -1861,12 +1876,12 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (dev == NULL) return -ENODEV; - nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); if (nskb == NULL) return -ENOBUFS; err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, - nlh->nlmsg_seq, 0, 0); + nlh->nlmsg_seq, 0, 0, ext_filter_mask); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -1877,8 +1892,31 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return err; } -static u16 rtnl_calcit(struct sk_buff *skb) +static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); + struct net_device *dev; + struct nlattr *tb[IFLA_MAX+1]; + u32 ext_filter_mask = 0; + u16 min_ifinfo_dump_size = 0; + + nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ifla_policy); + + if (tb[IFLA_EXT_MASK]) + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + + if (!ext_filter_mask) + return NLMSG_GOODSIZE; + /* + * traverse the list of net devices and compute the minimum + * buffer size based upon the filter mask. + */ + list_for_each_entry(dev, &net->dev_base_head, dev_list) { + min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size, + if_nlmsg_size(dev, + ext_filter_mask)); + } + return min_ifinfo_dump_size; } @@ -1913,13 +1951,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) int err = -ENOBUFS; size_t if_info_size; - skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL); + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL); if (skb == NULL) goto errout; - min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size); - - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -1977,7 +2013,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EOPNOTSUPP; calcit = rtnl_get_calcit(family, type); if (calcit) - min_dump_alloc = calcit(skb); + min_dump_alloc = calcit(skb, nlh); __rtnl_unlock(); rtnl = net->rtnl; -- cgit v1.2.3-70-g09d2 From 797a796a13df6b84a4791e57306737059b5b2384 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Tue, 7 Feb 2012 11:45:33 +0900 Subject: asm-generic: architecture independent readq/writeq for 32bit environment This provides unified readq()/writeq() helper functions for 32-bit drivers. For some cases, readq/writeq without atomicity is harmful, and order of io access has to be specified explicitly. So in this patch, new two header files which contain non-atomic readq/writeq are added. - provides non-atomic readq/ writeq with the order of lower address -> higher address - provides non-atomic readq/ writeq with reversed order This allows us to remove some readq()s that were added drivers when the default non-atomic ones were removed in commit dbee8a0affd5 ("x86: remove 32-bit versions of readq()/writeq()") The drivers which need readq/writeq but can do with the non-atomic ones must add the line: #include /* or hi-lo.h */ But this will be nop in 64-bit environments, and no other #ifdefs are required. So I believe that this patch can solve the problem of 1. driver-specific readq/writeq 2. atomicity and order of io access This patch is tested with building allyesconfig and allmodconfig as ARCH=x86 and ARCH=i386 on top of tip/master. Cc: Kashyap Desai Cc: Len Brown Cc: Ravi Anand Cc: Vikas Chaudhary Cc: Matthew Garrett Cc: Jason Uhlenkott Cc: James Bottomley Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Roland Dreier Cc: James Bottomley Cc: Alan Cox Cc: Matthew Wilcox Cc: Andrew Morton Signed-off-by: Hitoshi Mitake Signed-off-by: Linus Torvalds --- drivers/block/nvme.c | 2 ++ drivers/edac/i3200_edac.c | 15 ++------------- drivers/platform/x86/ibm_rtl.c | 15 ++------------- drivers/platform/x86/intel_ips.c | 15 ++------------- drivers/scsi/qla4xxx/ql4_nx.c | 23 ++--------------------- include/asm-generic/io-64-nonatomic-hi-lo.h | 28 ++++++++++++++++++++++++++++ include/asm-generic/io-64-nonatomic-lo-hi.h | 28 ++++++++++++++++++++++++++++ 7 files changed, 66 insertions(+), 60 deletions(-) create mode 100644 include/asm-generic/io-64-nonatomic-hi-lo.h create mode 100644 include/asm-generic/io-64-nonatomic-lo-hi.h (limited to 'include') diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index c1dc4d86c22..1f3c1a7d132 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -41,6 +41,8 @@ #include #include +#include + #define NVME_Q_DEPTH 1024 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index aa08497a075..73f55e2008c 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -15,6 +15,8 @@ #include #include "edac_core.h" +#include + #define I3200_REVISION "1.1" #define EDAC_MOD_STR "i3200_edac" @@ -101,19 +103,6 @@ struct i3200_priv { static int nr_channels; -#ifndef readq -static inline __u64 readq(const volatile void __iomem *addr) -{ - const volatile u32 __iomem *p = addr; - u32 low, high; - - low = readl(p); - high = readl(p + 1); - - return low + ((u64)high << 32); -} -#endif - static int how_many_channels(struct pci_dev *pdev) { unsigned char capid0_8b; /* 8th byte of CAPID0 */ diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c index 42a7d603c87..7481146a5b4 100644 --- a/drivers/platform/x86/ibm_rtl.c +++ b/drivers/platform/x86/ibm_rtl.c @@ -33,6 +33,8 @@ #include #include +#include + static bool force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Force driver load, ignore DMI data"); @@ -83,19 +85,6 @@ static void __iomem *rtl_cmd_addr; static u8 rtl_cmd_type; static u8 rtl_cmd_width; -#ifndef readq -static inline __u64 readq(const volatile void __iomem *addr) -{ - const volatile u32 __iomem *p = addr; - u32 low, high; - - low = readl(p); - high = readl(p + 1); - - return low + ((u64)high << 32); -} -#endif - static void __iomem *rtl_port_map(phys_addr_t addr, unsigned long len) { if (rtl_cmd_type == RTL_ADDR_TYPE_MMIO) diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index 809a3ae943c..88a98cff5a4 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -77,6 +77,8 @@ #include #include "intel_ips.h" +#include + #define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32 /* @@ -344,19 +346,6 @@ struct ips_driver { static bool ips_gpu_turbo_enabled(struct ips_driver *ips); -#ifndef readq -static inline __u64 readq(const volatile void __iomem *addr) -{ - const volatile u32 __iomem *p = addr; - u32 low, high; - - low = readl(p); - high = readl(p + 1); - - return low + ((u64)high << 32); -} -#endif - /** * ips_cpu_busy - is CPU busy? * @ips: IPS driver struct diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c index 78f1111158d..65253dfbe96 100644 --- a/drivers/scsi/qla4xxx/ql4_nx.c +++ b/drivers/scsi/qla4xxx/ql4_nx.c @@ -10,6 +10,8 @@ #include "ql4_def.h" #include "ql4_glbl.h" +#include + #define MASK(n) DMA_BIT_MASK(n) #define MN_WIN(addr) (((addr & 0x1fc0000) >> 1) | ((addr >> 25) & 0x3ff)) #define OCM_WIN(addr) (((addr & 0x1ff0000) >> 1) | ((addr >> 25) & 0x3ff)) @@ -655,27 +657,6 @@ static int qla4_8xxx_pci_is_same_window(struct scsi_qla_host *ha, return 0; } -#ifndef readq -static inline __u64 readq(const volatile void __iomem *addr) -{ - const volatile u32 __iomem *p = addr; - u32 low, high; - - low = readl(p); - high = readl(p + 1); - - return low + ((u64)high << 32); -} -#endif - -#ifndef writeq -static inline void writeq(__u64 val, volatile void __iomem *addr) -{ - writel(val, addr); - writel(val >> 32, addr+4); -} -#endif - static int qla4_8xxx_pci_mem_read_direct(struct scsi_qla_host *ha, u64 off, void *data, int size) { diff --git a/include/asm-generic/io-64-nonatomic-hi-lo.h b/include/asm-generic/io-64-nonatomic-hi-lo.h new file mode 100644 index 00000000000..a6806a94250 --- /dev/null +++ b/include/asm-generic/io-64-nonatomic-hi-lo.h @@ -0,0 +1,28 @@ +#ifndef _ASM_IO_64_NONATOMIC_HI_LO_H_ +#define _ASM_IO_64_NONATOMIC_HI_LO_H_ + +#include +#include + +#ifndef readq +static inline __u64 readq(const volatile void __iomem *addr) +{ + const volatile u32 __iomem *p = addr; + u32 low, high; + + high = readl(p + 1); + low = readl(p); + + return low + ((u64)high << 32); +} +#endif + +#ifndef writeq +static inline void writeq(__u64 val, volatile void __iomem *addr) +{ + writel(val >> 32, addr + 4); + writel(val, addr); +} +#endif + +#endif /* _ASM_IO_64_NONATOMIC_HI_LO_H_ */ diff --git a/include/asm-generic/io-64-nonatomic-lo-hi.h b/include/asm-generic/io-64-nonatomic-lo-hi.h new file mode 100644 index 00000000000..ca546b1ff8b --- /dev/null +++ b/include/asm-generic/io-64-nonatomic-lo-hi.h @@ -0,0 +1,28 @@ +#ifndef _ASM_IO_64_NONATOMIC_LO_HI_H_ +#define _ASM_IO_64_NONATOMIC_LO_HI_H_ + +#include +#include + +#ifndef readq +static inline __u64 readq(const volatile void __iomem *addr) +{ + const volatile u32 __iomem *p = addr; + u32 low, high; + + low = readl(p); + high = readl(p + 1); + + return low + ((u64)high << 32); +} +#endif + +#ifndef writeq +static inline void writeq(__u64 val, volatile void __iomem *addr) +{ + writel(val, addr); + writel(val >> 32, addr + 4); +} +#endif + +#endif /* _ASM_IO_64_NONATOMIC_LO_HI_H_ */ -- cgit v1.2.3-70-g09d2 From faf309009e2e18d30c032b7d9479f29b91677c37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 21 Feb 2012 17:24:20 -0800 Subject: sys_poll: fix incorrect type for 'timeout' parameter The 'poll()' system call timeout parameter is supposed to be 'int', not 'long'. Now, the reason this matters is that right now 32-bit compat mode is broken on at least x86-64, because the 32-bit code just calls 'sys_poll()' directly on x86-64, and the 32-bit argument will have been zero-extended, turning a signed 'int' into a large unsigned 'long' value. We could just introduce a 'compat_sys_poll()' function for this, and that may eventually be what we have to do, but since the actual standard poll() semantics is *supposed* to be 'int', and since at least on x86-64 glibc sign-extends the argument before invocing the system call (so nobody can actually use a 64-bit timeout value in user space _anyway_, even in 64-bit binaries), the simpler solution would seem to be to just fix the definition of the system call to match what it should have been from the very start. If it turns out that somebody somehow circumvents the user-level libc 64-bit sign extension and actually uses a large unsigned 64-bit timeout despite that not being how poll() is supposed to work, we will need to do the compat_sys_poll() approach. Reported-by: Thomas Meyer Acked-by: Eric Dumazet Signed-off-by: Linus Torvalds --- arch/s390/kernel/compat_wrapper.S | 2 +- fs/select.c | 2 +- include/linux/syscalls.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 18c51df9fe0..ff605a39cf4 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -662,7 +662,7 @@ ENTRY(sys32_getresuid16_wrapper) ENTRY(sys32_poll_wrapper) llgtr %r2,%r2 # struct pollfd * llgfr %r3,%r3 # unsigned int - lgfr %r4,%r4 # long + lgfr %r4,%r4 # int jg sys_poll # branch to system call ENTRY(sys32_setresgid16_wrapper) diff --git a/fs/select.c b/fs/select.c index d33418fdc85..e782258d0de 100644 --- a/fs/select.c +++ b/fs/select.c @@ -912,7 +912,7 @@ static long do_restart_poll(struct restart_block *restart_block) } SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, - long, timeout_msecs) + int, timeout_msecs) { struct timespec end_time, *to = NULL; int ret; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 515669fa3c1..8ec1153ff57 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -624,7 +624,7 @@ asmlinkage long sys_socketpair(int, int, int, int __user *); asmlinkage long sys_socketcall(int call, unsigned long __user *args); asmlinkage long sys_listen(int, int); asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, - long timeout); + int timeout); asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp); asmlinkage long sys_old_select(struct sel_arg_struct __user *arg); -- cgit v1.2.3-70-g09d2 From 8c79a045fd590a26e81e75f5d8d4ec5c7d23e565 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Jan 2012 14:51:37 +0100 Subject: sched/events: Revert trace_sched_stat_sleeptime() Commit 1ac9bc69 ("sched/tracing: Add a new tracepoint for sleeptime") added a new sched:sched_stat_sleeptime tracepoint. It's broken: the first sample we get on a task might be bad because of a stale sleep_start value that wasn't reset at the last task switch because the tracepoint was not active. It also breaks the existing schedstat samples due to the side effects of: - se->statistics.sleep_start = 0; ... - se->statistics.block_start = 0; Nor do I see means to fix it without adding overhead to the scheduler fast path, which I'm not willing to for the sake of redundant instrumentation. Most importantly, sleep time information can already be constructed by tracing context switches and wakeups, and taking the timestamp difference between the schedule-out, the wakeup and the schedule-in. Signed-off-by: Peter Zijlstra Cc: Andrew Vagin Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-pc4c9qhl8q6vg3bs4j6k0rbd@git.kernel.org Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 50 -------------------------------------------- kernel/sched/core.c | 1 - kernel/sched/fair.c | 2 ++ 3 files changed, 2 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 6ba596b07a7..e33ed1bfa11 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -370,56 +370,6 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); -#ifdef CREATE_TRACE_POINTS -static inline u64 trace_get_sleeptime(struct task_struct *tsk) -{ -#ifdef CONFIG_SCHEDSTATS - u64 block, sleep; - - block = tsk->se.statistics.block_start; - sleep = tsk->se.statistics.sleep_start; - tsk->se.statistics.block_start = 0; - tsk->se.statistics.sleep_start = 0; - - return block ? block : sleep ? sleep : 0; -#else - return 0; -#endif -} -#endif - -/* - * Tracepoint for accounting sleeptime (time the task is sleeping - * or waiting for I/O). - */ -TRACE_EVENT(sched_stat_sleeptime, - - TP_PROTO(struct task_struct *tsk, u64 now), - - TP_ARGS(tsk, now), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, sleeptime ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->sleeptime = trace_get_sleeptime(tsk); - __entry->sleeptime = __entry->sleeptime ? - now - __entry->sleeptime : 0; - ) - TP_perf_assign( - __perf_count(__entry->sleeptime); - ), - - TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->sleeptime) -); - /* * Tracepoint for showing priority inheritance modifying a tasks * priority. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5255c9d2e05..b342f57879e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1932,7 +1932,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) local_irq_enable(); #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ finish_lock_switch(rq, prev); - trace_sched_stat_sleeptime(current, rq->clock); fire_sched_in_preempt_notifiers(current); if (mm) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7c6414fc669..aca16b843b7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1003,6 +1003,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) if (unlikely(delta > se->statistics.sleep_max)) se->statistics.sleep_max = delta; + se->statistics.sleep_start = 0; se->statistics.sum_sleep_runtime += delta; if (tsk) { @@ -1019,6 +1020,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) if (unlikely(delta > se->statistics.block_max)) se->statistics.block_max = delta; + se->statistics.block_start = 0; se->statistics.sum_sleep_runtime += delta; if (tsk) { -- cgit v1.2.3-70-g09d2 From 03606895cd98c0a628b17324fd7b5ff15db7e3cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Feb 2012 10:55:02 +0000 Subject: ipsec: be careful of non existing mac headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Niccolo Belli reported ipsec crashes in case we handle a frame without mac header (atm in his case) Before copying mac header, better make sure it is present. Bugzilla reference: https://bugzilla.kernel.org/show_bug.cgi?id=42809 Reported-by: Niccolò Belli Tested-by: Niccolò Belli Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++++ net/ipv4/xfrm4_mode_beet.c | 5 +---- net/ipv4/xfrm4_mode_tunnel.c | 6 ++---- net/ipv6/xfrm6_mode_beet.c | 6 +----- net/ipv6/xfrm6_mode_tunnel.c | 6 ++---- 5 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 50db9b04a55..ae86adee374 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1465,6 +1465,16 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) } #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_mac_header_rebuild(struct sk_buff *skb) +{ + if (skb_mac_header_was_set(skb)) { + const unsigned char *old_mac = skb_mac_header(skb); + + skb_set_mac_header(skb, -skb->mac_len); + memmove(skb_mac_header(skb), old_mac, skb->mac_len); + } +} + static inline int skb_checksum_start_offset(const struct sk_buff *skb) { return skb->csum_start - skb_headroom(skb); diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 63418185f52..e3db3f91511 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, sizeof(*iph)); skb_reset_network_header(skb); - - memmove(skb->data - skb->mac_len, skb_mac_header(skb), - skb->mac_len); - skb_set_mac_header(skb, -skb->mac_len); + skb_mac_header_rebuild(skb); xfrm4_beet_make_header(skb); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 534972e114a..ed4bf11ef9f 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - const unsigned char *old_mac; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) @@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index a81ce945075..9949a356d62 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -80,7 +80,6 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *ip6h; - const unsigned char *old_mac; int size = sizeof(struct ipv6hdr); int err; @@ -90,10 +89,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) __skb_push(skb, size); skb_reset_network_header(skb); - - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); + skb_mac_header_rebuild(skb); xfrm6_beet_make_header(skb); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 261e6e6f487..9f2095b19ad 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -63,7 +63,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - const unsigned char *old_mac; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) goto out; @@ -80,10 +79,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip6_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: -- cgit v1.2.3-70-g09d2 From 7d367e06688dc7a2cc98c2ace04e1296e1d987e2 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 24 Feb 2012 11:45:49 +0100 Subject: netfilter: ctnetlink: fix soft lockup when netlink adds new entries (v2) Marcell Zambo and Janos Farago noticed and reported that when new conntrack entries are added via netlink and the conntrack table gets full, soft lockup happens. This is because the nf_conntrack_lock is held while nf_conntrack_alloc is called, which is in turn wants to lock nf_conntrack_lock while evicting entries from the full table. The patch fixes the soft lockup with limiting the holding of the nf_conntrack_lock to the minimum, where it's absolutely required. It required to extend (and thus change) nf_conntrack_hash_insert so that it makes sure conntrack and ctnetlink do not add the same entry twice to the conntrack table. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_conntrack_core.c | 38 +++++++++++++++++++++++++---- net/netfilter/nf_conntrack_netlink.c | 46 +++++++++++++----------------------- 3 files changed, 51 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8a2b0ae7dbd..ab86036bbf0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -209,7 +209,7 @@ extern struct nf_conntrack_tuple_hash * __nf_conntrack_find(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple); -extern void nf_conntrack_hash_insert(struct nf_conn *ct); +extern int nf_conntrack_hash_check_insert(struct nf_conn *ct); extern void nf_ct_delete_from_lists(struct nf_conn *ct); extern void nf_ct_insert_dying_list(struct nf_conn *ct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 76613f5a55c..ed86a3be678 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -404,19 +404,49 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, &net->ct.hash[repl_hash]); } -void nf_conntrack_hash_insert(struct nf_conn *ct) +int +nf_conntrack_hash_check_insert(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; u16 zone; zone = nf_ct_zone(ct); - hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, zone, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + spin_lock_bh(&nf_conntrack_lock); + /* See if there's one in the list already, including reverse */ + hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + goto out; + hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + goto out; + + add_timer(&ct->timeout); + nf_conntrack_get(&ct->ct_general); __nf_conntrack_hash_insert(ct, hash, repl_hash); + NF_CT_STAT_INC(net, insert); + spin_unlock_bh(&nf_conntrack_lock); + + return 0; + +out: + NF_CT_STAT_INC(net, insert_failed); + spin_unlock_bh(&nf_conntrack_lock); + return -EEXIST; } -EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); +EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); /* Confirm a connection given skb; places it in hash table */ int diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9307b033c0c..30c9d4ca021 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1367,15 +1367,12 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); - spin_unlock_bh(&nf_conntrack_lock); #ifdef CONFIG_MODULES if (request_module("nfct-helper-%s", helpname) < 0) { - spin_lock_bh(&nf_conntrack_lock); err = -EOPNOTSUPP; goto err1; } - spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), @@ -1468,8 +1465,10 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, if (tstamp) tstamp->start = ktime_to_ns(ktime_get_real()); - add_timer(&ct->timeout); - nf_conntrack_hash_insert(ct); + err = nf_conntrack_hash_check_insert(ct); + if (err < 0) + goto err2; + rcu_read_unlock(); return ct; @@ -1490,6 +1489,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); + struct nf_conn *ct; u_int8_t u3 = nfmsg->nfgen_family; u16 zone; int err; @@ -1510,27 +1510,22 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; } - spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(net, zone, &otuple); + h = nf_conntrack_find_get(net, zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(net, zone, &rtuple); + h = nf_conntrack_find_get(net, zone, &rtuple); if (h == NULL) { err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - struct nf_conn *ct; enum ip_conntrack_events events; ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, &rtuple, u3); - if (IS_ERR(ct)) { - err = PTR_ERR(ct); - goto out_unlock; - } + if (IS_ERR(ct)) + return PTR_ERR(ct); + err = 0; - nf_conntrack_get(&ct->ct_general); - spin_unlock_bh(&nf_conntrack_lock); if (test_bit(IPS_EXPECTED_BIT, &ct->status)) events = IPCT_RELATED; else @@ -1545,23 +1540,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, ct, NETLINK_CB(skb).pid, nlmsg_report(nlh)); nf_ct_put(ct); - } else - spin_unlock_bh(&nf_conntrack_lock); + } return err; } /* implicit 'else' */ - /* We manipulate the conntrack inside the global conntrack table lock, - * so there's no need to increase the refcount */ err = -EEXIST; + ct = nf_ct_tuplehash_to_ctrack(h); if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - + spin_lock_bh(&nf_conntrack_lock); err = ctnetlink_change_conntrack(ct, cda); + spin_unlock_bh(&nf_conntrack_lock); if (err == 0) { - nf_conntrack_get(&ct->ct_general); - spin_unlock_bh(&nf_conntrack_lock); nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -1570,15 +1561,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_MARK), ct, NETLINK_CB(skb).pid, nlmsg_report(nlh)); - nf_ct_put(ct); - } else - spin_unlock_bh(&nf_conntrack_lock); - - return err; + } } -out_unlock: - spin_unlock_bh(&nf_conntrack_lock); + nf_ct_put(ct); return err; } -- cgit v1.2.3-70-g09d2 From d80e731ecab420ddcb79ee9d0ac427acbc187b4b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 24 Feb 2012 20:07:11 +0100 Subject: epoll: introduce POLLFREE to flush ->signalfd_wqh before kfree() This patch is intentionally incomplete to simplify the review. It ignores ep_unregister_pollwait() which plays with the same wqh. See the next change. epoll assumes that the EPOLL_CTL_ADD'ed file controls everything f_op->poll() needs. In particular it assumes that the wait queue can't go away until eventpoll_release(). This is not true in case of signalfd, the task which does EPOLL_CTL_ADD uses its ->sighand which is not connected to the file. This patch adds the special event, POLLFREE, currently only for epoll. It expects that init_poll_funcptr()'ed hook should do the necessary cleanup. Perhaps it should be defined as EPOLLFREE in eventpoll. __cleanup_sighand() is changed to do wake_up_poll(POLLFREE) if ->signalfd_wqh is not empty, we add the new signalfd_cleanup() helper. ep_poll_callback(POLLFREE) simply does list_del_init(task_list). This make this poll entry inconsistent, but we don't care. If you share epoll fd which contains our sigfd with another process you should blame yourself. signalfd is "really special". I simply do not know how we can define the "right" semantics if it used with epoll. The main problem is, epoll calls signalfd_poll() once to establish the connection with the wait queue, after that signalfd_poll(NULL) returns the different/inconsistent results depending on who does EPOLL_CTL_MOD/signalfd_read/etc. IOW: apart from sigmask, signalfd has nothing to do with the file, it works with the current thread. In short: this patch is the hack which tries to fix the symptoms. It also assumes that nobody can take tasklist_lock under epoll locks, this seems to be true. Note: - we do not have wake_up_all_poll() but wake_up_poll() is fine, poll/epoll doesn't use WQ_FLAG_EXCLUSIVE. - signalfd_cleanup() uses POLLHUP along with POLLFREE, we need a couple of simple changes in eventpoll.c to make sure it can't be "lost". Reported-by: Maxime Bizon Cc: Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 4 ++++ fs/signalfd.c | 11 +++++++++++ include/asm-generic/poll.h | 2 ++ include/linux/signalfd.h | 5 ++++- kernel/fork.c | 5 ++++- 5 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index aabdfc38cf2..34bbfc6dd8d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -842,6 +842,10 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; + /* the caller holds eppoll_entry->whead->lock */ + if ((unsigned long)key & POLLFREE) + list_del_init(&wait->task_list); + spin_lock_irqsave(&ep->lock, flags); /* diff --git a/fs/signalfd.c b/fs/signalfd.c index 492465b451d..79c1eea98a3 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -30,6 +30,17 @@ #include #include +void signalfd_cleanup(struct sighand_struct *sighand) +{ + wait_queue_head_t *wqh = &sighand->signalfd_wqh; + + if (likely(!waitqueue_active(wqh))) + return; + + /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */ + wake_up_poll(wqh, POLLHUP | POLLFREE); +} + struct signalfd_ctx { sigset_t sigmask; }; diff --git a/include/asm-generic/poll.h b/include/asm-generic/poll.h index 44bce836d35..9ce7f44aebd 100644 --- a/include/asm-generic/poll.h +++ b/include/asm-generic/poll.h @@ -28,6 +28,8 @@ #define POLLRDHUP 0x2000 #endif +#define POLLFREE 0x4000 /* currently only for epoll */ + struct pollfd { int fd; short events; diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index 3ff4961da9b..247399b2979 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -61,13 +61,16 @@ static inline void signalfd_notify(struct task_struct *tsk, int sig) wake_up(&tsk->sighand->signalfd_wqh); } +extern void signalfd_cleanup(struct sighand_struct *sighand); + #else /* CONFIG_SIGNALFD */ static inline void signalfd_notify(struct task_struct *tsk, int sig) { } +static inline void signalfd_cleanup(struct sighand_struct *sighand) { } + #endif /* CONFIG_SIGNALFD */ #endif /* __KERNEL__ */ #endif /* _LINUX_SIGNALFD_H */ - diff --git a/kernel/fork.c b/kernel/fork.c index b77fd559c78..e2cd3e2a5ae 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include @@ -935,8 +936,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) void __cleanup_sighand(struct sighand_struct *sighand) { - if (atomic_dec_and_test(&sighand->count)) + if (atomic_dec_and_test(&sighand->count)) { + signalfd_cleanup(sighand); kmem_cache_free(sighand_cachep, sighand); + } } -- cgit v1.2.3-70-g09d2 From 3c761ea05a8900a907f32b628611873f6bef24b2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Feb 2012 09:44:55 -0800 Subject: Fix autofs compile without CONFIG_COMPAT The autofs compat handling fix caused a compile failure when CONFIG_COMPAT isn't defined. Instead of adding random #ifdef'fery in autofs, let's just make the compat helpers earlier to use: without CONFIG_COMPAT, is_compat_task() just hardcodes to zero. We could probably do something similar for a number of other cases where we have #ifdef's in code, but this is the low-hanging fruit. Reported-and-tested-by: Andreas Schwab Signed-off-by: Linus Torvalds --- include/linux/compat.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index 41c9f6515f4..7e05fcee75a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -561,5 +561,9 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, unsigned long liovcnt, const struct compat_iovec __user *rvec, unsigned long riovcnt, unsigned long flags); +#else + +#define is_compat_task() (0) + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ -- cgit v1.2.3-70-g09d2 From 97a29d59fc222b36bac3ee3a8ae994f65bf7ffdf Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 30 Jan 2012 10:40:47 -0600 Subject: [PARISC] fix compile break caused by iomap: make IOPORT/PCI mapping functions conditional The problem in commit fea80311a939a746533a6d7e7c3183729d6a3faf Author: Randy Dunlap Date: Sun Jul 24 11:39:14 2011 -0700 iomap: make IOPORT/PCI mapping functions conditional is that if your architecture supplies pci_iomap/pci_iounmap, it expects always to supply them. Adding empty body defitions in the !CONFIG_PCI case, which is what this patch does, breaks the parisc compile because the functions become doubly defined. It took us a while to spot this, because we don't actually build !CONFIG_PCI very often (only if someone is brave enough to test the snake/asp machines). Since the note in the commit log says this is to fix a CONFIG_GENERIC_IOMAP issue (which it does because CONFIG_GENERIC_IOMAP supplies pci_iounmap only if CONFIG_PCI is set), there should actually have been a condition upon this. This should make sure no other architecture's !CONFIG_PCI compile breaks in the same way as parisc. The fix had to be updated to take account of the GENERIC_PCI_IOMAP separation. Reported-by: Rolf Eike Beer Signed-off-by: James Bottomley --- include/asm-generic/iomap.h | 2 +- include/asm-generic/pci_iomap.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 8a3d4fde260..6afd7d6a989 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -70,7 +70,7 @@ extern void ioport_unmap(void __iomem *); /* Destroy a virtual mapping cookie for a PCI BAR (memory or IO) */ struct pci_dev; extern void pci_iounmap(struct pci_dev *dev, void __iomem *); -#else +#elif defined(CONFIG_GENERIC_IOMAP) struct pci_dev; static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) { } diff --git a/include/asm-generic/pci_iomap.h b/include/asm-generic/pci_iomap.h index e58fcf89137..ce37349860f 100644 --- a/include/asm-generic/pci_iomap.h +++ b/include/asm-generic/pci_iomap.h @@ -25,7 +25,7 @@ extern void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port, #define __pci_ioport_map(dev, port, nr) ioport_map((port), (nr)) #endif -#else +#elif defined(CONFIG_GENERIC_PCI_IOMAP) static inline void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) { return NULL; -- cgit v1.2.3-70-g09d2 From c8e252586f8d5de906385d8cf6385fee289a825e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 2 Mar 2012 10:43:48 -0800 Subject: regset: Prevent null pointer reference on readonly regsets The regset common infrastructure assumed that regsets would always have .get and .set methods, but not necessarily .active methods. Unfortunately people have since written regsets without .set methods. Rather than putting in stub functions everywhere, handle regsets with null .get or .set methods explicitly. Signed-off-by: H. Peter Anvin Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- include/linux/regset.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index bcb884e2d61..07d096c4992 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1421,7 +1421,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, for (i = 1; i < view->n; ++i) { const struct user_regset *regset = &view->regsets[i]; do_thread_regset_writeback(t->task, regset); - if (regset->core_note_type && + if (regset->core_note_type && regset->get && (!regset->active || regset->active(t->task, regset))) { int ret; size_t size = regset->n * regset->size; diff --git a/include/linux/regset.h b/include/linux/regset.h index 8abee655622..5150fd16ef9 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -335,6 +335,9 @@ static inline int copy_regset_to_user(struct task_struct *target, { const struct user_regset *regset = &view->regsets[setno]; + if (!regset->get) + return -EOPNOTSUPP; + if (!access_ok(VERIFY_WRITE, data, size)) return -EIO; @@ -358,6 +361,9 @@ static inline int copy_regset_from_user(struct task_struct *target, { const struct user_regset *regset = &view->regsets[setno]; + if (!regset->set) + return -EOPNOTSUPP; + if (!access_ok(VERIFY_READ, data, size)) return -EIO; -- cgit v1.2.3-70-g09d2 From 5189fa19a4b2b4c3bec37c3a019d446148827717 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 2 Mar 2012 10:43:49 -0800 Subject: regset: Return -EFAULT, not -EIO, on host-side memory fault There is only one error code to return for a bad user-space buffer pointer passed to a system call in the same address space as the system call is executed, and that is EFAULT. Furthermore, the low-level access routines, which catch most of the faults, return EFAULT already. Signed-off-by: H. Peter Anvin Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Signed-off-by: Linus Torvalds --- include/linux/regset.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/regset.h b/include/linux/regset.h index 5150fd16ef9..686f37327a4 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -339,7 +339,7 @@ static inline int copy_regset_to_user(struct task_struct *target, return -EOPNOTSUPP; if (!access_ok(VERIFY_WRITE, data, size)) - return -EIO; + return -EFAULT; return regset->get(target, regset, offset, size, NULL, data); } @@ -365,7 +365,7 @@ static inline int copy_regset_from_user(struct task_struct *target, return -EOPNOTSUPP; if (!access_ok(VERIFY_READ, data, size)) - return -EIO; + return -EFAULT; return regset->set(target, regset, offset, size, NULL, data); } -- cgit v1.2.3-70-g09d2 From 8966be90304b394fd6a2c5af7b6b3abe2df3889c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Mar 2012 14:23:30 -0800 Subject: vfs: trivial __d_lookup_rcu() cleanups These don't change any semantics, but they clean up the code a bit and mark some arguments appropriately 'const'. They came up as I was doing the word-at-a-time dcache name accessor code, and cleaning this up now allows me to send out a smaller relevant interesting patch for the experimental stuff. Signed-off-by: Linus Torvalds --- fs/dcache.c | 13 ++++++++----- include/linux/dcache.h | 3 ++- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index fe19ac13f75..138be96e25b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -104,7 +104,7 @@ static unsigned int d_hash_shift __read_mostly; static struct hlist_bl_head *dentry_hashtable __read_mostly; -static inline struct hlist_bl_head *d_hash(struct dentry *parent, +static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned long hash) { hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; @@ -1717,8 +1717,9 @@ EXPORT_SYMBOL(d_add_ci); * child is looked up. Thus, an interlocking stepping of sequence lock checks * is formed, giving integrity down the path walk. */ -struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, - unsigned *seq, struct inode **inode) +struct dentry *__d_lookup_rcu(const struct dentry *parent, + const struct qstr *name, + unsigned *seqp, struct inode **inode) { unsigned int len = name->len; unsigned int hash = name->hash; @@ -1748,6 +1749,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, * See Documentation/filesystems/path-lookup.txt for more details. */ hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { + unsigned seq; struct inode *i; const char *tname; int tlen; @@ -1756,7 +1758,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, continue; seqretry: - *seq = read_seqcount_begin(&dentry->d_seq); + seq = read_seqcount_begin(&dentry->d_seq); if (dentry->d_parent != parent) continue; if (d_unhashed(dentry)) @@ -1771,7 +1773,7 @@ seqretry: * edge of memory when walking. If we could load this * atomically some other way, we could drop this check. */ - if (read_seqcount_retry(&dentry->d_seq, *seq)) + if (read_seqcount_retry(&dentry->d_seq, seq)) goto seqretry; if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { if (parent->d_op->d_compare(parent, *inode, @@ -1788,6 +1790,7 @@ seqretry: * order to do anything useful with the returned dentry * anyway. */ + *seqp = seq; *inode = i; return dentry; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d64a55b23af..61b24261e07 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -309,7 +309,8 @@ extern struct dentry *d_ancestor(struct dentry *, struct dentry *); extern struct dentry *d_lookup(struct dentry *, struct qstr *); extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); extern struct dentry *__d_lookup(struct dentry *, struct qstr *); -extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, +extern struct dentry *__d_lookup_rcu(const struct dentry *parent, + const struct qstr *name, unsigned *seq, struct inode **inode); /** -- cgit v1.2.3-70-g09d2 From 0145acc202ca613b23b5383e55df3c32a92ad1bf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Mar 2012 14:32:59 -0800 Subject: vfs: uninline full_name_hash() .. and also use it in lookup_one_len() rather than open-coding it. There aren't any performance-critical users, so inlining it is silly. But it wouldn't matter if it wasn't for the fact that the word-at-a-time dentry name patches want to conditionally replace the function, and uninlining it sets the stage for that. So again, this is a preparatory patch that doesn't change any semantics, and only prepares for a much cleaner and testable word-at-a-time dentry name accessor patch. Signed-off-by: Linus Torvalds --- fs/namei.c | 13 +++++++++---- include/linux/dcache.h | 9 +-------- 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index a780ea515c4..ec72fa1acb1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1374,6 +1374,14 @@ static inline int can_lookup(struct inode *inode) return 1; } +unsigned int full_name_hash(const unsigned char *name, unsigned int len) +{ + unsigned long hash = init_name_hash(); + while (len--) + hash = partial_name_hash(*name++, hash); + return end_name_hash(hash); +} + /* * Name resolution. * This is the basic name resolution function, turning a pathname into @@ -1775,24 +1783,21 @@ static struct dentry *lookup_hash(struct nameidata *nd) struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { struct qstr this; - unsigned long hash; unsigned int c; WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); this.name = name; this.len = len; + this.hash = full_name_hash(name, len); if (!len) return ERR_PTR(-EACCES); - hash = init_name_hash(); while (len--) { c = *(const unsigned char *)name++; if (c == '/' || c == '\0') return ERR_PTR(-EACCES); - hash = partial_name_hash(c, hash); } - this.hash = end_name_hash(hash); /* * See if the low-level filesystem might want * to use its own hash.. diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 61b24261e07..f1c7eb8461b 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -89,14 +89,7 @@ static inline unsigned long end_name_hash(unsigned long hash) } /* Compute the hash for a name string. */ -static inline unsigned int -full_name_hash(const unsigned char *name, unsigned int len) -{ - unsigned long hash = init_name_hash(); - while (len--) - hash = partial_name_hash(*name++, hash); - return end_name_hash(hash); -} +extern unsigned int full_name_hash(const unsigned char *, unsigned int); /* * Try to keep struct dentry aligned on 64 byte cachelines (this will -- cgit v1.2.3-70-g09d2 From 5707c87f20bca9e76969bb4096149de6ef74cbb9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Mar 2012 14:47:15 -0800 Subject: vfs: clarify and clean up dentry_cmp() It did some odd things for unclear reasons. As this is one of the functions that gets changed when doing word-at-a-time compares, this is yet another of the "don't change any semantics, but clean things up so that subsequent patches don't get obscured by the cleanups". Signed-off-by: Linus Torvalds --- include/linux/dcache.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f1c7eb8461b..4270bedd230 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -54,18 +54,17 @@ extern struct dentry_stat_t dentry_stat; static inline int dentry_cmp(const unsigned char *cs, size_t scount, const unsigned char *ct, size_t tcount) { - int ret; if (scount != tcount) return 1; + do { - ret = (*cs != *ct); - if (ret) - break; + if (*cs != *ct) + return 1; cs++; ct++; tcount--; } while (tcount); - return ret; + return 0; } /* Name hashing routines. Initial hash value */ -- cgit v1.2.3-70-g09d2