From b299eb5cde1a91706c450804006c6559b0826df8 Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: Thu, 3 Mar 2011 04:30:13 +0530 Subject: ACPI:Fix goto flows in thermal-sys This patch fixes two minor bugs in thermal_sys: (a) The flow of goto's in thermal_hwmon_add_sysfs. (b) Remove the temp*_crit only if there is a get_crit_temp defined, in thermal_remove_hwmon_sysfs. Signed-off-by: Durgadoss R Signed-off-by: Len Brown --- drivers/thermal/thermal_sys.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 713b7ea4a60..0b1c82ad680 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -499,7 +499,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) dev_set_drvdata(hwmon->device, hwmon); result = device_create_file(hwmon->device, &dev_attr_name); if (result) - goto unregister_hwmon_device; + goto free_mem; register_sys_interface: tz->hwmon = hwmon; @@ -513,7 +513,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) sysfs_attr_init(&tz->temp_input.attr.attr); result = device_create_file(hwmon->device, &tz->temp_input.attr); if (result) - goto unregister_hwmon_device; + goto unregister_name; if (tz->ops->get_crit_temp) { unsigned long temperature; @@ -527,7 +527,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) result = device_create_file(hwmon->device, &tz->temp_crit.attr); if (result) - goto unregister_hwmon_device; + goto unregister_input; } } @@ -539,9 +539,9 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) return 0; - unregister_hwmon_device: - device_remove_file(hwmon->device, &tz->temp_crit.attr); + unregister_input: device_remove_file(hwmon->device, &tz->temp_input.attr); + unregister_name: if (new_hwmon_device) { device_remove_file(hwmon->device, &dev_attr_name); hwmon_device_unregister(hwmon->device); @@ -560,7 +560,8 @@ thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) tz->hwmon = NULL; device_remove_file(hwmon->device, &tz->temp_input.attr); - device_remove_file(hwmon->device, &tz->temp_crit.attr); + if (tz->ops->get_crit_temp) + device_remove_file(hwmon->device, &tz->temp_crit.attr); mutex_lock(&thermal_list_lock); list_del(&tz->hwmon_node); -- cgit v1.2.3-70-g09d2 From 9f63b88bd7a1ac1afbb4358772a39abaeddbdd13 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 23 Mar 2011 17:26:34 +0800 Subject: ACPI: osl, add acpi_os_create_lock interface Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/osl.c | 33 +++++++++++++++++++++++++-------- include/acpi/acpiosxf.h | 3 +++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index c90c76aa7f8..cf750a7a952 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -883,14 +883,6 @@ void acpi_os_wait_events_complete(void *context) EXPORT_SYMBOL(acpi_os_wait_events_complete); -/* - * Deallocate the memory for a spinlock. - */ -void acpi_os_delete_lock(acpi_spinlock handle) -{ - return; -} - acpi_status acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) { @@ -1321,6 +1313,31 @@ int acpi_resources_are_enforced(void) } EXPORT_SYMBOL(acpi_resources_are_enforced); +/* + * Create and initialize a spinlock. + */ +acpi_status +acpi_os_create_lock(acpi_spinlock *out_handle) +{ + spinlock_t *lock; + + lock = ACPI_ALLOCATE(sizeof(spinlock_t)); + if (!lock) + return AE_NO_MEMORY; + spin_lock_init(lock); + *out_handle = lock; + + return AE_OK; +} + +/* + * Deallocate the memory for a spinlock. + */ +void acpi_os_delete_lock(acpi_spinlock handle) +{ + ACPI_FREE(handle); +} + /* * Acquire a spinlock. * diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index a3252a5ead6..a756bc8d866 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -98,6 +98,9 @@ acpi_os_table_override(struct acpi_table_header *existing_table, /* * Spinlock primitives */ +acpi_status +acpi_os_create_lock(acpi_spinlock *out_handle); + void acpi_os_delete_lock(acpi_spinlock handle); acpi_cpu_flags acpi_os_acquire_lock(acpi_spinlock handle); -- cgit v1.2.3-70-g09d2 From 3854c8e32f46ffa6ee0bf2eb01137f5a48b2754f Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 23 Mar 2011 17:26:35 +0800 Subject: ACPICA: Use acpi_os_create_lock interface Replace spin_lock_init with acpi_os_create_lock. Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acglobal.h | 9 +++------ drivers/acpi/acpica/utmutex.c | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index d69750b83b3..6d512fcabdb 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -226,12 +226,9 @@ ACPI_EXTERN u8 acpi_gbl_global_lock_present; * Spinlocks are used for interfaces that can be possibly called at * interrupt level */ -ACPI_EXTERN spinlock_t _acpi_gbl_gpe_lock; /* For GPE data structs and registers */ -ACPI_EXTERN spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ -ACPI_EXTERN spinlock_t _acpi_ev_global_lock_pending_lock; /* For global lock */ -#define acpi_gbl_gpe_lock &_acpi_gbl_gpe_lock -#define acpi_gbl_hardware_lock &_acpi_gbl_hardware_lock -#define acpi_ev_global_lock_pending_lock &_acpi_ev_global_lock_pending_lock +ACPI_EXTERN acpi_spinlock acpi_gbl_gpe_lock; /* For GPE data structs and registers */ +ACPI_EXTERN acpi_spinlock acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ +ACPI_EXTERN acpi_spinlock acpi_ev_global_lock_pending_lock; /* For global lock */ /***************************************************************************** * diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index a946c689f03..519d4ee9b45 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -83,9 +83,20 @@ acpi_status acpi_ut_mutex_initialize(void) /* Create the spinlocks for use at interrupt level */ - spin_lock_init(acpi_gbl_gpe_lock); - spin_lock_init(acpi_gbl_hardware_lock); - spin_lock_init(acpi_ev_global_lock_pending_lock); + status = acpi_os_create_lock (&acpi_gbl_gpe_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } + + status = acpi_os_create_lock (&acpi_gbl_hardware_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } + + status = acpi_os_create_lock (&acpi_ev_global_lock_pending_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } /* Mutex for _OSI support */ status = acpi_os_create_mutex(&acpi_gbl_osi_mutex); -- cgit v1.2.3-70-g09d2 From 749c27639b95c5c4a8185e02a4efb189034944ed Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 23 Mar 2011 17:26:36 +0800 Subject: ACPICA: Fix code divergence of global lock handling Commit 9cd0314(ACPI / ACPICA: Fix global lock acquisition) was backported into ACPICA code base, and some divergence was introduced. This patch fixed it, - rename acpi_ev_global_lock_pending/acpi_ev_global_lock_pending_lock to acpi_gbl_global_lock_pending/acpi_gbl_global_lock_pending_lock. - move the initialization of acpi_gbl_global_lock_pending_lock from acpi_ut_mutex_initialize to acpi_ev_init_global_lock_handler. Signed-off-by: Lin Ming Reviewed-by: Rafael J. Wysocki Signed-off-by: Len Brown --- drivers/acpi/acpica/acglobal.h | 6 ++-- drivers/acpi/acpica/evmisc.c | 74 +++++++++++++++++++++++------------------- drivers/acpi/acpica/utmutex.c | 5 --- 3 files changed, 44 insertions(+), 41 deletions(-) diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 6d512fcabdb..73863d86f02 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -214,13 +214,16 @@ ACPI_EXTERN struct acpi_mutex_info acpi_gbl_mutex_info[ACPI_NUM_MUTEX]; /* * Global lock mutex is an actual AML mutex object - * Global lock semaphore works in conjunction with the HW global lock + * Global lock semaphore works in conjunction with the actual global lock + * Global lock spinlock is used for "pending" handshake */ ACPI_EXTERN union acpi_operand_object *acpi_gbl_global_lock_mutex; ACPI_EXTERN acpi_semaphore acpi_gbl_global_lock_semaphore; +ACPI_EXTERN acpi_spinlock acpi_gbl_global_lock_pending_lock; ACPI_EXTERN u16 acpi_gbl_global_lock_handle; ACPI_EXTERN u8 acpi_gbl_global_lock_acquired; ACPI_EXTERN u8 acpi_gbl_global_lock_present; +ACPI_EXTERN u8 acpi_gbl_global_lock_pending; /* * Spinlocks are used for interfaces that can be possibly called at @@ -228,7 +231,6 @@ ACPI_EXTERN u8 acpi_gbl_global_lock_present; */ ACPI_EXTERN acpi_spinlock acpi_gbl_gpe_lock; /* For GPE data structs and registers */ ACPI_EXTERN acpi_spinlock acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ -ACPI_EXTERN acpi_spinlock acpi_ev_global_lock_pending_lock; /* For global lock */ /***************************************************************************** * diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c index 7dc80946f7b..69a3b4aa862 100644 --- a/drivers/acpi/acpica/evmisc.c +++ b/drivers/acpi/acpica/evmisc.c @@ -284,39 +284,41 @@ static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context) * RETURN: ACPI_INTERRUPT_HANDLED * * DESCRIPTION: Invoked directly from the SCI handler when a global lock - * release interrupt occurs. If there's a thread waiting for - * the global lock, signal it. - * - * NOTE: Assumes that the semaphore can be signaled from interrupt level. If - * this is not possible for some reason, a separate thread will have to be - * scheduled to do this. + * release interrupt occurs. If there is actually a pending + * request for the lock, signal the waiting thread. * ******************************************************************************/ -static u8 acpi_ev_global_lock_pending; static u32 acpi_ev_global_lock_handler(void *context) { acpi_status status; acpi_cpu_flags flags; - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); - if (!acpi_ev_global_lock_pending) { - goto out; + /* + * If a request for the global lock is not actually pending, + * we are done. This handles "spurious" global lock interrupts + * which are possible (and have been seen) with bad BIOSs. + */ + if (!acpi_gbl_global_lock_pending) { + goto cleanup_and_exit; } - /* Send a unit to the semaphore */ - + /* + * Send a unit to the global lock semaphore. The actual acquisition + * of the global lock will be performed by the waiting thread. + */ status = acpi_os_signal_semaphore(acpi_gbl_global_lock_semaphore, 1); if (ACPI_FAILURE(status)) { ACPI_ERROR((AE_INFO, "Could not signal Global Lock semaphore")); } - acpi_ev_global_lock_pending = FALSE; + acpi_gbl_global_lock_pending = FALSE; - out: - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); +cleanup_and_exit: + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); return (ACPI_INTERRUPT_HANDLED); } @@ -350,14 +352,20 @@ acpi_status acpi_ev_init_global_lock_handler(void) * Map to AE_OK, but mark global lock as not present. Any attempt to * actually use the global lock will be flagged with an error. */ + acpi_gbl_global_lock_present = FALSE; if (status == AE_NO_HARDWARE_RESPONSE) { ACPI_ERROR((AE_INFO, "No response from Global Lock hardware, disabling lock")); - acpi_gbl_global_lock_present = FALSE; return_ACPI_STATUS(AE_OK); } + status = acpi_os_create_lock(&acpi_gbl_global_lock_pending_lock); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + acpi_gbl_global_lock_pending = FALSE; acpi_gbl_global_lock_present = TRUE; return_ACPI_STATUS(status); } @@ -414,7 +422,7 @@ static int acpi_ev_global_lock_acquired; acpi_status acpi_ev_acquire_global_lock(u16 timeout) { acpi_cpu_flags flags; - acpi_status status = AE_OK; + acpi_status status; u8 acquired = FALSE; ACPI_FUNCTION_TRACE(ev_acquire_global_lock); @@ -458,15 +466,15 @@ acpi_status acpi_ev_acquire_global_lock(u16 timeout) } /* - * Make sure that a global lock actually exists. If not, just treat the - * lock as a standard mutex. + * Make sure that a global lock actually exists. If not, just + * treat the lock as a standard mutex. */ if (!acpi_gbl_global_lock_present) { acpi_gbl_global_lock_acquired = TRUE; return_ACPI_STATUS(AE_OK); } - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); do { @@ -475,20 +483,19 @@ acpi_status acpi_ev_acquire_global_lock(u16 timeout) ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired); if (acquired) { acpi_gbl_global_lock_acquired = TRUE; - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Acquired hardware Global Lock\n")); break; } - acpi_ev_global_lock_pending = TRUE; - - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); - /* - * Did not get the lock. The pending bit was set above, and we - * must wait until we get the global lock released interrupt. + * Did not get the lock. The pending bit was set above, and + * we must now wait until we receive the global lock + * released interrupt. */ + acpi_gbl_global_lock_pending = TRUE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Waiting for hardware Global Lock\n")); @@ -496,17 +503,16 @@ acpi_status acpi_ev_acquire_global_lock(u16 timeout) * Wait for handshake with the global lock interrupt handler. * This interface releases the interpreter if we must wait. */ - status = acpi_ex_system_wait_semaphore( - acpi_gbl_global_lock_semaphore, - ACPI_WAIT_FOREVER); + status = + acpi_ex_system_wait_semaphore + (acpi_gbl_global_lock_semaphore, ACPI_WAIT_FOREVER); - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); } while (ACPI_SUCCESS(status)); - acpi_ev_global_lock_pending = FALSE; - - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); + acpi_gbl_global_lock_pending = FALSE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index 519d4ee9b45..7d797e2baec 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -93,11 +93,6 @@ acpi_status acpi_ut_mutex_initialize(void) return_ACPI_STATUS (status); } - status = acpi_os_create_lock (&acpi_ev_global_lock_pending_lock); - if (ACPI_FAILURE (status)) { - return_ACPI_STATUS (status); - } - /* Mutex for _OSI support */ status = acpi_os_create_mutex(&acpi_gbl_osi_mutex); if (ACPI_FAILURE(status)) { -- cgit v1.2.3-70-g09d2 From 84adccfb8cd2a6b8237da6752668ba25cd90c20b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 24 Mar 2011 11:32:15 +0530 Subject: dmaengine/dw_dmac fix: dwc_scan_descriptors must compare first desc address also with llp dwc_scan_descriptors scans all descriptors from active_list in case transfer is not completed. It compares first_desc->lli.llp, and then all childrens of its tx_list. But it doesn't compare its own address, i.e. first_desc->txd.phys, as this is what we have initially programmed into the controller register. So this causes dma to stop and finish a transfer, which was never started. And thus fail. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul --- drivers/dma/dw_dmac.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 9c25c7d099e..b15c32ca0ef 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -304,6 +304,11 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp); list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { + /* check first descriptors addr */ + if (desc->txd.phys == llp) + return; + + /* check first descriptors llp */ if (desc->lli.llp == llp) /* This one is currently in progress */ return; -- cgit v1.2.3-70-g09d2 From e2ec771a99a5cf231c9dea4da26238bf073e1e9c Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 27 Mar 2011 01:26:52 +0800 Subject: dma: use BUG_ON correctly in iop-adma.c, v4 This patch makes BUG_ON() usage correct in drivers/dma/iop-adma.c. Cc: Dan Williams Cc: Vinod Koul Signed-off-by: Coly Li Signed-off-by: Vinod Koul --- drivers/dma/iop-adma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index c6b01f535b2..e03f811a83d 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -619,7 +619,7 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", __func__, len); @@ -652,7 +652,7 @@ iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", __func__, len); @@ -686,7 +686,7 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u flags: %lx\n", -- cgit v1.2.3-70-g09d2 From 7912d30007d0c958bcf11cd5ce19f77856cf041b Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 27 Mar 2011 01:26:53 +0800 Subject: dma: use BUG_ON correctly in mv_xor.c, v4 This patch makes BUG_ON() usage correct in drivers/dma/mv_xor.c Cc: Vinod Koul Cc: Dan Williams Signed-off-by: Coly Li Signed-off-by: Vinod Koul --- drivers/dma/mv_xor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index a25f5f61e0e..954e334e01b 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -671,7 +671,7 @@ mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); spin_lock_bh(&mv_chan->lock); slot_cnt = mv_chan_memcpy_slot_count(len); @@ -710,7 +710,7 @@ mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); spin_lock_bh(&mv_chan->lock); slot_cnt = mv_chan_memset_slot_count(len); @@ -744,7 +744,7 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); dev_dbg(mv_chan->device->common.dev, "%s src_cnt: %d len: dest %x %u flags: %ld\n", -- cgit v1.2.3-70-g09d2 From 427cdf19b97e509e21e5d347e18d8b0b34723dfc Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 27 Mar 2011 01:26:54 +0800 Subject: dma: use BUG_ON correctly in ppc4xx/adam.c, v4 This patch makes BUG_ON() usage correct in drivers/dma/ppc4xx/adam.c Cc: Vinod Koul Cc: Dan Williams Cc: Grant Likely Cc: Anatolij Gustschin Cc: Sean MacLennan Cc: Joe Perches Signed-off-by: Coly Li Signed-off-by: Vinod Koul --- drivers/dma/ppc4xx/adma.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index cef584533ee..a2b62e93a14 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -2313,7 +2313,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy( if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); spin_lock_bh(&ppc440spe_chan->lock); @@ -2354,7 +2354,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memset( if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); spin_lock_bh(&ppc440spe_chan->lock); @@ -2397,7 +2397,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor( dma_dest, dma_src, src_cnt)); if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); dev_dbg(ppc440spe_chan->device->common.dev, "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n", @@ -2887,7 +2887,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq( ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id, dst, src, src_cnt)); BUG_ON(!len); - BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); BUG_ON(!src_cnt); if (src_cnt == 1 && dst[1] == src[0]) { -- cgit v1.2.3-70-g09d2 From 1cb7b1e0de6a1f8f071f4a146e3d10f3a662f707 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 31 Mar 2011 13:36:38 +0200 Subject: ACPI EC: remove dead code static void acpi_ec_gpe_query(void *ec_cxt); -> The function is right above this declaration -> not needed. poll_force is also not used, cleaned up in ec.c and its users: compal-laptop and msi-laptop. Signed-off-by: Thomas Renninger Signed-off-by: Len Brown --- drivers/acpi/ec.c | 6 +----- drivers/platform/x86/compal-laptop.c | 12 ++++++------ drivers/platform/x86/msi-laptop.c | 12 ++++++------ include/linux/acpi.h | 3 +-- 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index fa848c4116a..b3f1d6f52a8 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -69,7 +69,6 @@ enum ec_command { #define ACPI_EC_DELAY 500 /* Wait 500ms max. during EC ops */ #define ACPI_EC_UDELAY_GLK 1000 /* Wait 1ms max. to get global lock */ -#define ACPI_EC_CDELAY 10 /* Wait 10us before polling EC */ #define ACPI_EC_MSI_UDELAY 550 /* Wait 550us for MSI EC */ #define ACPI_EC_STORM_THRESHOLD 8 /* number of false interrupts @@ -433,8 +432,7 @@ EXPORT_SYMBOL(ec_write); int ec_transaction(u8 command, const u8 * wdata, unsigned wdata_len, - u8 * rdata, unsigned rdata_len, - int force_poll) + u8 * rdata, unsigned rdata_len) { struct transaction t = {.command = command, .wdata = wdata, .rdata = rdata, @@ -592,8 +590,6 @@ static void acpi_ec_gpe_query(void *ec_cxt) mutex_unlock(&ec->lock); } -static void acpi_ec_gpe_query(void *ec_cxt); - static int ec_check_sci(struct acpi_ec *ec, u8 state) { if (state & ACPI_EC_FLAG_SCI) { diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index 034572b980c..f4f43e65475 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -200,7 +200,7 @@ static bool extra_features; * watching the output of address 0x4F (do an ec_transaction writing 0x33 * into 0x4F and read a few bytes from the output, like so: * u8 writeData = 0x33; - * ec_transaction(0x4F, &writeData, 1, buffer, 32, 0); + * ec_transaction(0x4F, &writeData, 1, buffer, 32); * That address is labled "fan1 table information" in the service manual. * It should be clear which value in 'buffer' changes). This seems to be * related to fan speed. It isn't a proper 'realtime' fan speed value @@ -286,7 +286,7 @@ static int get_backlight_level(void) static void set_backlight_state(bool on) { u8 data = on ? BACKLIGHT_STATE_ON_DATA : BACKLIGHT_STATE_OFF_DATA; - ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0, 0); + ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0); } @@ -294,24 +294,24 @@ static void set_backlight_state(bool on) static void pwm_enable_control(void) { unsigned char writeData = PWM_ENABLE_DATA; - ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0); } static void pwm_disable_control(void) { unsigned char writeData = PWM_DISABLE_DATA; - ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0); } static void set_pwm(int pwm) { - ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0, 0); + ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0); } static int get_fan_rpm(void) { u8 value, data = FAN_DATA; - ec_transaction(FAN_ADDRESS, &data, 1, &value, 1, 0); + ec_transaction(FAN_ADDRESS, &data, 1, &value, 1); return 100 * (int)value; } diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 7e9bb6df9d3..918a65dd2ab 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -120,7 +120,7 @@ static int set_lcd_level(int level) buf[1] = (u8) (level*31); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, buf, sizeof(buf), - NULL, 0, 1); + NULL, 0); } static int get_lcd_level(void) @@ -129,7 +129,7 @@ static int get_lcd_level(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -142,7 +142,7 @@ static int get_auto_brightness(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -157,7 +157,7 @@ static int set_auto_brightness(int enable) wdata[0] = 4; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -165,7 +165,7 @@ static int set_auto_brightness(int enable) wdata[1] = (rdata & 0xF7) | (enable ? 8 : 0); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 2, - NULL, 0, 1); + NULL, 0); } static ssize_t set_device_state(const char *buf, size_t count, u8 mask) @@ -202,7 +202,7 @@ static int get_wireless_state(int *wlan, int *bluetooth) u8 wdata = 0, rdata; int result; - result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1, 1); + result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1); if (result < 0) return -1; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a2e910e0129..1deb2a73c2d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -150,8 +150,7 @@ extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, const u8 *wdata, unsigned wdata_len, - u8 *rdata, unsigned rdata_len, - int force_poll); + u8 *rdata, unsigned rdata_len); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) -- cgit v1.2.3-70-g09d2 From e2142df7ec7184ed4a77ada686bc1eb41075490f Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Thu, 31 Mar 2011 11:02:43 -0700 Subject: intel_mid_dma: fix runtime pm issues Use the correct api in probe to enable runtime pm for this driver. Additionally, do not just call legacy suspend for runtime_suspend, as this duplicates some work the pci core does for you. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Vinod Koul --- drivers/dma/intel_mid_dma.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c index 798f46a4590..f153adfcace 100644 --- a/drivers/dma/intel_mid_dma.c +++ b/drivers/dma/intel_mid_dma.c @@ -1292,8 +1292,7 @@ static int __devinit intel_mid_dma_probe(struct pci_dev *pdev, if (err) goto err_dma; - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); pm_runtime_allow(&pdev->dev); return 0; @@ -1322,6 +1321,9 @@ err_enable_device: static void __devexit intel_mid_dma_remove(struct pci_dev *pdev) { struct middma_device *device = pci_get_drvdata(pdev); + + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_forbid(&pdev->dev); middma_shutdown(pdev); pci_dev_put(pdev); kfree(device); @@ -1385,13 +1387,20 @@ int dma_resume(struct pci_dev *pci) static int dma_runtime_suspend(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - return dma_suspend(pci_dev, PMSG_SUSPEND); + struct middma_device *device = pci_get_drvdata(pci_dev); + + device->state = SUSPENDED; + return 0; } static int dma_runtime_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - return dma_resume(pci_dev); + struct middma_device *device = pci_get_drvdata(pci_dev); + + device->state = RUNNING; + iowrite32(REG_BIT0, device->dma_base + DMA_CFG); + return 0; } static int dma_runtime_idle(struct device *dev) -- cgit v1.2.3-70-g09d2 From 364de77831213be20f7f33c39ca1c194593b5c11 Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Sat, 2 Apr 2011 14:20:47 +0800 Subject: drivers, pch_dma: Fix uninitialized var before use In the function pdc_desc_get(), var 'i' is not initialized before use. This patch fixes it. Signed-off-by: Liu Yuan Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 8d8fef1480a..6eebc6205c6 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -403,7 +403,7 @@ static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan) { struct pch_dma_desc *desc, *_d; struct pch_dma_desc *ret = NULL; - int i; + int i = 0; spin_lock(&pd_chan->lock); list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) { -- cgit v1.2.3-70-g09d2 From 0601f793921157603831d00a9541d92e8f5763f6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 May 2009 17:47:56 -0400 Subject: SUNRPC: requeue tcp socket less frequently Don't requeue the socket in some cases where we know it's unnecessary. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b7d435c3f19..7a3e4bfd895 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -965,7 +965,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) goto err_again; /* record not complete */ } len = svsk->sk_reclen; - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); return len; error: @@ -1115,6 +1114,10 @@ out: /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + /* If we have more data, signal svc_xprt_enqueue() to try again */ + if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); if (serv->sv_stats) -- cgit v1.2.3-70-g09d2 From 5ee78d483c5812228e971e145b912e0a7e35e571 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 May 2009 17:47:56 -0400 Subject: SUNRPC: svc_tcp_recvfrom cleanup Minor cleanup in preparation for later patches. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 7a3e4bfd895..733c2f6a185 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -893,6 +893,7 @@ failed: static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) { struct svc_serv *serv = svsk->sk_xprt.xpt_server; + unsigned int want; int len; if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) @@ -915,9 +916,9 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { - int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; struct kvec iov; + want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; iov.iov_len = want; if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) @@ -1040,8 +1041,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; - int pnum, vlen; struct rpc_rqst *req = NULL; + unsigned int vlen; + int pnum; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), @@ -1072,7 +1074,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) } pnum = 1; - while (vlen < len) { + while (vlen < svsk->sk_reclen - 8) { vec[pnum].iov_base = (req) ? page_address(req->rq_private_buf.pages[pnum - 1]) : page_address(rqstp->rq_pages[pnum]); @@ -1083,29 +1085,23 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, len); + len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen - 8); if (len < 0) goto err_again; - /* - * Account for the 8 bytes we read earlier - */ - len += 8; - if (req) { - xprt_complete_rqst(req->rq_task, len); - len = 0; + xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + rqstp->rq_arg.len = 0; goto out; } - dprintk("svc: TCP complete record (%d bytes)\n", len); - rqstp->rq_arg.len = len; + dprintk("svc: TCP complete record (%d bytes)\n", svsk->sk_reclen); + rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; - if (len <= rqstp->rq_arg.head[0].iov_len) { - rqstp->rq_arg.head[0].iov_len = len; + if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { + rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; rqstp->rq_arg.page_len = 0; - } else { - rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; - } + } else + rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; @@ -1123,7 +1119,7 @@ out: if (serv->sv_stats) serv->sv_stats->nettcpcnt++; - return len; + return rqstp->rq_arg.len; err_again: if (len == -EAGAIN) { -- cgit v1.2.3-70-g09d2 From 48e6555c7b3bf0d92f8167d8b8b8ecf4a3fdab84 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 Feb 2011 14:52:03 -0500 Subject: svcrpc: note network-order types in svc_process_calldir Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 733c2f6a185..1955e1a1e39 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -982,9 +982,9 @@ static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, struct rpc_rqst **reqpp, struct kvec *vec) { struct rpc_rqst *req = NULL; - u32 *p; - u32 xid; - u32 calldir; + __be32 *p; + __be32 xid; + __be32 calldir; int len; len = svc_recvfrom(rqstp, vec, 1, 8); -- cgit v1.2.3-70-g09d2 From cc6c2127f2316c2b2ad1e8919b45cde5e03f65aa Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 Feb 2011 15:03:35 -0500 Subject: svcrpc: close connection if client sends short packet If the client sents a record too short to contain even the beginning of the rpc header, then just close the connection. The current code drops the record data and continues. I don't see the point. It's a hopeless situation and simpler just to cut off the connection completely. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1955e1a1e39..62ff7c5c09c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -955,6 +955,9 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) } } + if (svsk->sk_reclen < 8) + goto err_delete; /* client is nuts. */ + /* Check whether enough data is available */ len = svc_recv_available(svsk); if (len < 0) @@ -1058,20 +1061,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) vec[0] = rqstp->rq_arg.head[0]; vlen = PAGE_SIZE; - /* - * We have enough data for the whole tcp record. Let's try and read the - * first 8 bytes to get the xid and the call direction. We can use this - * to figure out if this is a call or a reply to a callback. If - * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. - * In that case, don't bother with the calldir and just read the data. - * It will be rejected in svc_process. - */ - if (len >= 8) { - len = svc_process_calldir(svsk, rqstp, &req, vec); - if (len < 0) - goto err_again; - vlen -= 8; - } + len = svc_process_calldir(svsk, rqstp, &req, vec); + if (len < 0) + goto err_again; + vlen -= 8; pnum = 1; while (vlen < svsk->sk_reclen - 8) { -- cgit v1.2.3-70-g09d2 From 586c52cc61b5b84c70102208b78269ef5924bf49 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 May 2009 17:47:56 -0400 Subject: svcrpc: copy cb reply instead of pages It's much simpler just to copy the cb reply data than to play tricks with pages. Callback replies will typically be very small (at least until we implement cb_getattr, in which case files with very long ACLs could pose a problem), so there's no loss in efficiency. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 122 +++++++++++++++++++++++---------------------------- 1 file changed, 56 insertions(+), 66 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 62ff7c5c09c..40b502b1144 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -981,57 +981,58 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return -EAGAIN; } -static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, - struct rpc_rqst **reqpp, struct kvec *vec) +static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) { + struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; struct rpc_rqst *req = NULL; - __be32 *p; + struct kvec *src, *dst; + __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base; __be32 xid; __be32 calldir; - int len; - - len = svc_recvfrom(rqstp, vec, 1, 8); - if (len < 0) - goto error; - p = (u32 *)rqstp->rq_arg.head[0].iov_base; xid = *p++; calldir = *p; - if (calldir == 0) { - /* REQUEST is the most common case */ - vec[0] = rqstp->rq_arg.head[0]; - } else { - /* REPLY */ - struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; - - if (bc_xprt) - req = xprt_lookup_rqst(bc_xprt, xid); - - if (!req) { - printk(KERN_NOTICE - "%s: Got unrecognized reply: " - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", - __func__, ntohl(calldir), - bc_xprt, xid); - vec[0] = rqstp->rq_arg.head[0]; - goto out; - } + if (bc_xprt) + req = xprt_lookup_rqst(bc_xprt, xid); - memcpy(&req->rq_private_buf, &req->rq_rcv_buf, - sizeof(struct xdr_buf)); - /* copy the xid and call direction */ - memcpy(req->rq_private_buf.head[0].iov_base, - rqstp->rq_arg.head[0].iov_base, 8); - vec[0] = req->rq_private_buf.head[0]; + if (!req) { + printk(KERN_NOTICE + "%s: Got unrecognized reply: " + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", + __func__, ntohl(calldir), + bc_xprt, xid); + return -EAGAIN; } - out: - vec[0].iov_base += 8; - vec[0].iov_len -= 8; - len = svsk->sk_reclen - 8; - error: - *reqpp = req; - return len; + + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); + /* + * XXX!: cheating for now! Only copying HEAD. + * But we know this is good enough for now (in fact, for any + * callback reply in the forseeable future). + */ + dst = &req->rq_private_buf.head[0]; + src = &rqstp->rq_arg.head[0]; + if (dst->iov_len < src->iov_len) + return -EAGAIN; /* whatever; just giving up. */ + memcpy(dst->iov_base, src->iov_base, src->iov_len); + xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + rqstp->rq_arg.len = 0; + return 0; +} + +static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) +{ + int i = 0; + int t = 0; + + while (t < len) { + vec[i].iov_base = page_address(pages[i]); + vec[i].iov_len = PAGE_SIZE; + i++; + t += PAGE_SIZE; + } + return i; } /* @@ -1044,8 +1045,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; - struct rpc_rqst *req = NULL; - unsigned int vlen; + __be32 *p; + __be32 calldir; int pnum; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", @@ -1058,35 +1059,17 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto error; vec = rqstp->rq_vec; - vec[0] = rqstp->rq_arg.head[0]; - vlen = PAGE_SIZE; - len = svc_process_calldir(svsk, rqstp, &req, vec); - if (len < 0) - goto err_again; - vlen -= 8; - - pnum = 1; - while (vlen < svsk->sk_reclen - 8) { - vec[pnum].iov_base = (req) ? - page_address(req->rq_private_buf.pages[pnum - 1]) : - page_address(rqstp->rq_pages[pnum]); - vec[pnum].iov_len = PAGE_SIZE; - pnum++; - vlen += PAGE_SIZE; - } + pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], + svsk->sk_reclen); + rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen - 8); + len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen); if (len < 0) goto err_again; - if (req) { - xprt_complete_rqst(req->rq_task, svsk->sk_reclen); - rqstp->rq_arg.len = 0; - goto out; - } dprintk("svc: TCP complete record (%d bytes)\n", svsk->sk_reclen); rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; @@ -1099,7 +1082,14 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; -out: + p = (__be32 *)rqstp->rq_arg.head[0].iov_base; + calldir = p[1]; + if (calldir) { + len = receive_cb_reply(svsk, rqstp); + if (len < 0) + goto err_again; + } + /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; -- cgit v1.2.3-70-g09d2 From 31d68ef65c7d49def19c1bae4e01b87d66cf5a56 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 24 Feb 2011 11:25:33 -0800 Subject: SUNRPC: Don't wait for full record to receive tcp data Ensure that we immediately read and buffer data from the incoming TCP stream so that we grow the receive window quickly, and don't deadlock on large READ or WRITE requests. Also do some minor exit cleanup. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svcsock.h | 1 + net/sunrpc/svcsock.c | 144 ++++++++++++++++++++++++++++++++--------- 2 files changed, 113 insertions(+), 32 deletions(-) diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 04dba23c59f..85c50b40759 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,6 +28,7 @@ struct svc_sock { /* private TCP part */ u32 sk_reclen; /* length of record */ u32 sk_tcplen; /* current read length */ + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 40b502b1144..a4fafcbc6ea 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, return len; } +static int svc_partial_recvfrom(struct svc_rqst *rqstp, + struct kvec *iov, int nr, + int buflen, unsigned int base) +{ + size_t save_iovlen; + void __user *save_iovbase; + unsigned int i; + int ret; + + if (base == 0) + return svc_recvfrom(rqstp, iov, nr, buflen); + + for (i = 0; i < nr; i++) { + if (iov[i].iov_len > base) + break; + base -= iov[i].iov_len; + } + save_iovlen = iov[i].iov_len; + save_iovbase = iov[i].iov_base; + iov[i].iov_len -= base; + iov[i].iov_base += base; + ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen); + iov[i].iov_len = save_iovlen; + iov[i].iov_base = save_iovbase; + return ret; +} + /* * Set socket snd and rcv buffer lengths */ @@ -884,6 +911,56 @@ failed: return NULL; } +static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return 0; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + if (rqstp->rq_pages[i] != NULL) + put_page(rqstp->rq_pages[i]); + BUG_ON(svsk->sk_pages[i] == NULL); + rqstp->rq_pages[i] = svsk->sk_pages[i]; + svsk->sk_pages[i] = NULL; + } + rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]); + return len; +} + +static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + svsk->sk_pages[i] = rqstp->rq_pages[i]; + rqstp->rq_pages[i] = NULL; + } +} + +static void svc_tcp_clear_pages(struct svc_sock *svsk) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + goto out; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + BUG_ON(svsk->sk_pages[i] == NULL); + put_page(svsk->sk_pages[i]); + svsk->sk_pages[i] = NULL; + } +out: + svsk->sk_tcplen = 0; +} + /* * Receive data. * If we haven't gotten the record length yet, get the next four bytes. @@ -928,7 +1005,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < want) { dprintk("svc: short recvfrom while reading record " "length (%d of %d)\n", len, want); - goto err_again; /* record header not complete */ + return -EAGAIN; } svsk->sk_reclen = ntohl(svsk->sk_reclen); @@ -958,26 +1035,14 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (svsk->sk_reclen < 8) goto err_delete; /* client is nuts. */ - /* Check whether enough data is available */ - len = svc_recv_available(svsk); - if (len < 0) - goto error; - - if (len < svsk->sk_reclen) { - dprintk("svc: incomplete TCP record (%d of %d)\n", - len, svsk->sk_reclen); - goto err_again; /* record not complete */ - } len = svsk->sk_reclen; return len; - error: - if (len == -EAGAIN) - dprintk("RPC: TCP recv_record got EAGAIN\n"); +error: + dprintk("RPC: TCP recv_record got %d\n", len); return len; - err_delete: +err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - err_again: return -EAGAIN; } @@ -1035,6 +1100,7 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) return i; } + /* * Receive data from a TCP socket. */ @@ -1045,6 +1111,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; + unsigned int want, base; __be32 *p; __be32 calldir; int pnum; @@ -1058,6 +1125,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len < 0) goto error; + base = svc_tcp_restore_pages(svsk, rqstp); + want = svsk->sk_reclen - base; + vec = rqstp->rq_vec; pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], @@ -1066,11 +1136,18 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen); - if (len < 0) - goto err_again; + len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); + if (len >= 0) + svsk->sk_tcplen += len; + if (len != want) { + if (len < 0 && len != -EAGAIN) + goto err_other; + svc_tcp_save_pages(svsk, rqstp); + dprintk("svc: incomplete TCP record (%d of %d)\n", + svsk->sk_tcplen, svsk->sk_reclen); + goto err_noclose; + } - dprintk("svc: TCP complete record (%d bytes)\n", svsk->sk_reclen); rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { @@ -1087,7 +1164,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (calldir) { len = receive_cb_reply(svsk, rqstp); if (len < 0) - goto err_again; + goto error; } /* Reset TCP read info */ @@ -1102,20 +1179,20 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->nettcpcnt++; + dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); return rqstp->rq_arg.len; -err_again: - if (len == -EAGAIN) { - dprintk("RPC: TCP recvfrom got EAGAIN\n"); - return len; - } error: - if (len != -EAGAIN) { - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - } + if (len != -EAGAIN) + goto err_other; + dprintk("RPC: TCP recvfrom got EAGAIN\n"); return -EAGAIN; +err_other: + printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", + svsk->sk_xprt.xpt_server->sv_name, -len); + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_noclose: + return -EAGAIN; /* record not complete */ } /* @@ -1286,6 +1363,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; @@ -1544,8 +1622,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) svc_sock_detach(xprt); - if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) + if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + svc_tcp_clear_pages(svsk); kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); + } } /* -- cgit v1.2.3-70-g09d2 From 9660439861aa8dbd5e2b8087f33e20760c2c9afc Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 21 Oct 2008 14:13:47 -0400 Subject: svcrpc: take advantage of tcp autotuning Allow the NFSv4 server to make use of TCP autotuning behaviour, which was previously disabled by setting the sk_userlocks variable. Set the receive buffers to be big enough to receive the whole RPC request, and set this for the listening socket, not the accept socket. Remove the code that readjusts the receive/send buffer sizes for the accepted socket. Previously this code was used to influence the TCP window management behaviour, which is no longer needed when autotuning is enabled. This can improve IO bandwidth on networks with high bandwidth-delay products, where a large tcp window is required. It also simplifies performance tuning, since getting adequate tcp buffers previously required increasing the number of nfsd threads. Signed-off-by: Olga Kornievskaia Cc: Jim Rees Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a4fafcbc6ea..213dea8b283 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -436,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); #endif @@ -973,23 +972,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) unsigned int want; int len; - if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) - /* sndbuf needs to have room for one request - * per thread, otherwise we can stall even when the - * network isn't a bottleneck. - * - * We count all threads rather than threads in a - * particular pool, which provides an upper bound - * on the number of threads which will access the socket. - * - * rcvbuf just needs to be able to hold a few requests. - * Normally they will be removed from the queue - * as soon a a complete request arrives. - */ - svc_sock_setbufsize(svsk->sk_sock, - (serv->sv_nrthreads+3) * serv->sv_max_mesg, - 3 * serv->sv_max_mesg); - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { @@ -1367,15 +1349,6 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; - /* initialise setting must have enough space to - * receive and respond to one request. - * svc_tcp_recvfrom will re-adjust if necessary - */ - svc_sock_setbufsize(svsk->sk_sock, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); - - set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1439,8 +1412,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Initialize the socket */ if (sock->type == SOCK_DGRAM) svc_udp_init(svsk, serv); - else + else { + /* initialise setting must have enough space to + * receive and respond to one request. + */ + svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, + 4 * serv->sv_max_mesg); svc_tcp_init(svsk, serv); + } dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); -- cgit v1.2.3-70-g09d2 From 8985ef0b8af895c3b85a8c1b7108e0169fcbd20b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 9 Apr 2011 10:03:10 -0400 Subject: svcrpc: complete svsk processing on cb receive failure Currently when there's some failure to receive a callback (because we couldn't find a matching xid, for example), we exit svc_recv with sk_tcplen still set but without any pages saved with the socket. This will cause a crash later in svc_tcp_restore_pages. Instead, make sure we reset that tcp information whether the callback received failed or succeeded. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 213dea8b283..af04f779ce9 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1143,11 +1143,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; - if (calldir) { + if (calldir) len = receive_cb_reply(svsk, rqstp); - if (len < 0) - goto error; - } /* Reset TCP read info */ svsk->sk_reclen = 0; @@ -1156,6 +1153,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + if (len < 0) + goto error; svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); if (serv->sv_stats) -- cgit v1.2.3-70-g09d2 From aea93397db4b39c9d15443a0e7cc9a380ba990c6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 10 Apr 2011 10:35:12 -0400 Subject: nfsd: distinguish functions of NFSD_MAY_* flags Most of the NFSD_MAY_* flags actually request permissions, but over the years we've accreted a few that modify the behavior of the permission or open code in other ways. Distinguish the two cases a little more. In particular, allow the shortcut at the start of nfsd_permission to ignore the non-permission-requesting bits. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 2 +- fs/nfsd/vfs.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2e1cebde90d..a76ef7e0b3d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2027,7 +2027,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, struct inode *inode = dentry->d_inode; int err; - if (acc == NFSD_MAY_NOP) + if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP) return 0; #if 0 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 9a370a5e36b..1036913e6e8 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -17,6 +17,9 @@ #define NFSD_MAY_SATTR 8 #define NFSD_MAY_TRUNC 16 #define NFSD_MAY_LOCK 32 +#define NFSD_MAY_MASK 63 + +/* extra hints to permission and open routines: */ #define NFSD_MAY_OWNER_OVERRIDE 64 #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 -- cgit v1.2.3-70-g09d2 From 204f4ce75434c3453907813f8a819d4cf2a5728f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 8 Apr 2011 16:32:54 -0400 Subject: nfsd4: allow fh_verify caller to skip pseudoflavor checks Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsfh.c | 2 +- fs/nfsd/vfs.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 55c8e63af0b..90c6aa6d5e0 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -344,7 +344,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) * which clients virtually always use auth_sys for, * even while using RPCSEC_GSS for NFS. */ - if (access & NFSD_MAY_LOCK) + if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) goto skip_pseudoflavor_check; /* * Clients may expect to be able to use auth_sys during mount, diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 1036913e6e8..4d2509f766d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -24,6 +24,7 @@ #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 #define NFSD_MAY_NOT_BREAK_LEASE 512 +#define NFSD_MAY_BYPASS_GSS 1024 #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) -- cgit v1.2.3-70-g09d2 From 22b03214962ec2a9748abc9987fc2e66dec4626d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 9 Apr 2011 11:23:24 -0400 Subject: nfsd4: introduce OPDESC helper Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5fcb1396a7e..126b8f75b57 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1031,6 +1031,11 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) return nfs_ok; } +static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) +{ + return &nfsd4_ops[op->opnum]; +} + /* * COMPOUND call. */ @@ -1108,7 +1113,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, goto encode_op; } - opdesc = &nfsd4_ops[op->opnum]; + opdesc = OPDESC(op); if (!cstate->current_fh.fh_dentry) { if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { -- cgit v1.2.3-70-g09d2 From 29a78a3ed7fc9c4ee49962751eb321b038c190a2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 9 Apr 2011 11:28:53 -0400 Subject: nfsd4: make fh_verify responsibility of nfsd_lookup_dentry caller The secinfo caller actually won't want this. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 +++ fs/nfsd/vfs.c | 9 +++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 126b8f75b57..8059adae013 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -762,6 +762,9 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 err; fh_init(&resfh, NFS4_FHSIZE); + err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, secinfo->si_name, secinfo->si_namelen, &exp, &dentry); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a76ef7e0b3d..e53313972c3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -181,16 +181,10 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, struct svc_export *exp; struct dentry *dparent; struct dentry *dentry; - __be32 err; int host_err; dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); - /* Obtain dentry and export. */ - err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); - if (err) - return err; - dparent = fhp->fh_dentry; exp = fhp->fh_export; exp_get(exp); @@ -254,6 +248,9 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, struct dentry *dentry; __be32 err; + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); if (err) return err; -- cgit v1.2.3-70-g09d2 From af986d101d141f10231ffa7e40ae397dc7356857 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Sun, 24 Apr 2011 22:09:32 +0100 Subject: ACPI: EC: add another DMI check for ASUS hardware Commit 0adf3c746a73684b3f8c2821a584e1db998f61e9 introduced a regression by making the ECDT validation test for ASUS hardware more restrictive. The previous test used the dmi_name_in_vendors function which searches a number of DMI fields, while the new test checked only the BIOS vendor, which is known to not match on an ASUS F5GL laptop which requires ECDT validation. Add a rule to ec_dmi_table based on an alternative DMI pattern for ASUS hardware as found elsewhere in the kernel. Signed-off-by: Peter Collingbourne Signed-off-by: Len Brown --- drivers/acpi/ec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index fa848c4116a..30ca71791bf 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -940,6 +940,9 @@ static struct dmi_system_id __initdata ec_dmi_table[] = { { ec_validate_ecdt, "ASUS hardware", { DMI_MATCH(DMI_BIOS_VENDOR, "ASUS") }, NULL}, + { + ec_validate_ecdt, "ASUS hardware", { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc.") }, NULL}, {}, }; -- cgit v1.2.3-70-g09d2 From 68d93184352f2e723f135b0a9bad93b58f9d120b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 8 Apr 2011 17:00:50 -0400 Subject: nfsd4: fix wrongsec handling for PUTFH + op cases When PUTFH is followed by an operation that uses the filehandle, and when the current client is using a security flavor that is inconsistent with the given filehandle, we have a choice: we can return WRONGSEC either when the current filehandle is set using the PUTFH, or when the filehandle is first used by the following operation. Follow the recommendations of RFC 5661 in making this choice. (Our current behavior prevented the client from doing security negotiation by returning WRONGSEC on PUTFH+SECINFO_NO_NAME.) Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 6 ------ fs/nfsd/nfs4proc.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index ad000aeb21a..b9566e46219 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1354,12 +1354,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) if (IS_ERR(exp)) return nfserrno(PTR_ERR(exp)); rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); - if (rv) - goto out; - rv = check_nfsd_access(exp, rqstp); - if (rv) - fh_put(fhp); -out: exp_put(exp); return rv; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8059adae013..ad32568a1aa 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -403,7 +403,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen); - return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); + return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); } static __be32 @@ -989,6 +989,9 @@ enum nfsd4_op_flags { ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ + /* For rfc 5661 section 2.6.3.1.1: */ + OP_HANDLES_WRONGSEC = 1 << 3, + OP_IS_PUTFH_LIKE = 1 << 4, }; struct nfsd4_operation { @@ -1039,6 +1042,39 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) return &nfsd4_ops[op->opnum]; } +static bool need_wrongsec_check(struct svc_rqst *rqstp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + struct nfsd4_op *this = &argp->ops[resp->opcnt - 1]; + struct nfsd4_op *next = &argp->ops[resp->opcnt]; + struct nfsd4_operation *thisd; + struct nfsd4_operation *nextd; + + thisd = OPDESC(this); + /* + * Most ops check wronsec on our own; only the putfh-like ops + * have special rules. + */ + if (!(thisd->op_flags & OP_IS_PUTFH_LIKE)) + return false; + /* + * rfc 5661 2.6.3.1.1.6: don't bother erroring out a + * put-filehandle operation if we're not going to use the + * result: + */ + if (argp->opcnt == resp->opcnt) + return false; + + nextd = OPDESC(next); + /* + * Rest of 2.6.3.1.1: certain operations will return WRONGSEC + * errors themselves as necessary; others should check for them + * now: + */ + return !(nextd->op_flags & OP_HANDLES_WRONGSEC); +} + /* * COMPOUND call. */ @@ -1134,6 +1170,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, else BUG_ON(op->status == nfs_ok); + if (!op->status && need_wrongsec_check(rqstp)) + op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); + encode_op: /* Only from SEQUENCE */ if (resp->cstate.status == nfserr_replay_cache) { @@ -1225,10 +1264,12 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_LOOKUP] = { .op_func = (nfsd4op_func)nfsd4_lookup, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_LOOKUP", }, [OP_LOOKUPP] = { .op_func = (nfsd4op_func)nfsd4_lookupp, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_LOOKUPP", }, [OP_NVERIFY] = { @@ -1237,6 +1278,7 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_OPEN] = { .op_func = (nfsd4op_func)nfsd4_open, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_OPEN", }, [OP_OPEN_CONFIRM] = { @@ -1249,17 +1291,20 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_PUTFH", }, [OP_PUTPUBFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_PUTPUBFH", }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_PUTROOTFH", }, [OP_READ] = { @@ -1289,15 +1334,18 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_RESTOREFH] = { .op_func = (nfsd4op_func)nfsd4_restorefh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_RESTOREFH", }, [OP_SAVEFH] = { .op_func = (nfsd4op_func)nfsd4_savefh, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SAVEFH", }, [OP_SECINFO] = { .op_func = (nfsd4op_func)nfsd4_secinfo, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO", }, [OP_SETATTR] = { @@ -1361,6 +1409,7 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO_NO_NAME] = { .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", }, }; -- cgit v1.2.3-70-g09d2 From ac6721a13e5b1a90728e790600f827a5e5f5da2f Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Wed, 20 Apr 2011 17:06:25 +0800 Subject: nfsd41: make sure nfs server process OPEN with EXCLUSIVE4_1 correctly The NFS server uses nfsd_create_v3 to handle EXCLUSIVE4_1 opens, but that function is not prepared to handle them. Rename nfsd_create_v3() to do_nfsd_create(), and add handling of EXCLUSIVE4_1. Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/vfs.c | 20 ++++++++++++++++---- fs/nfsd/vfs.h | 2 +- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 2247fc91d5e..9095f3c21df 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -245,7 +245,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, } /* Now create the file and set attributes */ - nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len, + nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, attr, newfhp, argp->createmode, argp->verf, NULL, NULL); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ad32568a1aa..3a6dbd70b34 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -196,9 +196,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o /* * Note: create modes (UNCHECKED,GUARDED...) are the same - * in NFSv4 as in v3. + * in NFSv4 as in v3 except EXCLUSIVE4_1. */ - status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data, + status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, &resfh, open->op_createmode, (u32 *)open->op_verf.data, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e53313972c3..389e45ad200 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1337,11 +1337,18 @@ out_nfserr: } #ifdef CONFIG_NFSD_V3 + +static inline int nfsd_create_is_exclusive(int createmode) +{ + return createmode == NFS3_CREATE_EXCLUSIVE + || createmode == NFS4_CREATE_EXCLUSIVE4_1; +} + /* - * NFSv3 version of nfsd_create + * NFSv3 and NFSv4 version of nfsd_create */ __be32 -nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, +do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, struct iattr *iap, struct svc_fh *resfhp, int createmode, u32 *verifier, int *truncp, int *created) @@ -1386,7 +1393,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) goto out; - if (createmode == NFS3_CREATE_EXCLUSIVE) { + if (nfsd_create_is_exclusive(createmode)) { /* solaris7 gets confused (bugid 4218508) if these have * the high bit set, so just clear the high bits. If this is * ever changed to use different attrs for storing the @@ -1427,6 +1434,11 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, && dchild->d_inode->i_atime.tv_sec == v_atime && dchild->d_inode->i_size == 0 ) break; + case NFS4_CREATE_EXCLUSIVE4_1: + if ( dchild->d_inode->i_mtime.tv_sec == v_mtime + && dchild->d_inode->i_atime.tv_sec == v_atime + && dchild->d_inode->i_size == 0 ) + goto set_attr; /* fallthru */ case NFS3_CREATE_GUARDED: err = nfserr_exist; @@ -1445,7 +1457,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_check_ignore_resizing(iap); - if (createmode == NFS3_CREATE_EXCLUSIVE) { + if (nfsd_create_is_exclusive(createmode)) { /* Cram the verifier into atime/mtime */ iap->ia_valid = ATTR_MTIME|ATTR_ATIME | ATTR_MTIME_SET|ATTR_ATIME_SET; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 4d2509f766d..e0bbac04d1d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -58,7 +58,7 @@ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, int type, dev_t rdev, struct svc_fh *res); #ifdef CONFIG_NFSD_V3 __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); -__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *, +__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, struct svc_fh *res, int createmode, u32 *verifier, int *truncp, int *created); -- cgit v1.2.3-70-g09d2 From a62573dc353b255dbbc8520bfdcb1c8a8c1ada87 Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Wed, 23 Mar 2011 17:57:07 +0800 Subject: nfsd41: add flag checking for create_session Teach the NFS server to reject invalid create_session flags. Also do some minor formatting adjustments. Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 +++ include/linux/nfs4.h | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fbde6f79922..6dbaa379c86 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1515,6 +1515,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, bool confirm_me = false; int status = 0; + if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) + return nfserr_inval; + nfs4_lock_state(); unconf = find_unconfirmed_client(&cr_ses->clientid); conf = find_confirmed_client(&cr_ses->clientid); diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index b528f6d4b86..8937727e703 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -570,9 +570,11 @@ struct nfs4_sessionid { }; /* Create Session Flags */ -#define SESSION4_PERSIST 0x001 -#define SESSION4_BACK_CHAN 0x002 -#define SESSION4_RDMA 0x004 +#define SESSION4_PERSIST 0x001 +#define SESSION4_BACK_CHAN 0x002 +#define SESSION4_RDMA 0x004 + +#define SESSION4_FLAG_MASK_A 0x007 enum state_protect_how4 { SP4_NONE = 0, -- cgit v1.2.3-70-g09d2 From b7c66360dc34e64742edabf4dc410070aa883119 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 22 Apr 2011 12:45:59 -0400 Subject: nfsd v4.1 lOCKT clientid field must be ignored RFC 5661 Section 18.11.3 The clientid field of the owner MAY be set to any value by the client and MUST be ignored by the server. The reason the server MUST ignore the clientid field is that the server MUST derive the client ID from the session ID from the SEQUENCE operation of the COMPOUND request. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c6766af00d9..6e7f10a8935 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -588,8 +588,6 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) READ_BUF(lockt->lt_owner.len); READMEM(lockt->lt_owner.data, lockt->lt_owner.len); - if (argp->minorversion && !zero_clientid(&lockt->lt_clientid)) - return nfserr_inval; DECODE_TAIL; } -- cgit v1.2.3-70-g09d2 From 868b89c3dc25eec03985b31ff070e8f73c818980 Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Wed, 27 Apr 2011 09:09:58 +0800 Subject: nfsd41: compare request's opcnt with session's maxops at nfsd4_sequence Make sure nfs server errors out if request contains more ops than channel allows. Signed-off-by: Mi Jinlong [bfields@redhat.com: use helper function] Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6dbaa379c86..3196dc38857 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1724,6 +1724,13 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi return; } +static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) +{ + struct nfsd4_compoundargs *args = rqstp->rq_argp; + + return args->opcnt > session->se_fchannel.maxops; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1752,6 +1759,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (!session) goto out; + status = nfserr_too_many_ops; + if (nfsd4_session_too_many_ops(rqstp, session)) + goto out; + status = nfserr_badslot; if (seq->slotid >= session->se_fchannel.maxreqs) goto out; -- cgit v1.2.3-70-g09d2 From bcecf1ccc336200ee488e8eb68acdafc4b0dbd1a Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Wed, 27 Apr 2011 09:14:30 +0800 Subject: nfsd41: error out on repeated RECLAIM_COMPLETE Servers are supposed to return nfserr_complete_already to clients that attempt to send multiple RECLAIM_COMPLETEs. Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3196dc38857..2bb03f86a03 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1818,6 +1818,8 @@ out: __be32 nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) { + int status = 0; + if (rc->rca_one_fs) { if (!cstate->current_fh.fh_dentry) return nfserr_nofilehandle; @@ -1827,9 +1829,14 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta */ return nfs_ok; } + nfs4_lock_state(); - if (is_client_expired(cstate->session->se_client)) { - nfs4_unlock_state(); + status = nfserr_complete_already; + if (cstate->session->se_client->cl_firststate) + goto out; + + status = nfserr_stale_clientid; + if (is_client_expired(cstate->session->se_client)) /* * The following error isn't really legal. * But we only get here if the client just explicitly @@ -1837,11 +1844,13 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta * error it gets back on an operation for the dead * client. */ - return nfserr_stale_clientid; - } + goto out; + + status = nfs_ok; nfsd4_create_clid_dir(cstate->session->se_client); +out: nfs4_unlock_state(); - return nfs_ok; + return status; } __be32 -- cgit v1.2.3-70-g09d2 From fccb13c947de83a368e1f3c2216bbf4d8d41efa1 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 27 Apr 2011 15:47:14 -0400 Subject: NFSD: Remove setting unused variable in nfsd_vfs_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling gave me this warning: fs/nfsd/vfs.c: In function ‘nfsd_vfs_read’: fs/nfsd/vfs.c:880:16: warning: variable ‘inode’ set but not used [-Wunused-but-set-variable] I discovered that a local variable "inode" was being set towards the beginning of nfsd_vfs_read() and then ignored for the rest of the function. Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 389e45ad200..f4e056ae53e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -874,13 +874,11 @@ static __be32 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { - struct inode *inode; mm_segment_t oldfs; __be32 err; int host_err; err = nfserr_perm; - inode = file->f_path.dentry->d_inode; if (file->f_op->splice_read && rqstp->rq_splice_ok) { struct splice_desc sd = { -- cgit v1.2.3-70-g09d2 From 1db2b9dde3317e181f76860410cb0e7433896f28 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 27 Apr 2011 15:47:15 -0400 Subject: NFSD: Check status from nfsd4_map_bcts_dir() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling gave me this warning: fs/nfsd/nfs4state.c: In function ‘nfsd4_bind_conn_to_session’: fs/nfsd/nfs4state.c:1623:9: warning: variable ‘status’ set but not used [-Wunused-but-set-variable] The local variable "status" was being set by nfsd4_map_bcts_dir() and then ignored before calling nfsd4_new_conn(). Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2bb03f86a03..a2ea14f40b4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1636,8 +1636,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, return nfserr_badsession; status = nfsd4_map_bcts_dir(&bcts->dir); - nfsd4_new_conn(rqstp, cstate->session, bcts->dir); - return nfs_ok; + if (!status) + nfsd4_new_conn(rqstp, cstate->session, bcts->dir); + return status; } static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) -- cgit v1.2.3-70-g09d2 From 6ce2357f1e73e0da8ebeace6ec829f48a646bb8c Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 27 Apr 2011 15:47:16 -0400 Subject: NFSD: Remove unused variable from nfsd4_decode_bind_conn_to_session() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling gave me this warning: fs/nfsd/nfs4xdr.c: In function ‘nfsd4_decode_bind_conn_to_session’: fs/nfsd/nfs4xdr.c:427:6: warning: variable ‘dummy’ set but not used [-Wunused-but-set-variable] The local variable "dummy" wasn't being used past the READ32() macro that set it. READ_BUF() should ensure that the xdr buffer is pushed past the data read into dummy already, so nothing needs to be read in. Signed-off-by: Bryan Schumaker [bfields@redhat.com: minor comment fixup.] Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6e7f10a8935..195a91d9405 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -424,15 +424,12 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) { DECODE_HEAD; - u32 dummy; READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); READ32(bcts->dir); - /* XXX: Perhaps Tom Tucker could help us figure out how we - * should be using ctsa_use_conn_in_rdma_mode: */ - READ32(dummy); - + /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker + * could help us figure out we should be using it. */ DECODE_TAIL; } -- cgit v1.2.3-70-g09d2 From 9b3aa589eaa1366200062ce1f9cc7ddca8d1d578 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Sat, 30 Apr 2011 16:57:45 +0200 Subject: dmaengine: at_hdmac: modify way to use interrupts Now we use Buffer Transfer Completed interrupts. If we want a chained buffer completed information, we setup the ATC_IEN bit in CTRLB register in the lli. This is done by set_desc_eol() function and used by memcpy/slave_sg functions. Signed-off-by: Nicolas Ferre Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 4 ++-- drivers/dma/at_hdmac_regs.h | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 3d7d705f026..13050e646f8 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -464,7 +464,7 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) for (i = 0; i < atdma->dma_common.chancnt; i++) { atchan = &atdma->chan[i]; - if (pending & (AT_DMA_CBTC(i) | AT_DMA_ERR(i))) { + if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) { if (pending & AT_DMA_ERR(i)) { /* Disable channel on AHB error */ dma_writel(atdma, CHDR, atchan->mask); @@ -549,7 +549,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, } ctrla = ATC_DEFAULT_CTRLA; - ctrlb = ATC_DEFAULT_CTRLB + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | ATC_SRC_ADDR_MODE_INCR | ATC_DST_ADDR_MODE_INCR | ATC_FC_MEM2MEM; diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 495457e3dc4..8303306ea82 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -309,8 +309,8 @@ static void atc_setup_irq(struct at_dma_chan *atchan, int on) struct at_dma *atdma = to_at_dma(atchan->chan_common.device); u32 ebci; - /* enable interrupts on buffer chain completion & error */ - ebci = AT_DMA_CBTC(atchan->chan_common.chan_id) + /* enable interrupts on buffer transfer completion & error */ + ebci = AT_DMA_BTC(atchan->chan_common.chan_id) | AT_DMA_ERR(atchan->chan_common.chan_id); if (on) dma_writel(atdma, EBCIER, ebci); @@ -347,7 +347,12 @@ static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) */ static void set_desc_eol(struct at_desc *desc) { - desc->lli.ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; + u32 ctrlb = desc->lli.ctrlb; + + ctrlb &= ~ATC_IEN; + ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; + + desc->lli.ctrlb = ctrlb; desc->lli.dscr = 0; } -- cgit v1.2.3-70-g09d2 From 53830cc75974a199b6b654c062ff8c54c58caa0b Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Sat, 30 Apr 2011 16:57:46 +0200 Subject: dmaengine: at_hdmac: add cyclic DMA operation support Signed-off-by: Nicolas Ferre Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 231 +++++++++++++++++++++++++++++++++++++++++--- drivers/dma/at_hdmac_regs.h | 14 ++- 2 files changed, 229 insertions(+), 16 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 13050e646f8..ed9d92429de 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -164,6 +164,29 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) } } +/** + * atc_desc_chain - build chain adding a descripor + * @first: address of first descripor of the chain + * @prev: address of previous descripor of the chain + * @desc: descriptor to queue + * + * Called from prep_* functions + */ +static void atc_desc_chain(struct at_desc **first, struct at_desc **prev, + struct at_desc *desc) +{ + if (!(*first)) { + *first = desc; + } else { + /* inform the HW lli about chaining */ + (*prev)->lli.dscr = desc->txd.phys; + /* insert the link descriptor to the LD ring */ + list_add_tail(&desc->desc_node, + &(*first)->tx_list); + } + *prev = desc; +} + /** * atc_assign_cookie - compute and assign new cookie * @atchan: channel we work on @@ -237,16 +260,12 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) static void atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) { - dma_async_tx_callback callback; - void *param; struct dma_async_tx_descriptor *txd = &desc->txd; dev_vdbg(chan2dev(&atchan->chan_common), "descriptor %u complete\n", txd->cookie); atchan->completed_cookie = txd->cookie; - callback = txd->callback; - param = txd->callback_param; /* move children to free_list */ list_splice_init(&desc->tx_list, &atchan->free_list); @@ -278,12 +297,19 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) } } - /* - * The API requires that no submissions are done from a - * callback, so we don't need to drop the lock here - */ - if (callback) - callback(param); + /* for cyclic transfers, + * no need to replay callback function while stopping */ + if (!test_bit(ATC_IS_CYCLIC, &atchan->status)) { + dma_async_tx_callback callback = txd->callback; + void *param = txd->callback_param; + + /* + * The API requires that no submissions are done from a + * callback, so we don't need to drop the lock here + */ + if (callback) + callback(param); + } dma_run_dependencies(txd); } @@ -419,6 +445,26 @@ static void atc_handle_error(struct at_dma_chan *atchan) atc_chain_complete(atchan, bad_desc); } +/** + * atc_handle_cyclic - at the end of a period, run callback function + * @atchan: channel used for cyclic operations + * + * Called with atchan->lock held and bh disabled + */ +static void atc_handle_cyclic(struct at_dma_chan *atchan) +{ + struct at_desc *first = atc_first_active(atchan); + struct dma_async_tx_descriptor *txd = &first->txd; + dma_async_tx_callback callback = txd->callback; + void *param = txd->callback_param; + + dev_vdbg(chan2dev(&atchan->chan_common), + "new cyclic period llp 0x%08x\n", + channel_readl(atchan, DSCR)); + + if (callback) + callback(param); +} /*-- IRQ & Tasklet ---------------------------------------------------*/ @@ -434,8 +480,10 @@ static void atc_tasklet(unsigned long data) } spin_lock(&atchan->lock); - if (test_and_clear_bit(0, &atchan->error_status)) + if (test_and_clear_bit(ATC_IS_ERROR, &atchan->status)) atc_handle_error(atchan); + else if (test_bit(ATC_IS_CYCLIC, &atchan->status)) + atc_handle_cyclic(atchan); else atc_advance_work(atchan); @@ -469,7 +517,7 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) /* Disable channel on AHB error */ dma_writel(atdma, CHDR, atchan->mask); /* Give information to tasklet */ - set_bit(0, &atchan->error_status); + set_bit(ATC_IS_ERROR, &atchan->status); } tasklet_schedule(&atchan->tasklet); ret = IRQ_HANDLED; @@ -759,6 +807,148 @@ err_desc_get: return NULL; } +/** + * atc_dma_cyclic_check_values + * Check for too big/unaligned periods and unaligned DMA buffer + */ +static int +atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr, + size_t period_len, enum dma_data_direction direction) +{ + if (period_len > (ATC_BTSIZE_MAX << reg_width)) + goto err_out; + if (unlikely(period_len & ((1 << reg_width) - 1))) + goto err_out; + if (unlikely(buf_addr & ((1 << reg_width) - 1))) + goto err_out; + if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE)))) + goto err_out; + + return 0; + +err_out: + return -EINVAL; +} + +/** + * atc_dma_cyclic_fill_desc - Fill one period decriptor + */ +static int +atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc, + unsigned int period_index, dma_addr_t buf_addr, + size_t period_len, enum dma_data_direction direction) +{ + u32 ctrla; + unsigned int reg_width = atslave->reg_width; + + /* prepare common CRTLA value */ + ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla + | ATC_DST_WIDTH(reg_width) + | ATC_SRC_WIDTH(reg_width) + | period_len >> reg_width; + + switch (direction) { + case DMA_TO_DEVICE: + desc->lli.saddr = buf_addr + (period_len * period_index); + desc->lli.daddr = atslave->tx_reg; + desc->lli.ctrla = ctrla; + desc->lli.ctrlb = ATC_DEFAULT_CTRLB + | ATC_DST_ADDR_MODE_FIXED + | ATC_SRC_ADDR_MODE_INCR + | ATC_FC_MEM2PER; + break; + + case DMA_FROM_DEVICE: + desc->lli.saddr = atslave->rx_reg; + desc->lli.daddr = buf_addr + (period_len * period_index); + desc->lli.ctrla = ctrla; + desc->lli.ctrlb = ATC_DEFAULT_CTRLB + | ATC_DST_ADDR_MODE_INCR + | ATC_SRC_ADDR_MODE_FIXED + | ATC_FC_PER2MEM; + break; + + default: + return -EINVAL; + } + + return 0; +} + +/** + * atc_prep_dma_cyclic - prepare the cyclic DMA transfer + * @chan: the DMA channel to prepare + * @buf_addr: physical DMA address where the buffer starts + * @buf_len: total number of bytes for the entire buffer + * @period_len: number of bytes for each period + * @direction: transfer direction, to or from device + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_data_direction direction) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma_slave *atslave = chan->private; + struct at_desc *first = NULL; + struct at_desc *prev = NULL; + unsigned long was_cyclic; + unsigned int periods = buf_len / period_len; + unsigned int i; + + dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n", + direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE", + buf_addr, + periods, buf_len, period_len); + + if (unlikely(!atslave || !buf_len || !period_len)) { + dev_dbg(chan2dev(chan), "prep_dma_cyclic: length is zero!\n"); + return NULL; + } + + was_cyclic = test_and_set_bit(ATC_IS_CYCLIC, &atchan->status); + if (was_cyclic) { + dev_dbg(chan2dev(chan), "prep_dma_cyclic: channel in use!\n"); + return NULL; + } + + /* Check for too big/unaligned periods and unaligned DMA buffer */ + if (atc_dma_cyclic_check_values(atslave->reg_width, buf_addr, + period_len, direction)) + goto err_out; + + /* build cyclic linked list */ + for (i = 0; i < periods; i++) { + struct at_desc *desc; + + desc = atc_desc_get(atchan); + if (!desc) + goto err_desc_get; + + if (atc_dma_cyclic_fill_desc(atslave, desc, i, buf_addr, + period_len, direction)) + goto err_desc_get; + + atc_desc_chain(&first, &prev, desc); + } + + /* lets make a cyclic list */ + prev->lli.dscr = first->txd.phys; + + /* First descriptor of the chain embedds additional information */ + first->txd.cookie = -EBUSY; + first->len = buf_len; + + return &first->txd; + +err_desc_get: + dev_err(chan2dev(chan), "not enough descriptors available\n"); + atc_desc_put(atchan, first); +err_out: + clear_bit(ATC_IS_CYCLIC, &atchan->status); + return NULL; +} + + static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned long arg) { @@ -793,6 +983,9 @@ static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, list_for_each_entry_safe(desc, _desc, &list, desc_node) atc_chain_complete(atchan, desc); + /* if channel dedicated to cyclic operations, free it */ + clear_bit(ATC_IS_CYCLIC, &atchan->status); + spin_unlock_bh(&atchan->lock); return 0; @@ -853,6 +1046,10 @@ static void atc_issue_pending(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "issue_pending\n"); + /* Not needed for cyclic transfers */ + if (test_bit(ATC_IS_CYCLIC, &atchan->status)) + return; + spin_lock_bh(&atchan->lock); if (!atc_chan_is_enabled(atchan)) { atc_advance_work(atchan); @@ -959,6 +1156,7 @@ static void atc_free_chan_resources(struct dma_chan *chan) } list_splice_init(&atchan->free_list, &list); atchan->descs_allocated = 0; + atchan->status = 0; dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); } @@ -1092,10 +1290,15 @@ static int __init at_dma_probe(struct platform_device *pdev) if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; - if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { + if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; + + if (dma_has_cap(DMA_CYCLIC, atdma->dma_common.cap_mask)) + atdma->dma_common.device_prep_dma_cyclic = atc_prep_dma_cyclic; + + if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) || + dma_has_cap(DMA_CYCLIC, atdma->dma_common.cap_mask)) atdma->dma_common.device_control = atc_control; - } dma_writel(atdma, EN, AT_DMA_ENABLE); diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 8303306ea82..c79a9e07f7b 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -180,13 +180,23 @@ txd_to_at_desc(struct dma_async_tx_descriptor *txd) /*-- Channels --------------------------------------------------------*/ +/** + * atc_status - information bits stored in channel status flag + * + * Manipulated with atomic operations. + */ +enum atc_status { + ATC_IS_ERROR = 0, + ATC_IS_CYCLIC = 24, +}; + /** * struct at_dma_chan - internal representation of an Atmel HDMAC channel * @chan_common: common dmaengine channel object members * @device: parent device * @ch_regs: memory mapped register base * @mask: channel index in a mask - * @error_status: transmit error status information from irq handler + * @status: transmit status information from irq/prep* functions * to tasklet (use atomic operations) * @tasklet: bottom half to finish transaction work * @lock: serializes enqueue/dequeue operations to descriptors lists @@ -201,7 +211,7 @@ struct at_dma_chan { struct at_dma *device; void __iomem *ch_regs; u8 mask; - unsigned long error_status; + unsigned long status; struct tasklet_struct tasklet; spinlock_t lock; -- cgit v1.2.3-70-g09d2 From cc52a10a048fc1fbe4ffba58c2f0afc79ae0f56f Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Sat, 30 Apr 2011 16:57:47 +0200 Subject: dmaengine: at_hdmac: debug information sg_len for prep_slave_sg Signed-off-by: Nicolas Ferre Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index ed9d92429de..8f50a0fb79e 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -687,7 +687,8 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, struct scatterlist *sg; size_t total_len = 0; - dev_vdbg(chan2dev(chan), "prep_slave_sg: %s f0x%lx\n", + dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n", + sg_len, direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE", flags); -- cgit v1.2.3-70-g09d2 From 2f432823ec6e693d7b934e805ce1838f41d66ce7 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Sat, 30 Apr 2011 16:57:48 +0200 Subject: dmaengine: at_hdmac: remove channel status testing in tasklet There is no need to test if channel is enabled in tasklet: - in error path, channel is disabled in interrupt routine - in normal path, this test is performed in sub functions to report a misuse of the engine. Signed-off-by: Nicolas Ferre Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 8f50a0fb79e..65bd52a84bc 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -472,13 +472,6 @@ static void atc_tasklet(unsigned long data) { struct at_dma_chan *atchan = (struct at_dma_chan *)data; - /* Channel cannot be enabled here */ - if (atc_chan_is_enabled(atchan)) { - dev_err(chan2dev(&atchan->chan_common), - "BUG: channel enabled in tasklet\n"); - return; - } - spin_lock(&atchan->lock); if (test_and_clear_bit(ATC_IS_ERROR, &atchan->status)) atc_handle_error(atchan); -- cgit v1.2.3-70-g09d2 From ae14d4b5e0a4ebc4e674831cbb97b73ba66dba08 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Sat, 30 Apr 2011 16:57:49 +0200 Subject: dmaengine: at_hdmac: specialize AHB interfaces to optimize transfers DMA controller has two AHB interfaces on the SOC internal matrix. It is more efficient to specialize each interface as the access to memory can introduce latencies that are not compatible with peripheral accesses requirements. Signed-off-by: Nicolas Ferre Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 26 +++++++++++++++----------- drivers/dma/at_hdmac_regs.h | 4 ++++ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 65bd52a84bc..f52c9e38d88 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -37,8 +37,8 @@ #define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO) #define ATC_DEFAULT_CTRLA (0) -#define ATC_DEFAULT_CTRLB (ATC_SIF(0) \ - |ATC_DIF(1)) +#define ATC_DEFAULT_CTRLB (ATC_SIF(AT_DMA_MEM_IF) \ + |ATC_DIF(AT_DMA_MEM_IF)) /* * Initial number of descriptors to allocate for each channel. This could @@ -693,14 +693,15 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg_width = atslave->reg_width; ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; - ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN; + ctrlb = ATC_IEN; switch (direction) { case DMA_TO_DEVICE: ctrla |= ATC_DST_WIDTH(reg_width); ctrlb |= ATC_DST_ADDR_MODE_FIXED | ATC_SRC_ADDR_MODE_INCR - | ATC_FC_MEM2PER; + | ATC_FC_MEM2PER + | ATC_SIF(AT_DMA_MEM_IF) | ATC_DIF(AT_DMA_PER_IF); reg = atslave->tx_reg; for_each_sg(sgl, sg, sg_len, i) { struct at_desc *desc; @@ -741,7 +742,8 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, ctrla |= ATC_SRC_WIDTH(reg_width); ctrlb |= ATC_DST_ADDR_MODE_INCR | ATC_SRC_ADDR_MODE_FIXED - | ATC_FC_PER2MEM; + | ATC_FC_PER2MEM + | ATC_SIF(AT_DMA_PER_IF) | ATC_DIF(AT_DMA_MEM_IF); reg = atslave->rx_reg; for_each_sg(sgl, sg, sg_len, i) { @@ -846,20 +848,22 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc, desc->lli.saddr = buf_addr + (period_len * period_index); desc->lli.daddr = atslave->tx_reg; desc->lli.ctrla = ctrla; - desc->lli.ctrlb = ATC_DEFAULT_CTRLB - | ATC_DST_ADDR_MODE_FIXED + desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED | ATC_SRC_ADDR_MODE_INCR - | ATC_FC_MEM2PER; + | ATC_FC_MEM2PER + | ATC_SIF(AT_DMA_MEM_IF) + | ATC_DIF(AT_DMA_PER_IF); break; case DMA_FROM_DEVICE: desc->lli.saddr = atslave->rx_reg; desc->lli.daddr = buf_addr + (period_len * period_index); desc->lli.ctrla = ctrla; - desc->lli.ctrlb = ATC_DEFAULT_CTRLB - | ATC_DST_ADDR_MODE_INCR + desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR | ATC_SRC_ADDR_MODE_FIXED - | ATC_FC_PER2MEM; + | ATC_FC_PER2MEM + | ATC_SIF(AT_DMA_PER_IF) + | ATC_DIF(AT_DMA_MEM_IF); break; default: diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index c79a9e07f7b..ae3056df4f4 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -103,6 +103,10 @@ /* Bitfields in CTRLB */ #define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */ #define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */ + /* Specify AHB interfaces */ +#define AT_DMA_MEM_IF 0 /* interface 0 as memory interface */ +#define AT_DMA_PER_IF 1 /* interface 1 as peripheral interface */ + #define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */ #define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */ #define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */ -- cgit v1.2.3-70-g09d2 From 711b9cea92554be6bd44f04f2485582d762fc441 Mon Sep 17 00:00:00 2001 From: Philippe Langlais Date: Sat, 7 May 2011 17:09:43 +0200 Subject: dmaengine/ste_dma40: fix introduced warnings The compiler nowadays moans about possibly non-assigned variable. Fix this by default-assigning 0. Signed-off-by: Philippe Langlais Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index af955de035f..65d2535d12e 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1829,7 +1829,7 @@ d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction) { struct stedma40_platform_data *plat = chan->base->plat_data; struct stedma40_chan_cfg *cfg = &chan->dma_cfg; - dma_addr_t addr; + dma_addr_t addr = 0; if (chan->runtime_addr) return chan->runtime_addr; -- cgit v1.2.3-70-g09d2 From 543aabc7d295bfe2489f184259395e3467520d48 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 6 May 2011 19:56:51 +0200 Subject: dmaengine: at_hdmac: set residue as total len in atc_tx_status If transfer status is !=DMA_SUCCESS, return total transfer len as residue, instead of zero. Idea from dw_dmac patch by Viresh Kumar. Signed-off-by: Nicolas Ferre Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index f52c9e38d88..ba0b5ec4e4c 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1026,7 +1026,12 @@ atc_tx_status(struct dma_chan *chan, spin_unlock_bh(&atchan->lock); - dma_set_tx_state(txstate, last_complete, last_used, 0); + if (ret != DMA_SUCCESS) + dma_set_tx_state(txstate, last_complete, last_used, + atc_first_active(atchan)->len); + else + dma_set_tx_state(txstate, last_complete, last_used, 0); + dev_vdbg(chan2dev(chan), "tx_status: %d (d%d, u%d)\n", cookie, last_complete ? last_complete : 0, last_used ? last_used : 0); -- cgit v1.2.3-70-g09d2 From 23b5e3ad68a3c26a6a36039ea907997664aedcab Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 6 May 2011 19:56:52 +0200 Subject: dmaengine: at_hdmac: implement pause and resume in atc_control Pause and resume controls are useful for audio devices. This also returns correct status from atc_tx_status() in case chan is paused. Idea from dw_dmac patch by Linus Walleij. Signed-off-by: Nicolas Ferre Acked-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 97 ++++++++++++++++++++++++++++++++------------- drivers/dma/at_hdmac_regs.h | 1 + 2 files changed, 71 insertions(+), 27 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index ba0b5ec4e4c..5968245e1e6 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -508,7 +508,8 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) { if (pending & AT_DMA_ERR(i)) { /* Disable channel on AHB error */ - dma_writel(atdma, CHDR, atchan->mask); + dma_writel(atdma, CHDR, + AT_DMA_RES(i) | atchan->mask); /* Give information to tasklet */ set_bit(ATC_IS_ERROR, &atchan->status); } @@ -952,39 +953,78 @@ static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - struct at_desc *desc, *_desc; + int chan_id = atchan->chan_common.chan_id; + LIST_HEAD(list); - /* Only supports DMA_TERMINATE_ALL */ - if (cmd != DMA_TERMINATE_ALL) - return -ENXIO; + dev_vdbg(chan2dev(chan), "atc_control (%d)\n", cmd); - /* - * This is only called when something went wrong elsewhere, so - * we don't really care about the data. Just disable the - * channel. We still have to poll the channel enable bit due - * to AHB/HSB limitations. - */ - spin_lock_bh(&atchan->lock); + if (cmd == DMA_PAUSE) { + int pause_timeout = 1000; - dma_writel(atdma, CHDR, atchan->mask); + spin_lock_bh(&atchan->lock); - /* confirm that this channel is disabled */ - while (dma_readl(atdma, CHSR) & atchan->mask) - cpu_relax(); + dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id)); + + /* wait for FIFO to be empty */ + while (!(dma_readl(atdma, CHSR) & AT_DMA_EMPT(chan_id))) { + if (pause_timeout-- > 0) { + /* the FIFO can only drain if the peripheral + * is still requesting data: + * -> timeout if it is not the case. */ + dma_writel(atdma, CHDR, AT_DMA_RES(chan_id)); + spin_unlock_bh(&atchan->lock); + return -ETIMEDOUT; + } + cpu_relax(); + } - /* active_list entries will end up before queued entries */ - list_splice_init(&atchan->queue, &list); - list_splice_init(&atchan->active_list, &list); + set_bit(ATC_IS_PAUSED, &atchan->status); - /* Flush all pending and queued descriptors */ - list_for_each_entry_safe(desc, _desc, &list, desc_node) - atc_chain_complete(atchan, desc); + spin_unlock_bh(&atchan->lock); + } else if (cmd == DMA_RESUME) { + if (!test_bit(ATC_IS_PAUSED, &atchan->status)) + return 0; - /* if channel dedicated to cyclic operations, free it */ - clear_bit(ATC_IS_CYCLIC, &atchan->status); + spin_lock_bh(&atchan->lock); - spin_unlock_bh(&atchan->lock); + dma_writel(atdma, CHDR, AT_DMA_RES(chan_id)); + clear_bit(ATC_IS_PAUSED, &atchan->status); + + spin_unlock_bh(&atchan->lock); + } else if (cmd == DMA_TERMINATE_ALL) { + struct at_desc *desc, *_desc; + /* + * This is only called when something went wrong elsewhere, so + * we don't really care about the data. Just disable the + * channel. We still have to poll the channel enable bit due + * to AHB/HSB limitations. + */ + spin_lock_bh(&atchan->lock); + + /* disabling channel: must also remove suspend state */ + dma_writel(atdma, CHDR, AT_DMA_RES(chan_id) | atchan->mask); + + /* confirm that this channel is disabled */ + while (dma_readl(atdma, CHSR) & atchan->mask) + cpu_relax(); + + /* active_list entries will end up before queued entries */ + list_splice_init(&atchan->queue, &list); + list_splice_init(&atchan->active_list, &list); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + atc_chain_complete(atchan, desc); + + clear_bit(ATC_IS_PAUSED, &atchan->status); + /* if channel dedicated to cyclic operations, free it */ + clear_bit(ATC_IS_CYCLIC, &atchan->status); + + spin_unlock_bh(&atchan->lock); + } else { + return -ENXIO; + } return 0; } @@ -1032,8 +1072,11 @@ atc_tx_status(struct dma_chan *chan, else dma_set_tx_state(txstate, last_complete, last_used, 0); - dev_vdbg(chan2dev(chan), "tx_status: %d (d%d, u%d)\n", - cookie, last_complete ? last_complete : 0, + if (test_bit(ATC_IS_PAUSED, &atchan->status)) + ret = DMA_PAUSED; + + dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d (d%d, u%d)\n", + ret, cookie, last_complete ? last_complete : 0, last_used ? last_used : 0); return ret; diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index ae3056df4f4..087dbf1dd39 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -191,6 +191,7 @@ txd_to_at_desc(struct dma_async_tx_descriptor *txd) */ enum atc_status { ATC_IS_ERROR = 0, + ATC_IS_PAUSED = 1, ATC_IS_CYCLIC = 24, }; -- cgit v1.2.3-70-g09d2 From e257e1563f28890f54b5f82861373bb4b32dd770 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 6 May 2011 19:56:53 +0200 Subject: dmaengine: at_hdmac: use descriptor chaining help function A little function helps to chain descriptors: it is already used in cyclic dma operations, now use it in memcpy and slave_sg preparation functions. Signed-off-by: Nicolas Ferre Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 5968245e1e6..2c6bc3aa3dd 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -626,16 +626,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, desc->txd.cookie = 0; - if (!first) { - first = desc; - } else { - /* inform the HW lli about chaining */ - prev->lli.dscr = desc->txd.phys; - /* insert the link descriptor to the LD ring */ - list_add_tail(&desc->desc_node, - &first->tx_list); - } - prev = desc; + atc_desc_chain(&first, &prev, desc); } /* First descriptor of the chain embedds additional information */ @@ -726,16 +717,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | len >> mem_width; desc->lli.ctrlb = ctrlb; - if (!first) { - first = desc; - } else { - /* inform the HW lli about chaining */ - prev->lli.dscr = desc->txd.phys; - /* insert the link descriptor to the LD ring */ - list_add_tail(&desc->desc_node, - &first->tx_list); - } - prev = desc; + atc_desc_chain(&first, &prev, desc); total_len += len; } break; @@ -769,16 +751,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | len >> reg_width; desc->lli.ctrlb = ctrlb; - if (!first) { - first = desc; - } else { - /* inform the HW lli about chaining */ - prev->lli.dscr = desc->txd.phys; - /* insert the link descriptor to the LD ring */ - list_add_tail(&desc->desc_node, - &first->tx_list); - } - prev = desc; + atc_desc_chain(&first, &prev, desc); total_len += len; } break; -- cgit v1.2.3-70-g09d2 From c8fcba600c46c5d7667ec230b1d9ce3ce5859f9c Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Mon, 9 May 2011 16:09:35 +0900 Subject: pch_dma: fix dma direction issue for ML7213 IOH video-in Currently, even-channel number is set as tx direction and odd is set as rx. However, though video-in uses ch6, the direction is not tx but rx. This patch sets video-in's DMA direction correctly. Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 6eebc6205c6..d28c47a42f1 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -478,7 +478,6 @@ static int pd_alloc_chan_resources(struct dma_chan *chan) spin_unlock_bh(&pd_chan->lock); pdc_enable_irq(chan, 1); - pdc_set_dir(chan); return pd_chan->descs_allocated; } @@ -561,6 +560,9 @@ static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan, else return NULL; + pd_chan->dir = direction; + pdc_set_dir(chan); + for_each_sg(sgl, sg, sg_len, i) { desc = pdc_desc_get(pd_chan); @@ -850,8 +852,6 @@ static int __devinit pch_dma_probe(struct pci_dev *pdev, pd_chan->membase = ®s->desc[i]; - pd_chan->dir = (i % 2) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - spin_lock_init(&pd_chan->lock); INIT_LIST_HEAD(&pd_chan->active_list); -- cgit v1.2.3-70-g09d2 From 08645fdc7bab4564f7dfd07525da8a1761f8f106 Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Mon, 9 May 2011 16:09:36 +0900 Subject: pch_dma: modify for checkpatch Fix checkpatch warnings. Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index d28c47a42f1..afd6fba08a6 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -138,7 +138,8 @@ struct pch_dma { #define dma_writel(pd, name, val) \ writel((val), (pd)->membase + PCH_DMA_##name) -static inline struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd) +static inline +struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd) { return container_of(txd, struct pch_dma_desc, txd); } @@ -163,13 +164,15 @@ static inline struct device *chan2parent(struct dma_chan *chan) return chan->dev->device.parent; } -static inline struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan) +static inline +struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan) { return list_first_entry(&pd_chan->active_list, struct pch_dma_desc, desc_node); } -static inline struct pch_dma_desc *pdc_first_queued(struct pch_dma_chan *pd_chan) +static inline +struct pch_dma_desc *pdc_first_queued(struct pch_dma_chan *pd_chan) { return list_first_entry(&pd_chan->queue, struct pch_dma_desc, desc_node); -- cgit v1.2.3-70-g09d2 From 60092d0bde4c8741198da4a69b693d3709385bf1 Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Mon, 9 May 2011 16:09:37 +0900 Subject: pch_dma: Fix DMA setting issue Currently, Direct-Start mode(*) is enabled. Our IOH's devices must not use this mode. This causes unexpected behavior. This patch deletes Direct-Start setting. (*) This mode is used in order for CPU to generate the DMA request. Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index afd6fba08a6..4e466720406 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -254,9 +254,6 @@ static bool pdc_is_idle(struct pch_dma_chan *pd_chan) static void pdc_dostart(struct pch_dma_chan *pd_chan, struct pch_dma_desc* desc) { - struct pch_dma *pd = to_pd(pd_chan->chan.device); - u32 val; - if (!pdc_is_idle(pd_chan)) { dev_err(chan2dev(&pd_chan->chan), "BUG: Attempt to start non-idle channel\n"); @@ -282,10 +279,6 @@ static void pdc_dostart(struct pch_dma_chan *pd_chan, struct pch_dma_desc* desc) channel_writel(pd_chan, NEXT, desc->txd.phys); pdc_set_mode(&pd_chan->chan, DMA_CTL0_SG); } - - val = dma_readl(pd, CTL2); - val |= 1 << (DMA_CTL2_START_SHIFT_BITS + pd_chan->chan.chan_id); - dma_writel(pd, CTL2, val); } static void pdc_chain_complete(struct pch_dma_chan *pd_chan, -- cgit v1.2.3-70-g09d2 From 194f5f2706c7472f9c6bb2d17fa788993606581f Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Mon, 9 May 2011 16:09:38 +0900 Subject: pch_dma: Support I2S for ML7213 IOH Support I2S device for ML7213 IOH Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 62 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 4e466720406..2edcc9c1029 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -77,10 +77,10 @@ struct pch_dma_regs { u32 dma_ctl0; u32 dma_ctl1; u32 dma_ctl2; - u32 reserved1; + u32 dma_ctl3; u32 dma_sts0; u32 dma_sts1; - u32 reserved2; + u32 dma_sts2; u32 reserved3; struct pch_dma_desc_regs desc[MAX_CHAN_NR]; }; @@ -130,6 +130,7 @@ struct pch_dma { #define PCH_DMA_CTL0 0x00 #define PCH_DMA_CTL1 0x04 #define PCH_DMA_CTL2 0x08 +#define PCH_DMA_CTL3 0x0C #define PCH_DMA_STS0 0x10 #define PCH_DMA_STS1 0x14 @@ -202,16 +203,30 @@ static void pdc_set_dir(struct dma_chan *chan) struct pch_dma *pd = to_pd(chan->device); u32 val; - val = dma_readl(pd, CTL0); + if (chan->chan_id < 8) { + val = dma_readl(pd, CTL0); - if (pd_chan->dir == DMA_TO_DEVICE) - val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + - DMA_CTL0_DIR_SHIFT_BITS); - else - val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + - DMA_CTL0_DIR_SHIFT_BITS)); + if (pd_chan->dir == DMA_TO_DEVICE) + val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + + DMA_CTL0_DIR_SHIFT_BITS); + else + val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + + DMA_CTL0_DIR_SHIFT_BITS)); + + dma_writel(pd, CTL0, val); + } else { + int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ + val = dma_readl(pd, CTL3); - dma_writel(pd, CTL0, val); + if (pd_chan->dir == DMA_TO_DEVICE) + val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch + + DMA_CTL0_DIR_SHIFT_BITS); + else + val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch + + DMA_CTL0_DIR_SHIFT_BITS)); + + dma_writel(pd, CTL3, val); + } dev_dbg(chan2dev(chan), "pdc_set_dir: chan %d -> %x\n", chan->chan_id, val); @@ -222,13 +237,26 @@ static void pdc_set_mode(struct dma_chan *chan, u32 mode) struct pch_dma *pd = to_pd(chan->device); u32 val; - val = dma_readl(pd, CTL0); + if (chan->chan_id < 8) { + val = dma_readl(pd, CTL0); - val &= ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * chan->chan_id)); - val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id); + val &= ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id); - dma_writel(pd, CTL0, val); + dma_writel(pd, CTL0, val); + } else { + int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ + + val = dma_readl(pd, CTL3); + + val &= ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + val |= mode << (DMA_CTL0_BITS_PER_CH * ch); + + dma_writel(pd, CTL3, val); + + } dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n", chan->chan_id, val); @@ -701,6 +729,7 @@ static void pch_dma_save_regs(struct pch_dma *pd) pd->regs.dma_ctl0 = dma_readl(pd, CTL0); pd->regs.dma_ctl1 = dma_readl(pd, CTL1); pd->regs.dma_ctl2 = dma_readl(pd, CTL2); + pd->regs.dma_ctl3 = dma_readl(pd, CTL3); list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) { pd_chan = to_pd_chan(chan); @@ -723,6 +752,7 @@ static void pch_dma_restore_regs(struct pch_dma *pd) dma_writel(pd, CTL0, pd->regs.dma_ctl0); dma_writel(pd, CTL1, pd->regs.dma_ctl1); dma_writel(pd, CTL2, pd->regs.dma_ctl2); + dma_writel(pd, CTL3, pd->regs.dma_ctl3); list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) { pd_chan = to_pd_chan(chan); @@ -925,6 +955,7 @@ static void __devexit pch_dma_remove(struct pci_dev *pdev) #define PCI_DEVICE_ID_ML7213_DMA1_8CH 0x8026 #define PCI_DEVICE_ID_ML7213_DMA2_8CH 0x802B #define PCI_DEVICE_ID_ML7213_DMA3_4CH 0x8034 +#define PCI_DEVICE_ID_ML7213_DMA4_12CH 0x8032 static const struct pci_device_id pch_dma_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 }, @@ -932,6 +963,7 @@ static const struct pci_device_id pch_dma_id_table[] = { { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA2_8CH), 8}, /* PCMIF SPI */ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA3_4CH), 4}, /* FPGA */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA4_12CH), 12}, /* I2S */ { 0, }, }; -- cgit v1.2.3-70-g09d2 From c0dfc04ac96847913a791f5459f4ac83a81a4745 Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Mon, 9 May 2011 16:09:39 +0900 Subject: pch_dma: Support new device ML7223 IOH Support new device OKI SEMICONDUCTOR ML7223 IOH(Input/Output Hub). The ML7223 IOH is for MP(Media Phone) use. The ML7223 is companion chip for Intel Atom E6xx series. The ML7223 is completely compatible for Intel EG20T PCH. Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 12 +++++++----- drivers/dma/pch_dma.c | 8 ++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index a572600e44e..25cf327cd1c 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -200,16 +200,18 @@ config PL330_DMA platform_data for a dma-pl330 device. config PCH_DMA - tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7213 IOH DMA support" + tristate "Intel EG20T PCH / OKI Semi IOH(ML7213/ML7223) DMA support" depends on PCI && X86 select DMA_ENGINE help Enable support for Intel EG20T PCH DMA engine. - This driver also can be used for OKI SEMICONDUCTOR ML7213 IOH(Input/ - Output Hub) which is for IVI(In-Vehicle Infotainment) use. - ML7213 is companion chip for Intel Atom E6xx series. - ML7213 is completely compatible for Intel EG20T PCH. + This driver also can be used for OKI SEMICONDUCTOR IOH(Input/ + Output Hub), ML7213 and ML7223. + ML7213 IOH is for IVI(In-Vehicle Infotainment) use and ML7223 IOH is + for MP(Media Phone) use. + ML7213/ML7223 is companion chip for Intel Atom E6xx series. + ML7213/ML7223 is completely compatible for Intel EG20T PCH. config IMX_SDMA tristate "i.MX SDMA support" diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 2edcc9c1029..083e698da53 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -956,6 +956,10 @@ static void __devexit pch_dma_remove(struct pci_dev *pdev) #define PCI_DEVICE_ID_ML7213_DMA2_8CH 0x802B #define PCI_DEVICE_ID_ML7213_DMA3_4CH 0x8034 #define PCI_DEVICE_ID_ML7213_DMA4_12CH 0x8032 +#define PCI_DEVICE_ID_ML7223_DMA1_4CH 0x800B +#define PCI_DEVICE_ID_ML7223_DMA2_4CH 0x800E +#define PCI_DEVICE_ID_ML7223_DMA3_4CH 0x8017 +#define PCI_DEVICE_ID_ML7223_DMA4_4CH 0x803B static const struct pci_device_id pch_dma_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 }, @@ -964,6 +968,10 @@ static const struct pci_device_id pch_dma_id_table[] = { { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA2_8CH), 8}, /* PCMIF SPI */ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA3_4CH), 4}, /* FPGA */ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA4_12CH), 12}, /* I2S */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA1_4CH), 4}, /* UART */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA2_4CH), 4}, /* Video SPI */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA3_4CH), 4}, /* Security */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA4_4CH), 4}, /* FPGA */ { 0, }, }; -- cgit v1.2.3-70-g09d2 From eb8590b504caacb029dea4540e0b0dcc98da4381 Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Mon, 9 May 2011 16:09:40 +0900 Subject: pch_dma: modify pci device table definition Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 083e698da53..ff5b38f9d45 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -961,7 +961,7 @@ static void __devexit pch_dma_remove(struct pci_dev *pdev) #define PCI_DEVICE_ID_ML7223_DMA3_4CH 0x8017 #define PCI_DEVICE_ID_ML7223_DMA4_4CH 0x803B -static const struct pci_device_id pch_dma_id_table[] = { +DEFINE_PCI_DEVICE_TABLE(pch_dma_id_table) = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 }, { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */ -- cgit v1.2.3-70-g09d2 From 0f6896f195df014064014493c8287a8e2d18f938 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 13 Apr 2011 11:33:17 +0800 Subject: ACPICA: Split all internal Global Lock functions to new file - evglock These functions were moved from evmisc.c Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/Makefile | 2 +- drivers/acpi/acpica/acevents.h | 17 ++- drivers/acpi/acpica/evglock.c | 335 +++++++++++++++++++++++++++++++++++++++++ drivers/acpi/acpica/evmisc.c | 309 ------------------------------------- 4 files changed, 347 insertions(+), 316 deletions(-) create mode 100644 drivers/acpi/acpica/evglock.c diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile index a1224712fd0..301bd2d388a 100644 --- a/drivers/acpi/acpica/Makefile +++ b/drivers/acpi/acpica/Makefile @@ -14,7 +14,7 @@ acpi-y := dsfield.o dsmthdat.o dsopcode.o dswexec.o dswscope.o \ acpi-y += evevent.o evregion.o evsci.o evxfevnt.o \ evmisc.o evrgnini.o evxface.o evxfregn.o \ - evgpe.o evgpeblk.o evgpeinit.o evgpeutil.o evxfgpe.o + evgpe.o evgpeblk.o evgpeinit.o evgpeutil.o evxfgpe.o evglock.o acpi-y += exconfig.o exfield.o exnames.o exoparg6.o exresolv.o exstorob.o\ exconvrt.o exfldio.o exoparg1.o exprep.o exresop.o exsystem.o\ diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index 41d247daf46..bea3b489918 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -58,18 +58,23 @@ u32 acpi_ev_fixed_event_detect(void); */ u8 acpi_ev_is_notify_object(struct acpi_namespace_node *node); -acpi_status acpi_ev_acquire_global_lock(u16 timeout); - -acpi_status acpi_ev_release_global_lock(void); - -acpi_status acpi_ev_init_global_lock_handler(void); - u32 acpi_ev_get_gpe_number_index(u32 gpe_number); acpi_status acpi_ev_queue_notify_request(struct acpi_namespace_node *node, u32 notify_value); +/* + * evglock - Global Lock support + */ +acpi_status acpi_ev_init_global_lock_handler(void); + +acpi_status acpi_ev_acquire_global_lock(u16 timeout); + +acpi_status acpi_ev_release_global_lock(void); + +acpi_status acpi_ev_remove_global_lock_handler(void); + /* * evgpe - Low-level GPE support */ diff --git a/drivers/acpi/acpica/evglock.c b/drivers/acpi/acpica/evglock.c new file mode 100644 index 00000000000..56a562a1e5d --- /dev/null +++ b/drivers/acpi/acpica/evglock.c @@ -0,0 +1,335 @@ +/****************************************************************************** + * + * Module Name: evglock - Global Lock support + * + *****************************************************************************/ + +/* + * Copyright (C) 2000 - 2011, Intel Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + */ + +#include +#include "accommon.h" +#include "acevents.h" +#include "acinterp.h" + +#define _COMPONENT ACPI_EVENTS +ACPI_MODULE_NAME("evglock") + +/* Local prototypes */ +static u32 acpi_ev_global_lock_handler(void *context); + +/******************************************************************************* + * + * FUNCTION: acpi_ev_init_global_lock_handler + * + * PARAMETERS: None + * + * RETURN: Status + * + * DESCRIPTION: Install a handler for the global lock release event + * + ******************************************************************************/ + +acpi_status acpi_ev_init_global_lock_handler(void) +{ + acpi_status status; + + ACPI_FUNCTION_TRACE(ev_init_global_lock_handler); + + /* Attempt installation of the global lock handler */ + + status = acpi_install_fixed_event_handler(ACPI_EVENT_GLOBAL, + acpi_ev_global_lock_handler, + NULL); + + /* + * If the global lock does not exist on this platform, the attempt to + * enable GBL_STATUS will fail (the GBL_ENABLE bit will not stick). + * Map to AE_OK, but mark global lock as not present. Any attempt to + * actually use the global lock will be flagged with an error. + */ + acpi_gbl_global_lock_present = FALSE; + if (status == AE_NO_HARDWARE_RESPONSE) { + ACPI_ERROR((AE_INFO, + "No response from Global Lock hardware, disabling lock")); + + return_ACPI_STATUS(AE_OK); + } + + status = acpi_os_create_lock(&acpi_gbl_global_lock_pending_lock); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + acpi_gbl_global_lock_pending = FALSE; + acpi_gbl_global_lock_present = TRUE; + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ev_remove_global_lock_handler + * + * PARAMETERS: None + * + * RETURN: Status + * + * DESCRIPTION: Remove the handler for the Global Lock + * + ******************************************************************************/ + +acpi_status acpi_ev_remove_global_lock_handler(void) +{ + acpi_status status; + + ACPI_FUNCTION_TRACE(ev_remove_global_lock_handler); + + acpi_gbl_global_lock_present = FALSE; + status = acpi_remove_fixed_event_handler(ACPI_EVENT_GLOBAL, + acpi_ev_global_lock_handler); + + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ev_global_lock_handler + * + * PARAMETERS: Context - From thread interface, not used + * + * RETURN: ACPI_INTERRUPT_HANDLED + * + * DESCRIPTION: Invoked directly from the SCI handler when a global lock + * release interrupt occurs. If there is actually a pending + * request for the lock, signal the waiting thread. + * + ******************************************************************************/ + +static u32 acpi_ev_global_lock_handler(void *context) +{ + acpi_status status; + acpi_cpu_flags flags; + + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); + + /* + * If a request for the global lock is not actually pending, + * we are done. This handles "spurious" global lock interrupts + * which are possible (and have been seen) with bad BIOSs. + */ + if (!acpi_gbl_global_lock_pending) { + goto cleanup_and_exit; + } + + /* + * Send a unit to the global lock semaphore. The actual acquisition + * of the global lock will be performed by the waiting thread. + */ + status = acpi_os_signal_semaphore(acpi_gbl_global_lock_semaphore, 1); + if (ACPI_FAILURE(status)) { + ACPI_ERROR((AE_INFO, "Could not signal Global Lock semaphore")); + } + + acpi_gbl_global_lock_pending = FALSE; + + cleanup_and_exit: + + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + return (ACPI_INTERRUPT_HANDLED); +} + +/****************************************************************************** + * + * FUNCTION: acpi_ev_acquire_global_lock + * + * PARAMETERS: Timeout - Max time to wait for the lock, in millisec. + * + * RETURN: Status + * + * DESCRIPTION: Attempt to gain ownership of the Global Lock. + * + * MUTEX: Interpreter must be locked + * + * Note: The original implementation allowed multiple threads to "acquire" the + * Global Lock, and the OS would hold the lock until the last thread had + * released it. However, this could potentially starve the BIOS out of the + * lock, especially in the case where there is a tight handshake between the + * Embedded Controller driver and the BIOS. Therefore, this implementation + * allows only one thread to acquire the HW Global Lock at a time, and makes + * the global lock appear as a standard mutex on the OS side. + * + *****************************************************************************/ + +acpi_status acpi_ev_acquire_global_lock(u16 timeout) +{ + acpi_cpu_flags flags; + acpi_status status; + u8 acquired = FALSE; + + ACPI_FUNCTION_TRACE(ev_acquire_global_lock); + + /* + * Only one thread can acquire the GL at a time, the global_lock_mutex + * enforces this. This interface releases the interpreter if we must wait. + */ + status = + acpi_ex_system_wait_mutex(acpi_gbl_global_lock_mutex->mutex. + os_mutex, timeout); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* + * Update the global lock handle and check for wraparound. The handle is + * only used for the external global lock interfaces, but it is updated + * here to properly handle the case where a single thread may acquire the + * lock via both the AML and the acpi_acquire_global_lock interfaces. The + * handle is therefore updated on the first acquire from a given thread + * regardless of where the acquisition request originated. + */ + acpi_gbl_global_lock_handle++; + if (acpi_gbl_global_lock_handle == 0) { + acpi_gbl_global_lock_handle = 1; + } + + /* + * Make sure that a global lock actually exists. If not, just + * treat the lock as a standard mutex. + */ + if (!acpi_gbl_global_lock_present) { + acpi_gbl_global_lock_acquired = TRUE; + return_ACPI_STATUS(AE_OK); + } + + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); + + do { + + /* Attempt to acquire the actual hardware lock */ + + ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired); + if (acquired) { + acpi_gbl_global_lock_acquired = TRUE; + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Acquired hardware Global Lock\n")); + break; + } + + /* + * Did not get the lock. The pending bit was set above, and + * we must now wait until we receive the global lock + * released interrupt. + */ + acpi_gbl_global_lock_pending = TRUE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Waiting for hardware Global Lock\n")); + + /* + * Wait for handshake with the global lock interrupt handler. + * This interface releases the interpreter if we must wait. + */ + status = + acpi_ex_system_wait_semaphore + (acpi_gbl_global_lock_semaphore, ACPI_WAIT_FOREVER); + + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); + + } while (ACPI_SUCCESS(status)); + + acpi_gbl_global_lock_pending = FALSE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ev_release_global_lock + * + * PARAMETERS: None + * + * RETURN: Status + * + * DESCRIPTION: Releases ownership of the Global Lock. + * + ******************************************************************************/ + +acpi_status acpi_ev_release_global_lock(void) +{ + u8 pending = FALSE; + acpi_status status = AE_OK; + + ACPI_FUNCTION_TRACE(ev_release_global_lock); + + /* Lock must be already acquired */ + + if (!acpi_gbl_global_lock_acquired) { + ACPI_WARNING((AE_INFO, + "Cannot release the ACPI Global Lock, it has not been acquired")); + return_ACPI_STATUS(AE_NOT_ACQUIRED); + } + + if (acpi_gbl_global_lock_present) { + + /* Allow any thread to release the lock */ + + ACPI_RELEASE_GLOBAL_LOCK(acpi_gbl_FACS, pending); + + /* + * If the pending bit was set, we must write GBL_RLS to the control + * register + */ + if (pending) { + status = + acpi_write_bit_register + (ACPI_BITREG_GLOBAL_LOCK_RELEASE, + ACPI_ENABLE_EVENT); + } + + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Released hardware Global Lock\n")); + } + + acpi_gbl_global_lock_acquired = FALSE; + + /* Release the local GL mutex */ + + acpi_os_release_mutex(acpi_gbl_global_lock_mutex->mutex.os_mutex); + return_ACPI_STATUS(status); +} diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c index 69a3b4aa862..d0b33184442 100644 --- a/drivers/acpi/acpica/evmisc.c +++ b/drivers/acpi/acpica/evmisc.c @@ -45,7 +45,6 @@ #include "accommon.h" #include "acevents.h" #include "acnamesp.h" -#include "acinterp.h" #define _COMPONENT ACPI_EVENTS ACPI_MODULE_NAME("evmisc") @@ -53,10 +52,6 @@ ACPI_MODULE_NAME("evmisc") /* Local prototypes */ static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context); -static u32 acpi_ev_global_lock_handler(void *context); - -static acpi_status acpi_ev_remove_global_lock_handler(void); - /******************************************************************************* * * FUNCTION: acpi_ev_is_notify_object @@ -275,310 +270,6 @@ static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context) acpi_ut_delete_generic_state(notify_info); } -/******************************************************************************* - * - * FUNCTION: acpi_ev_global_lock_handler - * - * PARAMETERS: Context - From thread interface, not used - * - * RETURN: ACPI_INTERRUPT_HANDLED - * - * DESCRIPTION: Invoked directly from the SCI handler when a global lock - * release interrupt occurs. If there is actually a pending - * request for the lock, signal the waiting thread. - * - ******************************************************************************/ - -static u32 acpi_ev_global_lock_handler(void *context) -{ - acpi_status status; - acpi_cpu_flags flags; - - flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); - - /* - * If a request for the global lock is not actually pending, - * we are done. This handles "spurious" global lock interrupts - * which are possible (and have been seen) with bad BIOSs. - */ - if (!acpi_gbl_global_lock_pending) { - goto cleanup_and_exit; - } - - /* - * Send a unit to the global lock semaphore. The actual acquisition - * of the global lock will be performed by the waiting thread. - */ - status = acpi_os_signal_semaphore(acpi_gbl_global_lock_semaphore, 1); - if (ACPI_FAILURE(status)) { - ACPI_ERROR((AE_INFO, "Could not signal Global Lock semaphore")); - } - - acpi_gbl_global_lock_pending = FALSE; - -cleanup_and_exit: - - acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); - return (ACPI_INTERRUPT_HANDLED); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ev_init_global_lock_handler - * - * PARAMETERS: None - * - * RETURN: Status - * - * DESCRIPTION: Install a handler for the global lock release event - * - ******************************************************************************/ - -acpi_status acpi_ev_init_global_lock_handler(void) -{ - acpi_status status; - - ACPI_FUNCTION_TRACE(ev_init_global_lock_handler); - - /* Attempt installation of the global lock handler */ - - status = acpi_install_fixed_event_handler(ACPI_EVENT_GLOBAL, - acpi_ev_global_lock_handler, - NULL); - - /* - * If the global lock does not exist on this platform, the attempt to - * enable GBL_STATUS will fail (the GBL_ENABLE bit will not stick). - * Map to AE_OK, but mark global lock as not present. Any attempt to - * actually use the global lock will be flagged with an error. - */ - acpi_gbl_global_lock_present = FALSE; - if (status == AE_NO_HARDWARE_RESPONSE) { - ACPI_ERROR((AE_INFO, - "No response from Global Lock hardware, disabling lock")); - - return_ACPI_STATUS(AE_OK); - } - - status = acpi_os_create_lock(&acpi_gbl_global_lock_pending_lock); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - acpi_gbl_global_lock_pending = FALSE; - acpi_gbl_global_lock_present = TRUE; - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ev_remove_global_lock_handler - * - * PARAMETERS: None - * - * RETURN: Status - * - * DESCRIPTION: Remove the handler for the Global Lock - * - ******************************************************************************/ - -static acpi_status acpi_ev_remove_global_lock_handler(void) -{ - acpi_status status; - - ACPI_FUNCTION_TRACE(ev_remove_global_lock_handler); - - acpi_gbl_global_lock_present = FALSE; - status = acpi_remove_fixed_event_handler(ACPI_EVENT_GLOBAL, - acpi_ev_global_lock_handler); - - return_ACPI_STATUS(status); -} - -/****************************************************************************** - * - * FUNCTION: acpi_ev_acquire_global_lock - * - * PARAMETERS: Timeout - Max time to wait for the lock, in millisec. - * - * RETURN: Status - * - * DESCRIPTION: Attempt to gain ownership of the Global Lock. - * - * MUTEX: Interpreter must be locked - * - * Note: The original implementation allowed multiple threads to "acquire" the - * Global Lock, and the OS would hold the lock until the last thread had - * released it. However, this could potentially starve the BIOS out of the - * lock, especially in the case where there is a tight handshake between the - * Embedded Controller driver and the BIOS. Therefore, this implementation - * allows only one thread to acquire the HW Global Lock at a time, and makes - * the global lock appear as a standard mutex on the OS side. - * - *****************************************************************************/ -static acpi_thread_id acpi_ev_global_lock_thread_id; -static int acpi_ev_global_lock_acquired; - -acpi_status acpi_ev_acquire_global_lock(u16 timeout) -{ - acpi_cpu_flags flags; - acpi_status status; - u8 acquired = FALSE; - - ACPI_FUNCTION_TRACE(ev_acquire_global_lock); - - /* - * Only one thread can acquire the GL at a time, the global_lock_mutex - * enforces this. This interface releases the interpreter if we must wait. - */ - status = acpi_ex_system_wait_mutex( - acpi_gbl_global_lock_mutex->mutex.os_mutex, 0); - if (status == AE_TIME) { - if (acpi_ev_global_lock_thread_id == acpi_os_get_thread_id()) { - acpi_ev_global_lock_acquired++; - return AE_OK; - } - } - - if (ACPI_FAILURE(status)) { - status = acpi_ex_system_wait_mutex( - acpi_gbl_global_lock_mutex->mutex.os_mutex, - timeout); - } - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - acpi_ev_global_lock_thread_id = acpi_os_get_thread_id(); - acpi_ev_global_lock_acquired++; - - /* - * Update the global lock handle and check for wraparound. The handle is - * only used for the external global lock interfaces, but it is updated - * here to properly handle the case where a single thread may acquire the - * lock via both the AML and the acpi_acquire_global_lock interfaces. The - * handle is therefore updated on the first acquire from a given thread - * regardless of where the acquisition request originated. - */ - acpi_gbl_global_lock_handle++; - if (acpi_gbl_global_lock_handle == 0) { - acpi_gbl_global_lock_handle = 1; - } - - /* - * Make sure that a global lock actually exists. If not, just - * treat the lock as a standard mutex. - */ - if (!acpi_gbl_global_lock_present) { - acpi_gbl_global_lock_acquired = TRUE; - return_ACPI_STATUS(AE_OK); - } - - flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); - - do { - - /* Attempt to acquire the actual hardware lock */ - - ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired); - if (acquired) { - acpi_gbl_global_lock_acquired = TRUE; - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "Acquired hardware Global Lock\n")); - break; - } - - /* - * Did not get the lock. The pending bit was set above, and - * we must now wait until we receive the global lock - * released interrupt. - */ - acpi_gbl_global_lock_pending = TRUE; - acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); - - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "Waiting for hardware Global Lock\n")); - - /* - * Wait for handshake with the global lock interrupt handler. - * This interface releases the interpreter if we must wait. - */ - status = - acpi_ex_system_wait_semaphore - (acpi_gbl_global_lock_semaphore, ACPI_WAIT_FOREVER); - - flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); - - } while (ACPI_SUCCESS(status)); - - acpi_gbl_global_lock_pending = FALSE; - acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); - - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ev_release_global_lock - * - * PARAMETERS: None - * - * RETURN: Status - * - * DESCRIPTION: Releases ownership of the Global Lock. - * - ******************************************************************************/ - -acpi_status acpi_ev_release_global_lock(void) -{ - u8 pending = FALSE; - acpi_status status = AE_OK; - - ACPI_FUNCTION_TRACE(ev_release_global_lock); - - /* Lock must be already acquired */ - - if (!acpi_gbl_global_lock_acquired) { - ACPI_WARNING((AE_INFO, - "Cannot release the ACPI Global Lock, it has not been acquired")); - return_ACPI_STATUS(AE_NOT_ACQUIRED); - } - - acpi_ev_global_lock_acquired--; - if (acpi_ev_global_lock_acquired > 0) { - return AE_OK; - } - - if (acpi_gbl_global_lock_present) { - - /* Allow any thread to release the lock */ - - ACPI_RELEASE_GLOBAL_LOCK(acpi_gbl_FACS, pending); - - /* - * If the pending bit was set, we must write GBL_RLS to the control - * register - */ - if (pending) { - status = - acpi_write_bit_register - (ACPI_BITREG_GLOBAL_LOCK_RELEASE, - ACPI_ENABLE_EVENT); - } - - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "Released hardware Global Lock\n")); - } - - acpi_gbl_global_lock_acquired = FALSE; - - /* Release the local GL mutex */ - acpi_ev_global_lock_thread_id = 0; - acpi_ev_global_lock_acquired = 0; - acpi_os_release_mutex(acpi_gbl_global_lock_mutex->mutex.os_mutex); - return_ACPI_STATUS(status); -} - /****************************************************************************** * * FUNCTION: acpi_ev_terminate -- cgit v1.2.3-70-g09d2 From 945488b9c5fc3f85e3f6a3580235b5c73febc8a6 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 13 Apr 2011 13:15:58 +0800 Subject: ACPICA: Add more methods eligible for NULL package element removal This change adds another group of predefined names to the list of names eligible to have NULL package elements dynamically removed. This group is the names that return a single variable-length package containing simple data types such as integers, buffers, strings. This includes: _ALx,_BCL,_CID, _DOD,_EDL,_FIX,_PCL,_PLD,_PMD,_PRx,_PSL,_Sx, and _TZD. http://www.acpica.org/bugzilla/show_bug.cgi?id=914 Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/nsrepair.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c index 1d76ac85b5e..ac7b854b0bd 100644 --- a/drivers/acpi/acpica/nsrepair.c +++ b/drivers/acpi/acpica/nsrepair.c @@ -74,7 +74,6 @@ ACPI_MODULE_NAME("nsrepair") * * Additional possible repairs: * - * Optional/unnecessary NULL package elements removed * Required package elements that are NULL replaced by Integer/String/Buffer * Incorrect standalone package wrapped with required outer package * @@ -623,16 +622,12 @@ acpi_ns_remove_null_elements(struct acpi_predefined_data *data, ACPI_FUNCTION_NAME(ns_remove_null_elements); /* - * PTYPE1 packages contain no subpackages. - * PTYPE2 packages contain a variable number of sub-packages. We can - * safely remove all NULL elements from the PTYPE2 packages. + * We can safely remove all NULL elements from these package types: + * PTYPE1_VAR packages contain a variable number of simple data types. + * PTYPE2 packages contain a variable number of sub-packages. */ switch (package_type) { - case ACPI_PTYPE1_FIXED: case ACPI_PTYPE1_VAR: - case ACPI_PTYPE1_OPTION: - return; - case ACPI_PTYPE2: case ACPI_PTYPE2_COUNT: case ACPI_PTYPE2_PKG_COUNT: @@ -642,6 +637,8 @@ acpi_ns_remove_null_elements(struct acpi_predefined_data *data, break; default: + case ACPI_PTYPE1_FIXED: + case ACPI_PTYPE1_OPTION: return; } -- cgit v1.2.3-70-g09d2 From 82a1b7cb83b6bd7d2fe3972e847c8ccbff55cb9c Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 13 Apr 2011 13:19:35 +0800 Subject: ACPICA: Update internal address SpaceID for DataTable regions Moved this internal space id in preparation for ACPI 5.0 changes that will include some new space IDs. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acconfig.h | 2 +- drivers/acpi/acpica/amlcode.h | 15 --------------- drivers/acpi/acpica/dswload.c | 2 +- drivers/acpi/acpica/dswload2.c | 2 +- drivers/acpi/acpica/excreate.c | 3 ++- drivers/acpi/acpica/utdecode.c | 5 +++-- include/acpi/actypes.h | 13 +++++++++++-- 7 files changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h index ab87396c2c0..c115bb9bd4d 100644 --- a/drivers/acpi/acpica/acconfig.h +++ b/drivers/acpi/acpica/acconfig.h @@ -187,7 +187,7 @@ /* Operation regions */ -#define ACPI_NUM_PREDEFINED_REGIONS 9 +#define ACPI_NUM_PREDEFINED_REGIONS 8 #define ACPI_USER_REGION_BEGIN 0x80 /* Maximum space_ids for Operation Regions */ diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h index f4f0998d396..1077f17859e 100644 --- a/drivers/acpi/acpica/amlcode.h +++ b/drivers/acpi/acpica/amlcode.h @@ -394,21 +394,6 @@ #define AML_CLASS_METHOD_CALL 0x09 #define AML_CLASS_UNKNOWN 0x0A -/* Predefined Operation Region space_iDs */ - -typedef enum { - REGION_MEMORY = 0, - REGION_IO, - REGION_PCI_CONFIG, - REGION_EC, - REGION_SMBUS, - REGION_CMOS, - REGION_PCI_BAR, - REGION_IPMI, - REGION_DATA_TABLE, /* Internal use only */ - REGION_FIXED_HW = 0x7F -} AML_REGION_TYPES; - /* Comparison operation codes for match_op operator */ typedef enum { diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index 23a3b1ab20c..324acec1179 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -450,7 +450,7 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state) status = acpi_ex_create_region(op->named.data, op->named.length, - REGION_DATA_TABLE, + ACPI_ADR_SPACE_DATA_TABLE, walk_state); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c index 4be4e921dfe..976318138c5 100644 --- a/drivers/acpi/acpica/dswload2.c +++ b/drivers/acpi/acpica/dswload2.c @@ -562,7 +562,7 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state) ((op->common.value.arg)->common.value. integer); } else { - region_space = REGION_DATA_TABLE; + region_space = ACPI_ADR_SPACE_DATA_TABLE; } /* diff --git a/drivers/acpi/acpica/excreate.c b/drivers/acpi/acpica/excreate.c index e7b372d1766..110711afada 100644 --- a/drivers/acpi/acpica/excreate.c +++ b/drivers/acpi/acpica/excreate.c @@ -305,7 +305,8 @@ acpi_ex_create_region(u8 * aml_start, * range */ if ((region_space >= ACPI_NUM_PREDEFINED_REGIONS) && - (region_space < ACPI_USER_REGION_BEGIN)) { + (region_space < ACPI_USER_REGION_BEGIN) && + (region_space != ACPI_ADR_SPACE_DATA_TABLE)) { ACPI_ERROR((AE_INFO, "Invalid AddressSpace type 0x%X", region_space)); return_ACPI_STATUS(AE_AML_INVALID_SPACE_ID); diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c index 136a814cec6..97cb36f85ce 100644 --- a/drivers/acpi/acpica/utdecode.c +++ b/drivers/acpi/acpica/utdecode.c @@ -170,8 +170,7 @@ const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS] = { "SMBus", "SystemCMOS", "PCIBARTarget", - "IPMI", - "DataTable" + "IPMI" }; char *acpi_ut_get_region_name(u8 space_id) @@ -179,6 +178,8 @@ char *acpi_ut_get_region_name(u8 space_id) if (space_id >= ACPI_USER_REGION_BEGIN) { return ("UserDefinedRegion"); + } else if (space_id == ACPI_ADR_SPACE_DATA_TABLE) { + return ("DataTable"); } else if (space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { return ("FunctionalFixedHW"); } else if (space_id >= ACPI_NUM_PREDEFINED_REGIONS) { diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 64f838beaab..ad77c613c74 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -712,8 +712,17 @@ typedef u8 acpi_adr_space_type; #define ACPI_ADR_SPACE_CMOS (acpi_adr_space_type) 5 #define ACPI_ADR_SPACE_PCI_BAR_TARGET (acpi_adr_space_type) 6 #define ACPI_ADR_SPACE_IPMI (acpi_adr_space_type) 7 -#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 8 -#define ACPI_ADR_SPACE_FIXED_HARDWARE (acpi_adr_space_type) 127 + +/* + * Special region types + * + * Note: A Data Table region is a special type of operation region + * that has its own AML opcode. However, internally, the AML + * interpreter simply creates an operation region with an an address + * space type of ACPI_ADR_SPACE_DATA_TABLE. + */ +#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 0x7E /* Internal to ACPICA only */ +#define ACPI_ADR_SPACE_FIXED_HARDWARE (acpi_adr_space_type) 0x7F /* * bit_register IDs -- cgit v1.2.3-70-g09d2 From 07aa99e9df2184e78068f7d5414e29e4a5a1b452 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 13 Apr 2011 13:20:49 +0800 Subject: ACPICA: Move ACPI_NUM_PREDEFINED_REGIONS to a more appropriate place Moved to where the predefined regions are actually defined. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acconfig.h | 1 - include/acpi/actypes.h | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h index c115bb9bd4d..bc533dde16c 100644 --- a/drivers/acpi/acpica/acconfig.h +++ b/drivers/acpi/acpica/acconfig.h @@ -187,7 +187,6 @@ /* Operation regions */ -#define ACPI_NUM_PREDEFINED_REGIONS 8 #define ACPI_USER_REGION_BEGIN 0x80 /* Maximum space_ids for Operation Regions */ diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index ad77c613c74..f3b29fa5654 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -713,8 +713,10 @@ typedef u8 acpi_adr_space_type; #define ACPI_ADR_SPACE_PCI_BAR_TARGET (acpi_adr_space_type) 6 #define ACPI_ADR_SPACE_IPMI (acpi_adr_space_type) 7 +#define ACPI_NUM_PREDEFINED_REGIONS 8 + /* - * Special region types + * Special Address Spaces * * Note: A Data Table region is a special type of operation region * that has its own AML opcode. However, internally, the AML -- cgit v1.2.3-70-g09d2 From e2066ca1b211ff08325c98be9fb8ad95affbaba8 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 13 Apr 2011 13:22:04 +0800 Subject: ACPICA: Execute an orphan _REG method under the EC device This change will force the execution of a _REG method underneath the EC device even if there is no corresponding operation region of type EmbeddedControl. Fixes a problem seen on some machines and apparently is compatible with Windows behavior. http://www.acpica.org/bugzilla/show_bug.cgi?id=875 Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/evregion.c | 121 ++++++++++++++++++++++++++++++++++++++++- drivers/acpi/acpica/evrgnini.c | 2 +- drivers/acpi/acpica/evxfregn.c | 13 +++-- include/acpi/actypes.h | 5 ++ 4 files changed, 132 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index bea7223d7a7..f0edf5c43c0 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -55,6 +55,8 @@ static u8 acpi_ev_has_default_handler(struct acpi_namespace_node *node, acpi_adr_space_type space_id); +static void acpi_ev_orphan_ec_reg_method(void); + static acpi_status acpi_ev_reg_run(acpi_handle obj_handle, u32 level, void *context, void **return_value); @@ -561,7 +563,9 @@ acpi_ev_detach_region(union acpi_operand_object *region_obj, /* Now stop region accesses by executing the _REG method */ - status = acpi_ev_execute_reg_method(region_obj, 0); + status = + acpi_ev_execute_reg_method(region_obj, + ACPI_REG_DISCONNECT); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "from region _REG, [%s]", @@ -1062,6 +1066,12 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, NULL, &space_id, NULL); + /* Special case for EC: handle "orphan" _REG methods with no region */ + + if (space_id == ACPI_ADR_SPACE_EC) { + acpi_ev_orphan_ec_reg_method(); + } + return_ACPI_STATUS(status); } @@ -1120,6 +1130,113 @@ acpi_ev_reg_run(acpi_handle obj_handle, return (AE_OK); } - status = acpi_ev_execute_reg_method(obj_desc, 1); + status = acpi_ev_execute_reg_method(obj_desc, ACPI_REG_CONNECT); return (status); } + +/******************************************************************************* + * + * FUNCTION: acpi_ev_orphan_ec_reg_method + * + * PARAMETERS: None + * + * RETURN: None + * + * DESCRIPTION: Execute an "orphan" _REG method that appears under the EC + * device. This is a _REG method that has no corresponding region + * within the EC device scope. The orphan _REG method appears to + * have been enabled by the description of the ECDT in the ACPI + * specification: "The availability of the region space can be + * detected by providing a _REG method object underneath the + * Embedded Controller device." + * + * To quickly access the EC device, we use the EC_ID that appears + * within the ECDT. Otherwise, we would need to perform a time- + * consuming namespace walk, executing _HID methods to find the + * EC device. + * + ******************************************************************************/ + +static void acpi_ev_orphan_ec_reg_method(void) +{ + struct acpi_table_ecdt *table; + acpi_status status; + struct acpi_object_list args; + union acpi_object objects[2]; + struct acpi_namespace_node *ec_device_node; + struct acpi_namespace_node *reg_method; + struct acpi_namespace_node *next_node; + + ACPI_FUNCTION_TRACE(ev_orphan_ec_reg_method); + + /* Get the ECDT (if present in system) */ + + status = acpi_get_table(ACPI_SIG_ECDT, 0, + ACPI_CAST_INDIRECT_PTR(struct acpi_table_header, + &table)); + if (ACPI_FAILURE(status)) { + return_VOID; + } + + /* We need a valid EC_ID string */ + + if (!(*table->id)) { + return_VOID; + } + + /* Namespace is currently locked, must release */ + + (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); + + /* Get a handle to the EC device referenced in the ECDT */ + + status = acpi_get_handle(NULL, + ACPI_CAST_PTR(char, table->id), + ACPI_CAST_PTR(acpi_handle, &ec_device_node)); + if (ACPI_FAILURE(status)) { + goto exit; + } + + /* Get a handle to a _REG method immediately under the EC device */ + + status = acpi_get_handle(ec_device_node, + METHOD_NAME__REG, ACPI_CAST_PTR(acpi_handle, + ®_method)); + if (ACPI_FAILURE(status)) { + goto exit; + } + + /* + * Execute the _REG method only if there is no Operation Region in + * this scope with the Embedded Controller space ID. Otherwise, it + * will already have been executed. Note, this allows for Regions + * with other space IDs to be present; but the code below will then + * execute the _REG method with the EC space ID argument. + */ + next_node = acpi_ns_get_next_node(ec_device_node, NULL); + while (next_node) { + if ((next_node->type == ACPI_TYPE_REGION) && + (next_node->object) && + (next_node->object->region.space_id == ACPI_ADR_SPACE_EC)) { + goto exit; /* Do not execute _REG */ + } + next_node = acpi_ns_get_next_node(ec_device_node, next_node); + } + + /* Evaluate the _REG(EC,Connect) method */ + + args.count = 2; + args.pointer = objects; + objects[0].type = ACPI_TYPE_INTEGER; + objects[0].integer.value = ACPI_ADR_SPACE_EC; + objects[1].type = ACPI_TYPE_INTEGER; + objects[1].integer.value = ACPI_REG_CONNECT; + + status = acpi_evaluate_object(reg_method, NULL, &args, NULL); + + exit: + /* We ignore all errors from above, don't care */ + + status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); + return_VOID; +} diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c index 9659cee6093..55a5d35ef34 100644 --- a/drivers/acpi/acpica/evrgnini.c +++ b/drivers/acpi/acpica/evrgnini.c @@ -637,7 +637,7 @@ acpi_ev_initialize_region(union acpi_operand_object *region_obj, status = acpi_ev_execute_reg_method - (region_obj, 1); + (region_obj, ACPI_REG_CONNECT); if (acpi_ns_locked) { status = diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index c85c8c45599..00cd95692a9 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -130,20 +130,21 @@ acpi_install_address_space_handler(acpi_handle device, case ACPI_ADR_SPACE_PCI_CONFIG: case ACPI_ADR_SPACE_DATA_TABLE: - if (acpi_gbl_reg_methods_executed) { + if (!acpi_gbl_reg_methods_executed) { - /* Run all _REG methods for this address space */ - - status = acpi_ev_execute_reg_methods(node, space_id); + /* We will defer execution of the _REG methods for this space */ + goto unlock_and_exit; } break; default: - - status = acpi_ev_execute_reg_methods(node, space_id); break; } + /* Run all _REG methods for this address space */ + + status = acpi_ev_execute_reg_methods(node, space_id); + unlock_and_exit: (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); return_ACPI_STATUS(status); diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index f3b29fa5654..a6412b82a57 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -726,6 +726,11 @@ typedef u8 acpi_adr_space_type; #define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 0x7E /* Internal to ACPICA only */ #define ACPI_ADR_SPACE_FIXED_HARDWARE (acpi_adr_space_type) 0x7F +/* Values for _REG connection code */ + +#define ACPI_REG_DISCONNECT 0 +#define ACPI_REG_CONNECT 1 + /* * bit_register IDs * -- cgit v1.2.3-70-g09d2 From 0a63e2308cbbdc7e2f5645769afaf53785bcb9fa Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 15 Apr 2011 09:12:47 +0800 Subject: ACPICA: Update to version 20110413 Version 20110413 Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index f6ad63d25b7..2ed0a8486c1 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20110316 +#define ACPI_CA_VERSION 0x20110413 #include "actypes.h" #include "actbl.h" -- cgit v1.2.3-70-g09d2 From de7a2f9f7b6f5b48d8531ff4c9c9b95cab8a8ce8 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Mon, 9 May 2011 18:11:37 +0200 Subject: dmaengine: at_hdmac: pause: no need to wait for FIFO empty With the addition of the "pause" feature, an active wait was introduced to check the "FIFO empty" event. This event was not always happening and a timout contition was needed. But, in some cases, this event depend on the peripheral connected to the channel that is paused: FIFO becomes empty if the peripheral consumes data. The timeout is pretty difficult to evaluate. Moreover, this check is not needed. In conclusion, it seems sensible to entirely remove the checking of "FIFO empty" status when pausing. Signed-off-by: Nicolas Ferre [commit msg edited for grammer] Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 2c6bc3aa3dd..3134003eec8 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -933,25 +933,9 @@ static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, dev_vdbg(chan2dev(chan), "atc_control (%d)\n", cmd); if (cmd == DMA_PAUSE) { - int pause_timeout = 1000; - spin_lock_bh(&atchan->lock); dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id)); - - /* wait for FIFO to be empty */ - while (!(dma_readl(atdma, CHSR) & AT_DMA_EMPT(chan_id))) { - if (pause_timeout-- > 0) { - /* the FIFO can only drain if the peripheral - * is still requesting data: - * -> timeout if it is not the case. */ - dma_writel(atdma, CHDR, AT_DMA_RES(chan_id)); - spin_unlock_bh(&atchan->lock); - return -ETIMEDOUT; - } - cpu_relax(); - } - set_bit(ATC_IS_PAUSED, &atchan->status); spin_unlock_bh(&atchan->lock); -- cgit v1.2.3-70-g09d2 From 5fedefb87bd0a64281d28edd295f29e3b989d78c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 15 Apr 2011 16:03:35 +0530 Subject: dmaengine/dw_dmac: don't call callback routine in case dmaengine_terminate_all() is called If dmaengine_terminate_all() is called for dma channel, then it doesn't make much sense to call registered callback routine. While in case of success or failure it must be called. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul --- drivers/dma/dw_dmac.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index b15c32ca0ef..265d43c8b35 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -195,18 +195,21 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) /*----------------------------------------------------------------------*/ static void -dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) +dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc, + bool callback_required) { - dma_async_tx_callback callback; - void *param; + dma_async_tx_callback callback = NULL; + void *param = NULL; struct dma_async_tx_descriptor *txd = &desc->txd; struct dw_desc *child; dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie); dwc->completed = txd->cookie; - callback = txd->callback; - param = txd->callback_param; + if (callback_required) { + callback = txd->callback; + param = txd->callback_param; + } dwc_sync_desc_for_cpu(dwc, desc); @@ -238,11 +241,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) } } - /* - * The API requires that no submissions are done from a - * callback, so we don't need to drop the lock here - */ - if (callback) + if (callback_required && callback) callback(param); } @@ -272,7 +271,7 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) } list_for_each_entry_safe(desc, _desc, &list, desc_node) - dwc_descriptor_complete(dwc, desc); + dwc_descriptor_complete(dwc, desc, true); } static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) @@ -322,7 +321,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) * No descriptors so far seem to be in progress, i.e. * this one must be done. */ - dwc_descriptor_complete(dwc, desc); + dwc_descriptor_complete(dwc, desc, true); } dev_err(chan2dev(&dwc->chan), @@ -384,7 +383,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) dwc_dump_lli(dwc, &child->lli); /* Pretend the descriptor completed successfully */ - dwc_descriptor_complete(dwc, bad_desc); + dwc_descriptor_complete(dwc, bad_desc, true); } /* --------------------- Cyclic DMA API extensions -------------------- */ @@ -831,7 +830,7 @@ static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, /* Flush all pending and queued descriptors */ list_for_each_entry_safe(desc, _desc, &list, desc_node) - dwc_descriptor_complete(dwc, desc); + dwc_descriptor_complete(dwc, desc, false); return 0; } -- cgit v1.2.3-70-g09d2 From abf53902dcc6d44d2e06b09817fa67857aa686fe Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 15 Apr 2011 16:03:35 +0530 Subject: dmaengine/dw_dmac: set residue as total len in dwc_tx_status if status is !DMA_SUCCESS If transfer status is !=DMA_SUCCESS, return total transfer len as residue, instead of zero. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul --- drivers/dma/dw_dmac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 265d43c8b35..f209ff8835e 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -860,7 +860,11 @@ dwc_tx_status(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); } - dma_set_tx_state(txstate, last_complete, last_used, 0); + if (ret != DMA_SUCCESS) + dma_set_tx_state(txstate, last_complete, last_used, + dwc_first_active(dwc)->len); + else + dma_set_tx_state(txstate, last_complete, last_used, 0); return ret; } -- cgit v1.2.3-70-g09d2 From 69dc14b51c1aad9d82afd8f96bf4e4835089bffc Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 18 Apr 2011 14:54:56 +0530 Subject: dmaengine/dw_dmac: Divide one sg to many desc, if sg len is greater than DWC_MAX_COUNT If len passed in sg for slave_sg transfers is greater than DWC_MAX_COUNT, then driver programmes controller incorrectly. This patch adds code to handle this situation by allocation more than one desc for same sg. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul --- drivers/dma/dw_dmac.c | 65 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index f209ff8835e..a9755e3dd60 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -693,9 +693,15 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg = dws->tx_reg; for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; - u32 len; - u32 mem; + u32 len, dlen, mem; + mem = sg_phys(sg); + len = sg_dma_len(sg); + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; + +slave_sg_todev_fill_desc: desc = dwc_desc_get(dwc); if (!desc) { dev_err(chan2dev(chan), @@ -703,16 +709,19 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, goto err_desc_get; } - mem = sg_phys(sg); - len = sg_dma_len(sg); - mem_width = 2; - if (unlikely(mem & 3 || len & 3)) - mem_width = 0; - desc->lli.sar = mem; desc->lli.dar = reg; desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width); - desc->lli.ctlhi = len >> mem_width; + if ((len >> mem_width) > DWC_MAX_COUNT) { + dlen = DWC_MAX_COUNT << mem_width; + mem += dlen; + len -= dlen; + } else { + dlen = len; + len = 0; + } + + desc->lli.ctlhi = dlen >> mem_width; if (!first) { first = desc; @@ -726,7 +735,10 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, &first->tx_list); } prev = desc; - total_len += len; + total_len += dlen; + + if (len) + goto slave_sg_todev_fill_desc; } break; case DMA_FROM_DEVICE: @@ -739,15 +751,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg = dws->rx_reg; for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; - u32 len; - u32 mem; - - desc = dwc_desc_get(dwc); - if (!desc) { - dev_err(chan2dev(chan), - "not enough descriptors available\n"); - goto err_desc_get; - } + u32 len, dlen, mem; mem = sg_phys(sg); len = sg_dma_len(sg); @@ -755,10 +759,26 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (unlikely(mem & 3 || len & 3)) mem_width = 0; +slave_sg_fromdev_fill_desc: + desc = dwc_desc_get(dwc); + if (!desc) { + dev_err(chan2dev(chan), + "not enough descriptors available\n"); + goto err_desc_get; + } + desc->lli.sar = reg; desc->lli.dar = mem; desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width); - desc->lli.ctlhi = len >> reg_width; + if ((len >> reg_width) > DWC_MAX_COUNT) { + dlen = DWC_MAX_COUNT << reg_width; + mem += dlen; + len -= dlen; + } else { + dlen = len; + len = 0; + } + desc->lli.ctlhi = dlen >> reg_width; if (!first) { first = desc; @@ -772,7 +792,10 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, &first->tx_list); } prev = desc; - total_len += len; + total_len += dlen; + + if (len) + goto slave_sg_fromdev_fill_desc; } break; default: -- cgit v1.2.3-70-g09d2 From 69cea5a00d3135677939fce1fefe54ed522055a0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 15 Apr 2011 16:03:35 +0530 Subject: dmaengine/dw_dmac: Replace spin_lock* with irqsave variants and enable submission from callback dmaengine routines can be called from interrupt context and with interrupts disabled. Whereas spin_unlock_bh can't be called from such contexts. So this patch converts all spin_*_bh routines to irqsave variants. Also, spin_lock() used in tasklet is converted to irqsave variants, as tasklet can be interrupted, and dma requests from such interruptions may also call spin_lock. Now, submission from callbacks are permitted as per dmaengine framework. So we shouldn't hold any locks while calling callbacks. As locks were taken by parent routines, so releasing them before calling callbacks doesn't look clean enough. So, locks are taken inside all routine now, whereever they are required. And dwc_descriptor_complete is always called without taking locks. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul --- drivers/dma/dw_dmac.c | 116 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 77 insertions(+), 39 deletions(-) diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index a9755e3dd60..442b98b81e7 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -93,8 +93,9 @@ static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) struct dw_desc *desc, *_desc; struct dw_desc *ret = NULL; unsigned int i = 0; + unsigned long flags; - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) { if (async_tx_test_ack(&desc->txd)) { list_del(&desc->desc_node); @@ -104,7 +105,7 @@ static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc); i++; } - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i); @@ -130,12 +131,14 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc) */ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) { + unsigned long flags; + if (desc) { struct dw_desc *child; dwc_sync_desc_for_cpu(dwc, desc); - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); list_for_each_entry(child, &desc->tx_list, desc_node) dev_vdbg(chan2dev(&dwc->chan), "moving child desc %p to freelist\n", @@ -143,7 +146,7 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) list_splice_init(&desc->tx_list, &dwc->free_list); dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &dwc->free_list); - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); } } @@ -202,9 +205,11 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc, void *param = NULL; struct dma_async_tx_descriptor *txd = &desc->txd; struct dw_desc *child; + unsigned long flags; dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie); + spin_lock_irqsave(&dwc->lock, flags); dwc->completed = txd->cookie; if (callback_required) { callback = txd->callback; @@ -241,6 +246,8 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc, } } + spin_unlock_irqrestore(&dwc->lock, flags); + if (callback_required && callback) callback(param); } @@ -249,7 +256,9 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) { struct dw_desc *desc, *_desc; LIST_HEAD(list); + unsigned long flags; + spin_lock_irqsave(&dwc->lock, flags); if (dma_readl(dw, CH_EN) & dwc->mask) { dev_err(chan2dev(&dwc->chan), "BUG: XFER bit set, but channel not idle!\n"); @@ -270,6 +279,8 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) dwc_dostart(dwc, dwc_first_active(dwc)); } + spin_unlock_irqrestore(&dwc->lock, flags); + list_for_each_entry_safe(desc, _desc, &list, desc_node) dwc_descriptor_complete(dwc, desc, true); } @@ -280,7 +291,9 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) struct dw_desc *desc, *_desc; struct dw_desc *child; u32 status_xfer; + unsigned long flags; + spin_lock_irqsave(&dwc->lock, flags); /* * Clear block interrupt flag before scanning so that we don't * miss any, and read LLP before RAW_XFER to ensure it is @@ -293,35 +306,47 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) if (status_xfer & dwc->mask) { /* Everything we've submitted is done */ dma_writel(dw, CLEAR.XFER, dwc->mask); + spin_unlock_irqrestore(&dwc->lock, flags); + dwc_complete_all(dw, dwc); return; } - if (list_empty(&dwc->active_list)) + if (list_empty(&dwc->active_list)) { + spin_unlock_irqrestore(&dwc->lock, flags); return; + } dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp); list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { /* check first descriptors addr */ - if (desc->txd.phys == llp) + if (desc->txd.phys == llp) { + spin_unlock_irqrestore(&dwc->lock, flags); return; + } /* check first descriptors llp */ - if (desc->lli.llp == llp) + if (desc->lli.llp == llp) { /* This one is currently in progress */ + spin_unlock_irqrestore(&dwc->lock, flags); return; + } list_for_each_entry(child, &desc->tx_list, desc_node) - if (child->lli.llp == llp) + if (child->lli.llp == llp) { /* Currently in progress */ + spin_unlock_irqrestore(&dwc->lock, flags); return; + } /* * No descriptors so far seem to be in progress, i.e. * this one must be done. */ + spin_unlock_irqrestore(&dwc->lock, flags); dwc_descriptor_complete(dwc, desc, true); + spin_lock_irqsave(&dwc->lock, flags); } dev_err(chan2dev(&dwc->chan), @@ -336,6 +361,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) list_move(dwc->queue.next, &dwc->active_list); dwc_dostart(dwc, dwc_first_active(dwc)); } + spin_unlock_irqrestore(&dwc->lock, flags); } static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli) @@ -350,9 +376,12 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) { struct dw_desc *bad_desc; struct dw_desc *child; + unsigned long flags; dwc_scan_descriptors(dw, dwc); + spin_lock_irqsave(&dwc->lock, flags); + /* * The descriptor currently at the head of the active list is * borked. Since we don't have any way to report errors, we'll @@ -382,6 +411,8 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) list_for_each_entry(child, &bad_desc->tx_list, desc_node) dwc_dump_lli(dwc, &child->lli); + spin_unlock_irqrestore(&dwc->lock, flags); + /* Pretend the descriptor completed successfully */ dwc_descriptor_complete(dwc, bad_desc, true); } @@ -406,6 +437,8 @@ EXPORT_SYMBOL(dw_dma_get_dst_addr); static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, u32 status_block, u32 status_err, u32 status_xfer) { + unsigned long flags; + if (status_block & dwc->mask) { void (*callback)(void *param); void *callback_param; @@ -416,11 +449,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, callback = dwc->cdesc->period_callback; callback_param = dwc->cdesc->period_callback_param; - if (callback) { - spin_unlock(&dwc->lock); + + if (callback) callback(callback_param); - spin_lock(&dwc->lock); - } } /* @@ -434,6 +465,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, dev_err(chan2dev(&dwc->chan), "cyclic DMA unexpected %s " "interrupt, stopping DMA transfer\n", status_xfer ? "xfer" : "error"); + + spin_lock_irqsave(&dwc->lock, flags); + dev_err(chan2dev(&dwc->chan), " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n", channel_readl(dwc, SAR), @@ -457,6 +491,8 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, for (i = 0; i < dwc->cdesc->periods; i++) dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli); + + spin_unlock_irqrestore(&dwc->lock, flags); } } @@ -480,7 +516,6 @@ static void dw_dma_tasklet(unsigned long data) for (i = 0; i < dw->dma.chancnt; i++) { dwc = &dw->chan[i]; - spin_lock(&dwc->lock); if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) dwc_handle_cyclic(dw, dwc, status_block, status_err, status_xfer); @@ -488,7 +523,6 @@ static void dw_dma_tasklet(unsigned long data) dwc_handle_error(dw, dwc); else if ((status_block | status_xfer) & (1 << i)) dwc_scan_descriptors(dw, dwc); - spin_unlock(&dwc->lock); } /* @@ -543,8 +577,9 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) struct dw_desc *desc = txd_to_dw_desc(tx); struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); dma_cookie_t cookie; + unsigned long flags; - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); cookie = dwc_assign_cookie(dwc, desc); /* @@ -564,7 +599,7 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) list_add_tail(&desc->desc_node, &dwc->queue); } - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); return cookie; } @@ -826,6 +861,7 @@ static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); struct dw_desc *desc, *_desc; + unsigned long flags; LIST_HEAD(list); /* Only supports DMA_TERMINATE_ALL */ @@ -838,7 +874,7 @@ static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, * channel. We still have to poll the channel enable bit due * to AHB/HSB limitations. */ - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); channel_clear_bit(dw, CH_EN, dwc->mask); @@ -849,7 +885,7 @@ static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, list_splice_init(&dwc->queue, &list); list_splice_init(&dwc->active_list, &list); - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); /* Flush all pending and queued descriptors */ list_for_each_entry_safe(desc, _desc, &list, desc_node) @@ -873,9 +909,7 @@ dwc_tx_status(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); if (ret != DMA_SUCCESS) { - spin_lock_bh(&dwc->lock); dwc_scan_descriptors(to_dw_dma(chan->device), dwc); - spin_unlock_bh(&dwc->lock); last_complete = dwc->completed; last_used = chan->cookie; @@ -896,10 +930,8 @@ static void dwc_issue_pending(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - spin_lock_bh(&dwc->lock); if (!list_empty(&dwc->queue)) dwc_scan_descriptors(to_dw_dma(chan->device), dwc); - spin_unlock_bh(&dwc->lock); } static int dwc_alloc_chan_resources(struct dma_chan *chan) @@ -911,6 +943,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) int i; u32 cfghi; u32 cfglo; + unsigned long flags; dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); @@ -948,16 +981,16 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) * doesn't mean what you think it means), and status writeback. */ - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); i = dwc->descs_allocated; while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) { - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL); if (!desc) { dev_info(chan2dev(chan), "only allocated %d descriptors\n", i); - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); break; } @@ -969,7 +1002,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) sizeof(desc->lli), DMA_TO_DEVICE); dwc_desc_put(dwc, desc); - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); i = ++dwc->descs_allocated; } @@ -978,7 +1011,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) channel_set_bit(dw, MASK.BLOCK, dwc->mask); channel_set_bit(dw, MASK.ERROR, dwc->mask); - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); dev_dbg(chan2dev(chan), "alloc_chan_resources allocated %d descriptors\n", i); @@ -991,6 +1024,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); struct dw_desc *desc, *_desc; + unsigned long flags; LIST_HEAD(list); dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n", @@ -1001,7 +1035,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) BUG_ON(!list_empty(&dwc->queue)); BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask); - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); list_splice_init(&dwc->free_list, &list); dwc->descs_allocated = 0; @@ -1010,7 +1044,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) channel_clear_bit(dw, MASK.BLOCK, dwc->mask); channel_clear_bit(dw, MASK.ERROR, dwc->mask); - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); list_for_each_entry_safe(desc, _desc, &list, desc_node) { dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); @@ -1035,13 +1069,14 @@ int dw_dma_cyclic_start(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(dwc->chan.device); + unsigned long flags; if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { dev_err(chan2dev(&dwc->chan), "missing prep for cyclic DMA\n"); return -ENODEV; } - spin_lock(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); /* assert channel is idle */ if (dma_readl(dw, CH_EN) & dwc->mask) { @@ -1054,7 +1089,7 @@ int dw_dma_cyclic_start(struct dma_chan *chan) channel_readl(dwc, LLP), channel_readl(dwc, CTL_HI), channel_readl(dwc, CTL_LO)); - spin_unlock(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); return -EBUSY; } @@ -1069,7 +1104,7 @@ int dw_dma_cyclic_start(struct dma_chan *chan) channel_set_bit(dw, CH_EN, dwc->mask); - spin_unlock(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); return 0; } @@ -1085,14 +1120,15 @@ void dw_dma_cyclic_stop(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(dwc->chan.device); + unsigned long flags; - spin_lock(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); channel_clear_bit(dw, CH_EN, dwc->mask); while (dma_readl(dw, CH_EN) & dwc->mask) cpu_relax(); - spin_unlock(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); } EXPORT_SYMBOL(dw_dma_cyclic_stop); @@ -1121,17 +1157,18 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, unsigned int reg_width; unsigned int periods; unsigned int i; + unsigned long flags; - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); if (!list_empty(&dwc->queue) || !list_empty(&dwc->active_list)) { - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); dev_dbg(chan2dev(&dwc->chan), "queue and/or active list are not empty\n"); return ERR_PTR(-EBUSY); } was_cyclic = test_and_set_bit(DW_DMA_IS_CYCLIC, &dwc->flags); - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); if (was_cyclic) { dev_dbg(chan2dev(&dwc->chan), "channel already prepared for cyclic DMA\n"); @@ -1245,13 +1282,14 @@ void dw_dma_cyclic_free(struct dma_chan *chan) struct dw_dma *dw = to_dw_dma(dwc->chan.device); struct dw_cyclic_desc *cdesc = dwc->cdesc; int i; + unsigned long flags; dev_dbg(chan2dev(&dwc->chan), "cyclic free\n"); if (!cdesc) return; - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); channel_clear_bit(dw, CH_EN, dwc->mask); while (dma_readl(dw, CH_EN) & dwc->mask) @@ -1261,7 +1299,7 @@ void dw_dma_cyclic_free(struct dma_chan *chan) dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); for (i = 0; i < cdesc->periods; i++) dwc_desc_put(dwc, cdesc->desc[i]); -- cgit v1.2.3-70-g09d2 From a7c57cf7d4327c41510f8cbf45b1b970e02c34f8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 19 Apr 2011 08:31:32 +0800 Subject: dmaengine/dw_dmac: implement pause and resume in dwc_control Some peripherals like amba-pl011 needs pause to be implemented in DMA controller drivers. This also returns correct status from dwc_tx_status() in case chan is paused. Signed-off-by: Linus Walleij Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul --- drivers/dma/dw_dmac.c | 59 ++++++++++++++++++++++++++++++---------------- drivers/dma/dw_dmac_regs.h | 1 + 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 442b98b81e7..eec675bf4f9 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -862,34 +862,50 @@ static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, struct dw_dma *dw = to_dw_dma(chan->device); struct dw_desc *desc, *_desc; unsigned long flags; + u32 cfglo; LIST_HEAD(list); - /* Only supports DMA_TERMINATE_ALL */ - if (cmd != DMA_TERMINATE_ALL) - return -ENXIO; + if (cmd == DMA_PAUSE) { + spin_lock_irqsave(&dwc->lock, flags); - /* - * This is only called when something went wrong elsewhere, so - * we don't really care about the data. Just disable the - * channel. We still have to poll the channel enable bit due - * to AHB/HSB limitations. - */ - spin_lock_irqsave(&dwc->lock, flags); + cfglo = channel_readl(dwc, CFG_LO); + channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP); + while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY)) + cpu_relax(); - channel_clear_bit(dw, CH_EN, dwc->mask); + dwc->paused = true; + spin_unlock_irqrestore(&dwc->lock, flags); + } else if (cmd == DMA_RESUME) { + if (!dwc->paused) + return 0; - while (dma_readl(dw, CH_EN) & dwc->mask) - cpu_relax(); + spin_lock_irqsave(&dwc->lock, flags); - /* active_list entries will end up before queued entries */ - list_splice_init(&dwc->queue, &list); - list_splice_init(&dwc->active_list, &list); + cfglo = channel_readl(dwc, CFG_LO); + channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP); + dwc->paused = false; - spin_unlock_irqrestore(&dwc->lock, flags); + spin_unlock_irqrestore(&dwc->lock, flags); + } else if (cmd == DMA_TERMINATE_ALL) { + spin_lock_irqsave(&dwc->lock, flags); - /* Flush all pending and queued descriptors */ - list_for_each_entry_safe(desc, _desc, &list, desc_node) - dwc_descriptor_complete(dwc, desc, false); + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + + dwc->paused = false; + + /* active_list entries will end up before queued entries */ + list_splice_init(&dwc->queue, &list); + list_splice_init(&dwc->active_list, &list); + + spin_unlock_irqrestore(&dwc->lock, flags); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + dwc_descriptor_complete(dwc, desc, false); + } else + return -ENXIO; return 0; } @@ -923,6 +939,9 @@ dwc_tx_status(struct dma_chan *chan, else dma_set_tx_state(txstate, last_complete, last_used, 0); + if (dwc->paused) + return DMA_PAUSED; + return ret; } diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 720f821527f..c968597c32a 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -138,6 +138,7 @@ struct dw_dma_chan { void __iomem *ch_regs; u8 mask; u8 priority; + bool paused; spinlock_t lock; -- cgit v1.2.3-70-g09d2 From c47d832bc0155153920e507f075647519bad09a2 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Mon, 16 May 2011 16:38:14 +0200 Subject: nfsd: make local functions static This also fixes a number of sparse warnings. Signed-off-by: Daniel Mack Cc: Trond Myklebust Cc: J. Bruce Fields Cc: Neil Brown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3xdr.c | 2 +- fs/nfsd/nfs4state.c | 4 ++-- fs/nfsd/nfs4xdr.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 7e84a852cda..291f2d2551b 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -842,7 +842,7 @@ out: return rv; } -__be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) +static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) { struct svc_fh fh; int err; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a2ea14f40b4..a8e4e91587a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2482,7 +2482,7 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid) return NULL; } -int share_access_to_flags(u32 share_access) +static int share_access_to_flags(u32 share_access) { share_access &= ~NFS4_SHARE_WANT_MASK; @@ -2902,7 +2902,7 @@ out: return status; } -struct lock_manager nfsd4_manager = { +static struct lock_manager nfsd4_manager = { }; static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 195a91d9405..99018110321 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3115,7 +3115,7 @@ nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, return nfserr; } -__be32 +static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_sequence *seq) { -- cgit v1.2.3-70-g09d2 From a0eb221a446f2f6c988430f0b0a13f74b7c2b799 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 18 May 2011 14:18:57 +0200 Subject: dmaengine: move link order Move the dmaengine subsystem up early in the drivers Makefile so DMA is made available early to all drivers, just like e.g. regulators. Now even regulators can use DMA on the same initlevel. As a result we can bump the ste_dma40 and coh901318 dmaengine drivers down one initlevel to subsys_init(). Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/Makefile | 4 +++- drivers/dma/coh901318.c | 2 +- drivers/dma/ste_dma40.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/Makefile b/drivers/Makefile index 3f135b6fb01..e68423da897 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -17,6 +17,9 @@ obj-$(CONFIG_SFI) += sfi/ # was used and do nothing if so obj-$(CONFIG_PNP) += pnp/ obj-$(CONFIG_ARM_AMBA) += amba/ +# Many drivers will want to use DMA so this has to be made available +# really early. +obj-$(CONFIG_DMA_ENGINE) += dma/ obj-$(CONFIG_VIRTIO) += virtio/ obj-$(CONFIG_XEN) += xen/ @@ -92,7 +95,6 @@ obj-$(CONFIG_EISA) += eisa/ obj-y += lguest/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_CPU_IDLE) += cpuidle/ -obj-$(CONFIG_DMA_ENGINE) += dma/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_NEW_LEDS) += leds/ diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index 00deabd9a04..b5a318916d0 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1610,7 +1610,7 @@ int __init coh901318_init(void) { return platform_driver_probe(&coh901318_driver, coh901318_probe); } -arch_initcall(coh901318_init); +subsys_initcall(coh901318_init); void __exit coh901318_exit(void) { diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 65d2535d12e..5d054bb908e 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2962,4 +2962,4 @@ static int __init stedma40_init(void) { return platform_driver_probe(&d40_driver, d40_probe); } -arch_initcall(stedma40_init); +subsys_initcall(stedma40_init); -- cgit v1.2.3-70-g09d2 From 3d2606f42984613d324ad3047cf503bcddc3880a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 20 May 2011 09:46:54 +0200 Subject: oprofile, x86: Enable preemption during pci device setup in IBS init IBS initialization is a mix of per-core register access and per-node pci device setup. Register access should be pinned to the cpu, but pci setup must run with preemption enabled. This patch better separates the code into non-/preemptible sections and fixes sleeping with preemption disabled. See bug message below. Fixes also freeing the eilvt entry by introducing put_eilvt(). BUG: sleeping function called from invalid context at mm/slub.c:824 in_atomic(): 1, irqs_disabled(): 0, pid: 32357, name: modprobe INFO: lockdep is turned off. Pid: 32357, comm: modprobe Not tainted 2.6.39-rc7+ #14 Call Trace: [] __might_sleep+0x112/0x117 [] kmem_cache_alloc_trace+0x4b/0xe7 [] kzalloc.constprop.0+0x29/0x2b [] pci_get_subsys+0x36/0x78 [] ? setup_APIC_eilvt+0xfb/0x139 [] pci_get_device+0x16/0x18 [] op_amd_init+0xd3/0x211 [oprofile] [] ? 0xffffffffa064cfff [] op_nmi_init+0x21e/0x26a [oprofile] [] oprofile_arch_init+0xe/0x26 [oprofile] [] oprofile_init+0x10/0x42 [oprofile] [] do_one_initcall+0x7f/0x13a [] sys_init_module+0x132/0x281 [] system_call_fastpath+0x16/0x1b Reported-by: Dave Jones Cc: [2.6.37.x] Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 95 +++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 41 deletions(-) diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index c3b8e24f2b1..9fd8a567fe1 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -316,16 +316,23 @@ static void op_amd_stop_ibs(void) wrmsrl(MSR_AMD64_IBSOPCTL, 0); } -static inline int eilvt_is_available(int offset) +static inline int get_eilvt(int offset) { - /* check if we may assign a vector */ return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); } +static inline int put_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, 0, 1); +} + static inline int ibs_eilvt_valid(void) { int offset; u64 val; + int valid = 0; + + preempt_disable(); rdmsrl(MSR_AMD64_IBSCTL, val); offset = val & IBSCTL_LVT_OFFSET_MASK; @@ -333,16 +340,20 @@ static inline int ibs_eilvt_valid(void) if (!(val & IBSCTL_LVT_OFFSET_VALID)) { pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - return 0; + goto out; } - if (!eilvt_is_available(offset)) { + if (!get_eilvt(offset)) { pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - return 0; + goto out; } - return 1; + valid = 1; +out: + preempt_enable(); + + return valid; } static inline int get_ibs_offset(void) @@ -600,67 +611,69 @@ static int setup_ibs_ctl(int ibs_eilvt_off) static int force_ibs_eilvt_setup(void) { - int i; + int offset; int ret; - /* find the next free available EILVT entry */ - for (i = 1; i < 4; i++) { - if (!eilvt_is_available(i)) - continue; - ret = setup_ibs_ctl(i); - if (ret) - return ret; - pr_err(FW_BUG "using offset %d for IBS interrupts\n", i); - return 0; + /* + * find the next free available EILVT entry, skip offset 0, + * pin search to this cpu + */ + preempt_disable(); + for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { + if (get_eilvt(offset)) + break; } + preempt_enable(); - printk(KERN_DEBUG "No EILVT entry available\n"); - - return -EBUSY; -} - -static int __init_ibs_nmi(void) -{ - int ret; - - if (ibs_eilvt_valid()) - return 0; + if (offset == APIC_EILVT_NR_MAX) { + printk(KERN_DEBUG "No EILVT entry available\n"); + return -EBUSY; + } - ret = force_ibs_eilvt_setup(); + ret = setup_ibs_ctl(offset); if (ret) - return ret; + goto out; - if (!ibs_eilvt_valid()) - return -EFAULT; + if (!ibs_eilvt_valid()) { + ret = -EFAULT; + goto out; + } + pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); return 0; +out: + preempt_disable(); + put_eilvt(offset); + preempt_enable(); + return ret; } /* * check and reserve APIC extended interrupt LVT offset for IBS if * available - * - * init_ibs() preforms implicitly cpu-local operations, so pin this - * thread to its current CPU */ static void init_ibs(void) { - preempt_disable(); - ibs_caps = get_ibs_caps(); + if (!ibs_caps) + return; + + if (ibs_eilvt_valid()) goto out; - if (__init_ibs_nmi() < 0) - ibs_caps = 0; - else - printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); + if (!force_ibs_eilvt_setup()) + goto out; + + /* Failed to setup ibs */ + ibs_caps = 0; + return; out: - preempt_enable(); + printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); } static int (*create_arch_files)(struct super_block *sb, struct dentry *root); -- cgit v1.2.3-70-g09d2 From 74b2107543da4ed9607ec484f63c42362dc9fca6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Apr 2011 12:02:53 -0400 Subject: Btrfs: make sure to use the delalloc reserve when filling delalloc In the prealloc filling code and compressed code we don't set trans->block_rsv to the delalloc block reserve properly, which is going to make us use metadata from the wrong pool, this patch fixes that. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0ef04..3b9f1643aa5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -619,6 +619,7 @@ retry: trans = btrfs_join_transaction(root, 1); BUG_ON(IS_ERR(trans)); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_reserve_extent(trans, root, async_extent->compressed_size, async_extent->compressed_size, @@ -1060,6 +1061,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, trans = btrfs_join_transaction(root, 1); } BUG_ON(IS_ERR(trans)); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; cow_start = (u64)-1; cur_offset = start; -- cgit v1.2.3-70-g09d2 From 7a7eaa40a39bde4eefc91aadeb1ce3dc4e6a1252 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Apr 2011 12:54:33 -0400 Subject: Btrfs: take away the num_items argument from btrfs_join_transaction I keep forgetting that btrfs_join_transaction() just ignores the num_items argument, which leads me to sending pointless patches and looking stupid :). So just kill the num_items argument from btrfs_join_transaction and btrfs_start_ioctl_transaction, since neither of them use it. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 6 +++--- fs/btrfs/extent-tree.c | 12 ++++++------ fs/btrfs/inode.c | 34 +++++++++++++++++----------------- fs/btrfs/ioctl.c | 4 ++-- fs/btrfs/relocation.c | 12 ++++++------ fs/btrfs/transaction.c | 13 +++++-------- fs/btrfs/transaction.h | 9 +++------ 7 files changed, 42 insertions(+), 48 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 228cf36ece8..9d6c9e332ca 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1568,7 +1568,7 @@ static int transaction_kthread(void *arg) transid = cur->transid; spin_unlock(&root->fs_info->new_trans_lock); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); if (transid == trans->transid) { ret = btrfs_commit_transaction(trans, root); @@ -2495,13 +2495,13 @@ int btrfs_commit_super(struct btrfs_root *root) down_write(&root->fs_info->cleanup_work_sem); up_write(&root->fs_info->cleanup_work_sem); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); /* run commit again to drop the original snapshot */ - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9ee6bd55e16..941b28e7893 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3174,7 +3174,7 @@ again: spin_unlock(&data_sinfo->lock); alloc: alloc_target = btrfs_get_alloc_profile(root, 1); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -3202,7 +3202,7 @@ alloc: commit_trans: if (!committed && !root->fs_info->open_ioctl_trans) { committed = 1; - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); ret = btrfs_commit_transaction(trans, root); @@ -3589,7 +3589,7 @@ again: goto out; ret = -ENOSPC; - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) goto out; ret = btrfs_commit_transaction(trans, root); @@ -3816,7 +3816,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, if (trans) return -EAGAIN; - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); ret = btrfs_commit_transaction(trans, root); return 0; @@ -7649,7 +7649,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) BUG_ON(reloc_root->commit_root != NULL); while (1) { - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); mutex_lock(&root->fs_info->drop_mutex); @@ -8176,7 +8176,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, BUG_ON(cache->ro); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); alloc_flags = update_block_group_flags(root, cache->flags); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3b9f1643aa5..e47bdf0fb75 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -420,7 +420,7 @@ again: } } if (start == 0) { - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -617,7 +617,7 @@ retry: async_extent->start + async_extent->ram_size - 1, GFP_NOFS); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_reserve_extent(trans, root, @@ -779,7 +779,7 @@ static noinline int cow_file_range(struct inode *inode, int ret = 0; BUG_ON(root == root->fs_info->tree_root); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1056,9 +1056,9 @@ static noinline int run_delalloc_nocow(struct inode *inode, BUG_ON(!path); if (root == root->fs_info->tree_root) { nolock = true; - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); } else { - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); } BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1718,9 +1718,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); if (!ret) { if (nolock) - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); else - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1735,9 +1735,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) 0, &cached_state, GFP_NOFS); if (nolock) - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); else - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -2415,7 +2415,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) (u64)-1); if (root->orphan_block_rsv || root->orphan_item_inserted) { - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (!IS_ERR(trans)) btrfs_end_transaction(trans, root); } @@ -4378,9 +4378,9 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) { if (nolock) - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); else - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); btrfs_set_trans_block_group(trans, inode); @@ -4407,7 +4407,7 @@ void btrfs_dirty_inode(struct inode *inode) if (BTRFS_I(inode)->dummy_inode) return; - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); @@ -5226,7 +5226,7 @@ again: free_extent_map(em); em = NULL; btrfs_release_path(root, path); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return ERR_CAST(trans); goto again; @@ -5470,7 +5470,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, btrfs_drop_extent_cache(inode, start, start + len - 1, 0); } - trans = btrfs_join_transaction(root, 0); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return ERR_CAST(trans); @@ -5703,7 +5703,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * to make sure the current transaction stays open * while we look for nocow cross refs */ - trans = btrfs_join_transaction(root, 0); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) goto must_cow; @@ -5841,7 +5841,7 @@ again: BUG_ON(!ordered); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { err = -ENOMEM; goto out; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2616f7ed479..908c3d4b48c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -242,7 +242,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); } - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); ret = btrfs_update_inode(trans, root, inode); @@ -2182,7 +2182,7 @@ static long btrfs_ioctl_trans_start(struct file *file) mutex_unlock(&root->fs_info->trans_mutex); ret = -ENOMEM; - trans = btrfs_start_ioctl_transaction(root, 0); + trans = btrfs_start_ioctl_transaction(root); if (IS_ERR(trans)) goto out_drop; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 199a8013431..8bb256667f2 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2149,7 +2149,7 @@ again: err = ret; } - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) { if (!err) btrfs_block_rsv_release(rc->extent_root, @@ -3233,7 +3233,7 @@ truncate: goto out; } - trans = btrfs_join_transaction(root, 0); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { btrfs_free_path(path); ret = PTR_ERR(trans); @@ -3642,7 +3642,7 @@ int prepare_to_relocate(struct reloc_control *rc) rc->create_reloc_tree = 1; set_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); BUG_ON(IS_ERR(trans)); btrfs_commit_transaction(trans, rc->extent_root); return 0; @@ -3831,7 +3831,7 @@ restart: btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); /* get rid of pinned extents */ - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) err = PTR_ERR(trans); else @@ -4156,7 +4156,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) set_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) { unset_reloc_control(rc); err = PTR_ERR(trans); @@ -4190,7 +4190,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) unset_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) err = PTR_ERR(trans); else diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c571734d5e5..70bfb26df96 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -257,22 +257,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, { return start_transaction(root, num_items, TRANS_START); } -struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, - int num_blocks) +struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) { return start_transaction(root, 0, TRANS_JOIN); } -struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, - int num_blocks) +struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) { return start_transaction(root, 0, TRANS_JOIN_NOLOCK); } -struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, - int num_blocks) +struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) { - return start_transaction(r, 0, TRANS_USERSPACE); + return start_transaction(root, 0, TRANS_USERSPACE); } /* wait for a transaction commit to be fully complete */ @@ -1171,7 +1168,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, INIT_DELAYED_WORK(&ac->work, do_async_commit); ac->root = root; - ac->newtrans = btrfs_join_transaction(root, 0); + ac->newtrans = btrfs_join_transaction(root); if (IS_ERR(ac->newtrans)) { int err = PTR_ERR(ac->newtrans); kfree(ac); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e441acc6c58..1f573f09dba 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -92,12 +92,9 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_items); -struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, - int num_blocks); -struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, - int num_blocks); -struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, - int num_blocks); +struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -- cgit v1.2.3-70-g09d2 From 2a1eb4614d984d5cd4c928784e9afcf5c07f93be Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Apr 2011 15:15:59 -0400 Subject: Btrfs: if we've already started a trans handle, use that one We currently track trans handles in current->journal_info, but we don't actually use it. This patch fixes it. This will cover the case where we have multiple people starting transactions down the call chain. This keeps us from having to allocate a new handle and all of that, we just increase the use count of the current handle, save the old block_rsv, and return. I tested this with xfstests and it worked out fine. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 17 +++++++++++++++++ fs/btrfs/transaction.h | 2 ++ 2 files changed, 19 insertions(+) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 70bfb26df96..46f40564c16 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -184,6 +184,15 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) return ERR_PTR(-EROFS); + + if (current->journal_info) { + WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); + h = current->journal_info; + h->use_count++; + h->orig_rsv = h->block_rsv; + h->block_rsv = NULL; + goto got_it; + } again: h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); if (!h) @@ -213,7 +222,9 @@ again: h->block_group = 0; h->bytes_reserved = 0; h->delayed_ref_updates = 0; + h->use_count = 1; h->block_rsv = NULL; + h->orig_rsv = NULL; smp_mb(); if (cur_trans->blocked && may_wait_transaction(root, type)) { @@ -241,6 +252,7 @@ again: } } +got_it: if (type != TRANS_JOIN_NOLOCK) mutex_lock(&root->fs_info->trans_mutex); record_root_in_trans(h, root); @@ -428,6 +440,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info = root->fs_info; int count = 0; + if (--trans->use_count) { + trans->block_rsv = trans->orig_rsv; + return 0; + } + while (count < 4) { unsigned long cur = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 1f573f09dba..154314f80f8 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -47,11 +47,13 @@ struct btrfs_trans_handle { u64 transid; u64 block_group; u64 bytes_reserved; + unsigned long use_count; unsigned long blocks_reserved; unsigned long blocks_used; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; + struct btrfs_block_rsv *orig_rsv; }; struct btrfs_pending_snapshot { -- cgit v1.2.3-70-g09d2 From a4abeea41adfa3c143c289045f4625dfaeba2212 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Apr 2011 17:25:13 -0400 Subject: Btrfs: kill trans_mutex We use trans_mutex for lots of things, here's a basic list 1) To serialize trans_handles joining the currently running transaction 2) To make sure that no new trans handles are started while we are committing 3) To protect the dead_roots list and the transaction lists Really the serializing trans_handles joining is not too hard, and can really get bogged down in acquiring a reference to the transaction. So replace the trans_mutex with a trans_lock spinlock and use it to do the following 1) Protect fs_info->running_transaction. All trans handles have to do is check this, and then take a reference of the transaction and keep on going. 2) Protect the fs_info->trans_list. This doesn't get used too much, basically it just holds the current transactions, which will usually just be the currently committing transaction and the currently running transaction at most. 3) Protect the dead roots list. This is only ever processed by splicing the list so this is relatively simple. 4) Protect the fs_info->reloc_ctl stuff. This is very lightweight and was using the trans_mutex before, so this is a pretty straightforward change. 5) Protect fs_info->no_trans_join. Because we don't hold the trans_lock over the entirety of the commit we need to have a way to block new people from creating a new transaction while we're doing our work. So we set no_trans_join and in join_transaction we test to see if that is set, and if it is we do a wait_on_commit. 6) Make the transaction use count atomic so we don't need to take locks to modify it when we're dropping references. 7) Add a commit_lock to the transaction to make sure multiple people trying to commit the same transaction don't race and commit at the same time. 8) Make open_ioctl_trans an atomic so we don't have to take any locks for ioctl trans. I have tested this with xfstests, but obviously it is a pretty hairy change so lots of testing is greatly appreciated. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 6 +- fs/btrfs/disk-io.c | 30 +++--- fs/btrfs/extent-tree.c | 3 +- fs/btrfs/file.c | 4 +- fs/btrfs/ioctl.c | 12 +-- fs/btrfs/relocation.c | 16 +-- fs/btrfs/transaction.c | 271 ++++++++++++++++++++++++++----------------------- fs/btrfs/transaction.h | 4 +- 8 files changed, 177 insertions(+), 169 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f4b81de3ae..522a39b0033 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -919,7 +919,6 @@ struct btrfs_fs_info { * is required instead of the faster short fsync log commits */ u64 last_trans_log_full_commit; - u64 open_ioctl_trans; unsigned long mount_opt:20; unsigned long compress_type:4; u64 max_inline; @@ -936,7 +935,6 @@ struct btrfs_fs_info { struct super_block *sb; struct inode *btree_inode; struct backing_dev_info bdi; - struct mutex trans_mutex; struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; @@ -957,6 +955,7 @@ struct btrfs_fs_info { struct rw_semaphore subvol_sem; struct srcu_struct subvol_srcu; + spinlock_t trans_lock; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -969,6 +968,7 @@ struct btrfs_fs_info { atomic_t async_submit_draining; atomic_t nr_async_bios; atomic_t async_delalloc_pages; + atomic_t open_ioctl_trans; /* * this is used by the balancing code to wait for all the pending @@ -1032,6 +1032,7 @@ struct btrfs_fs_info { int closing; int log_root_recovering; int enospc_unlink; + int trans_no_join; u64 total_pinned; @@ -1053,7 +1054,6 @@ struct btrfs_fs_info { struct reloc_control *reloc_ctl; spinlock_t delalloc_lock; - spinlock_t new_trans_lock; u64 delalloc_bytes; /* data_alloc_cluster is only used in ssd mode */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9d6c9e332ca..93ef254ec43 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1551,22 +1551,22 @@ static int transaction_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); - spin_lock(&root->fs_info->new_trans_lock); + spin_lock(&root->fs_info->trans_lock); cur = root->fs_info->running_transaction; if (!cur) { - spin_unlock(&root->fs_info->new_trans_lock); + spin_unlock(&root->fs_info->trans_lock); goto sleep; } now = get_seconds(); if (!cur->blocked && (now < cur->start_time || now - cur->start_time < 30)) { - spin_unlock(&root->fs_info->new_trans_lock); + spin_unlock(&root->fs_info->trans_lock); delay = HZ * 5; goto sleep; } transid = cur->transid; - spin_unlock(&root->fs_info->new_trans_lock); + spin_unlock(&root->fs_info->trans_lock); trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); @@ -1658,7 +1658,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->ordered_operations); INIT_LIST_HEAD(&fs_info->caching_block_groups); spin_lock_init(&fs_info->delalloc_lock); - spin_lock_init(&fs_info->new_trans_lock); + spin_lock_init(&fs_info->trans_lock); spin_lock_init(&fs_info->ref_cache_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); @@ -1687,6 +1687,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb = sb; fs_info->max_inline = 8192 * 1024; fs_info->metadata_ratio = 0; + fs_info->trans_no_join = 0; fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); @@ -1735,7 +1736,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->do_barriers = 1; - mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->chunk_mutex); @@ -3006,10 +3006,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) WARN_ON(1); - mutex_lock(&root->fs_info->trans_mutex); mutex_lock(&root->fs_info->transaction_kthread_mutex); + spin_lock(&root->fs_info->trans_lock); list_splice_init(&root->fs_info->trans_list, &list); + root->fs_info->trans_no_join = 1; + spin_unlock(&root->fs_info->trans_lock); + while (!list_empty(&list)) { t = list_entry(list.next, struct btrfs_transaction, list); if (!t) @@ -3034,23 +3037,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) t->blocked = 0; if (waitqueue_active(&root->fs_info->transaction_wait)) wake_up(&root->fs_info->transaction_wait); - mutex_unlock(&root->fs_info->trans_mutex); - mutex_lock(&root->fs_info->trans_mutex); t->commit_done = 1; if (waitqueue_active(&t->commit_wait)) wake_up(&t->commit_wait); - mutex_unlock(&root->fs_info->trans_mutex); - - mutex_lock(&root->fs_info->trans_mutex); btrfs_destroy_pending_snapshots(t); btrfs_destroy_delalloc_inodes(root); - spin_lock(&root->fs_info->new_trans_lock); + spin_lock(&root->fs_info->trans_lock); root->fs_info->running_transaction = NULL; - spin_unlock(&root->fs_info->new_trans_lock); + spin_unlock(&root->fs_info->trans_lock); btrfs_destroy_marked_extents(root, &t->dirty_pages, EXTENT_DIRTY); @@ -3064,8 +3062,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) kmem_cache_free(btrfs_transaction_cachep, t); } + spin_lock(&root->fs_info->trans_lock); + root->fs_info->trans_no_join = 0; + spin_unlock(&root->fs_info->trans_lock); mutex_unlock(&root->fs_info->transaction_kthread_mutex); - mutex_unlock(&root->fs_info->trans_mutex); return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 941b28e7893..ca599654ce1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3200,7 +3200,8 @@ alloc: /* commit the current transaction and try again */ commit_trans: - if (!committed && !root->fs_info->open_ioctl_trans) { + if (!committed && + !atomic_read(&root->fs_info->open_ioctl_trans)) { committed = 1; trans = btrfs_join_transaction(root); if (IS_ERR(trans)) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 75899a01dde..cd5e82e500c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1222,14 +1222,12 @@ int btrfs_sync_file(struct file *file, int datasync) * the current transaction, we can bail out now without any * syncing */ - mutex_lock(&root->fs_info->trans_mutex); + smp_mb(); if (BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { BTRFS_I(inode)->last_trans = 0; - mutex_unlock(&root->fs_info->trans_mutex); goto out; } - mutex_unlock(&root->fs_info->trans_mutex); /* * ok we haven't committed the transaction yet, lets do a commit diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 908c3d4b48c..a578620e06a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2177,9 +2177,7 @@ static long btrfs_ioctl_trans_start(struct file *file) if (ret) goto out; - mutex_lock(&root->fs_info->trans_mutex); - root->fs_info->open_ioctl_trans++; - mutex_unlock(&root->fs_info->trans_mutex); + atomic_inc(&root->fs_info->open_ioctl_trans); ret = -ENOMEM; trans = btrfs_start_ioctl_transaction(root); @@ -2190,9 +2188,7 @@ static long btrfs_ioctl_trans_start(struct file *file) return 0; out_drop: - mutex_lock(&root->fs_info->trans_mutex); - root->fs_info->open_ioctl_trans--; - mutex_unlock(&root->fs_info->trans_mutex); + atomic_dec(&root->fs_info->open_ioctl_trans); mnt_drop_write(file->f_path.mnt); out: return ret; @@ -2426,9 +2422,7 @@ long btrfs_ioctl_trans_end(struct file *file) btrfs_end_transaction(trans, root); - mutex_lock(&root->fs_info->trans_mutex); - root->fs_info->open_ioctl_trans--; - mutex_unlock(&root->fs_info->trans_mutex); + atomic_dec(&root->fs_info->open_ioctl_trans); mnt_drop_write(file->f_path.mnt); return 0; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 8bb256667f2..09c30d37d43 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2136,10 +2136,10 @@ int prepare_to_merge(struct reloc_control *rc, int err) u64 num_bytes = 0; int ret; - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&root->fs_info->trans_lock); rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; rc->merging_rsv_size += rc->nodes_relocated * 2; - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); again: if (!err) { num_bytes = rc->merging_rsv_size; @@ -2208,9 +2208,9 @@ int merge_reloc_roots(struct reloc_control *rc) int ret; again: root = rc->extent_root; - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&root->fs_info->trans_lock); list_splice_init(&rc->reloc_roots, &reloc_roots); - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); while (!list_empty(&reloc_roots)) { found = 1; @@ -3583,17 +3583,17 @@ next: static void set_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - mutex_lock(&fs_info->trans_mutex); + spin_lock(&fs_info->trans_lock); fs_info->reloc_ctl = rc; - mutex_unlock(&fs_info->trans_mutex); + spin_unlock(&fs_info->trans_lock); } static void unset_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - mutex_lock(&fs_info->trans_mutex); + spin_lock(&fs_info->trans_lock); fs_info->reloc_ctl = NULL; - mutex_unlock(&fs_info->trans_mutex); + spin_unlock(&fs_info->trans_lock); } static int check_extent_flags(u64 flags) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 46f40564c16..43816f8b23e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -34,6 +34,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(atomic_read(&transaction->use_count) == 0); if (atomic_dec_and_test(&transaction->use_count)) { + BUG_ON(!list_empty(&transaction->list)); memset(transaction, 0, sizeof(*transaction)); kmem_cache_free(btrfs_transaction_cachep, transaction); } @@ -48,47 +49,73 @@ static noinline void switch_commit_root(struct btrfs_root *root) /* * either allocate a new transaction or hop into the existing one */ -static noinline int join_transaction(struct btrfs_root *root) +static noinline int join_transaction(struct btrfs_root *root, int nofail) { struct btrfs_transaction *cur_trans; + + spin_lock(&root->fs_info->trans_lock); + if (root->fs_info->trans_no_join) { + if (!nofail) { + spin_unlock(&root->fs_info->trans_lock); + return -EBUSY; + } + } + cur_trans = root->fs_info->running_transaction; - if (!cur_trans) { - cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, - GFP_NOFS); - if (!cur_trans) - return -ENOMEM; - root->fs_info->generation++; - atomic_set(&cur_trans->num_writers, 1); - cur_trans->num_joined = 0; - cur_trans->transid = root->fs_info->generation; - init_waitqueue_head(&cur_trans->writer_wait); - init_waitqueue_head(&cur_trans->commit_wait); - cur_trans->in_commit = 0; - cur_trans->blocked = 0; - atomic_set(&cur_trans->use_count, 1); - cur_trans->commit_done = 0; - cur_trans->start_time = get_seconds(); - - cur_trans->delayed_refs.root = RB_ROOT; - cur_trans->delayed_refs.num_entries = 0; - cur_trans->delayed_refs.num_heads_ready = 0; - cur_trans->delayed_refs.num_heads = 0; - cur_trans->delayed_refs.flushing = 0; - cur_trans->delayed_refs.run_delayed_start = 0; - spin_lock_init(&cur_trans->delayed_refs.lock); - - INIT_LIST_HEAD(&cur_trans->pending_snapshots); - list_add_tail(&cur_trans->list, &root->fs_info->trans_list); - extent_io_tree_init(&cur_trans->dirty_pages, - root->fs_info->btree_inode->i_mapping, - GFP_NOFS); - spin_lock(&root->fs_info->new_trans_lock); - root->fs_info->running_transaction = cur_trans; - spin_unlock(&root->fs_info->new_trans_lock); - } else { + if (cur_trans) { + atomic_inc(&cur_trans->use_count); + atomic_inc(&cur_trans->num_writers); + cur_trans->num_joined++; + spin_unlock(&root->fs_info->trans_lock); + return 0; + } + spin_unlock(&root->fs_info->trans_lock); + + cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); + if (!cur_trans) + return -ENOMEM; + spin_lock(&root->fs_info->trans_lock); + if (root->fs_info->running_transaction) { + kmem_cache_free(btrfs_transaction_cachep, cur_trans); + cur_trans = root->fs_info->running_transaction; + atomic_inc(&cur_trans->use_count); atomic_inc(&cur_trans->num_writers); cur_trans->num_joined++; + spin_unlock(&root->fs_info->trans_lock); + return 0; } + atomic_set(&cur_trans->num_writers, 1); + cur_trans->num_joined = 0; + init_waitqueue_head(&cur_trans->writer_wait); + init_waitqueue_head(&cur_trans->commit_wait); + cur_trans->in_commit = 0; + cur_trans->blocked = 0; + /* + * One for this trans handle, one so it will live on until we + * commit the transaction. + */ + atomic_set(&cur_trans->use_count, 2); + cur_trans->commit_done = 0; + cur_trans->start_time = get_seconds(); + + cur_trans->delayed_refs.root = RB_ROOT; + cur_trans->delayed_refs.num_entries = 0; + cur_trans->delayed_refs.num_heads_ready = 0; + cur_trans->delayed_refs.num_heads = 0; + cur_trans->delayed_refs.flushing = 0; + cur_trans->delayed_refs.run_delayed_start = 0; + spin_lock_init(&cur_trans->commit_lock); + spin_lock_init(&cur_trans->delayed_refs.lock); + + INIT_LIST_HEAD(&cur_trans->pending_snapshots); + list_add_tail(&cur_trans->list, &root->fs_info->trans_list); + extent_io_tree_init(&cur_trans->dirty_pages, + root->fs_info->btree_inode->i_mapping, + GFP_NOFS); + root->fs_info->generation++; + cur_trans->transid = root->fs_info->generation; + root->fs_info->running_transaction = cur_trans; + spin_unlock(&root->fs_info->trans_lock); return 0; } @@ -99,39 +126,28 @@ static noinline int join_transaction(struct btrfs_root *root) * to make sure the old root from before we joined the transaction is deleted * when the transaction commits */ -static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { if (root->ref_cows && root->last_trans < trans->transid) { WARN_ON(root == root->fs_info->extent_root); WARN_ON(root->commit_root != root->node); + spin_lock(&root->fs_info->fs_roots_radix_lock); + if (root->last_trans == trans->transid) { + spin_unlock(&root->fs_info->fs_roots_radix_lock); + return 0; + } + root->last_trans = trans->transid; radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); - root->last_trans = trans->transid; + spin_unlock(&root->fs_info->fs_roots_radix_lock); btrfs_init_reloc_root(trans, root); } return 0; } -int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - if (!root->ref_cows) - return 0; - - mutex_lock(&root->fs_info->trans_mutex); - if (root->last_trans == trans->transid) { - mutex_unlock(&root->fs_info->trans_mutex); - return 0; - } - - record_root_in_trans(trans, root); - mutex_unlock(&root->fs_info->trans_mutex); - return 0; -} - /* wait for commit against the current transaction to become unblocked * when this is done, it is safe to start a new transaction, but the current * transaction might not be fully on disk. @@ -140,21 +156,23 @@ static void wait_current_trans(struct btrfs_root *root) { struct btrfs_transaction *cur_trans; + spin_lock(&root->fs_info->trans_lock); cur_trans = root->fs_info->running_transaction; if (cur_trans && cur_trans->blocked) { DEFINE_WAIT(wait); atomic_inc(&cur_trans->use_count); + spin_unlock(&root->fs_info->trans_lock); while (1) { prepare_to_wait(&root->fs_info->transaction_wait, &wait, TASK_UNINTERRUPTIBLE); if (!cur_trans->blocked) break; - mutex_unlock(&root->fs_info->trans_mutex); schedule(); - mutex_lock(&root->fs_info->trans_mutex); } finish_wait(&root->fs_info->transaction_wait, &wait); put_transaction(cur_trans); + } else { + spin_unlock(&root->fs_info->trans_lock); } } @@ -167,10 +185,16 @@ enum btrfs_trans_type { static int may_wait_transaction(struct btrfs_root *root, int type) { - if (!root->fs_info->log_root_recovering && - ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || - type == TRANS_USERSPACE)) + if (root->fs_info->log_root_recovering) + return 0; + + if (type == TRANS_USERSPACE) + return 1; + + if (type == TRANS_START && + !atomic_read(&root->fs_info->open_ioctl_trans)) return 1; + return 0; } @@ -198,23 +222,21 @@ again: if (!h) return ERR_PTR(-ENOMEM); - if (type != TRANS_JOIN_NOLOCK) - mutex_lock(&root->fs_info->trans_mutex); if (may_wait_transaction(root, type)) wait_current_trans(root); - ret = join_transaction(root); + do { + ret = join_transaction(root, type == TRANS_JOIN_NOLOCK); + if (ret == -EBUSY) + wait_current_trans(root); + } while (ret == -EBUSY); + if (ret < 0) { kmem_cache_free(btrfs_trans_handle_cachep, h); - if (type != TRANS_JOIN_NOLOCK) - mutex_unlock(&root->fs_info->trans_mutex); return ERR_PTR(ret); } cur_trans = root->fs_info->running_transaction; - atomic_inc(&cur_trans->use_count); - if (type != TRANS_JOIN_NOLOCK) - mutex_unlock(&root->fs_info->trans_mutex); h->transid = cur_trans->transid; h->transaction = cur_trans; @@ -253,11 +275,7 @@ again: } got_it: - if (type != TRANS_JOIN_NOLOCK) - mutex_lock(&root->fs_info->trans_mutex); - record_root_in_trans(h, root); - if (type != TRANS_JOIN_NOLOCK) - mutex_unlock(&root->fs_info->trans_mutex); + btrfs_record_root_in_trans(h, root); if (!current->journal_info && type != TRANS_USERSPACE) current->journal_info = h; @@ -289,17 +307,13 @@ static noinline int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { DEFINE_WAIT(wait); - mutex_lock(&root->fs_info->trans_mutex); while (!commit->commit_done) { prepare_to_wait(&commit->commit_wait, &wait, TASK_UNINTERRUPTIBLE); if (commit->commit_done) break; - mutex_unlock(&root->fs_info->trans_mutex); schedule(); - mutex_lock(&root->fs_info->trans_mutex); } - mutex_unlock(&root->fs_info->trans_mutex); finish_wait(&commit->commit_wait, &wait); return 0; } @@ -309,50 +323,49 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) struct btrfs_transaction *cur_trans = NULL, *t; int ret; - mutex_lock(&root->fs_info->trans_mutex); - ret = 0; if (transid) { if (transid <= root->fs_info->last_trans_committed) - goto out_unlock; + goto out; /* find specified transaction */ + spin_lock(&root->fs_info->trans_lock); list_for_each_entry(t, &root->fs_info->trans_list, list) { if (t->transid == transid) { cur_trans = t; + atomic_inc(&cur_trans->use_count); break; } if (t->transid > transid) break; } + spin_unlock(&root->fs_info->trans_lock); ret = -EINVAL; if (!cur_trans) - goto out_unlock; /* bad transid */ + goto out; /* bad transid */ } else { /* find newest transaction that is committing | committed */ + spin_lock(&root->fs_info->trans_lock); list_for_each_entry_reverse(t, &root->fs_info->trans_list, list) { if (t->in_commit) { if (t->commit_done) - goto out_unlock; + goto out; cur_trans = t; + atomic_inc(&cur_trans->use_count); break; } } + spin_unlock(&root->fs_info->trans_lock); if (!cur_trans) - goto out_unlock; /* nothing committing|committed */ + goto out; /* nothing committing|committed */ } - atomic_inc(&cur_trans->use_count); - mutex_unlock(&root->fs_info->trans_mutex); - wait_for_commit(root, cur_trans); - mutex_lock(&root->fs_info->trans_mutex); put_transaction(cur_trans); ret = 0; -out_unlock: - mutex_unlock(&root->fs_info->trans_mutex); +out: return ret; } @@ -401,10 +414,8 @@ harder: void btrfs_throttle(struct btrfs_root *root) { - mutex_lock(&root->fs_info->trans_mutex); - if (!root->fs_info->open_ioctl_trans) + if (!atomic_read(&root->fs_info->open_ioctl_trans)) wait_current_trans(root); - mutex_unlock(&root->fs_info->trans_mutex); } static int should_end_transaction(struct btrfs_trans_handle *trans, @@ -422,6 +433,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans = trans->transaction; int updates; + smp_mb(); if (cur_trans->blocked || cur_trans->delayed_refs.flushing) return 1; @@ -467,9 +479,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_metadata(trans, root); - if (lock && !root->fs_info->open_ioctl_trans && - should_end_transaction(trans, root)) + if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && + should_end_transaction(trans, root)) { trans->transaction->blocked = 1; + smp_wmb(); + } if (lock && cur_trans->blocked && !cur_trans->in_commit) { if (throttle) @@ -739,9 +753,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, */ int btrfs_add_dead_root(struct btrfs_root *root) { - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&root->fs_info->trans_lock); list_add(&root->root_list, &root->fs_info->dead_roots); - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); return 0; } @@ -757,6 +771,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, int ret; int err = 0; + spin_lock(&fs_info->fs_roots_radix_lock); while (1) { ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, (void **)gang, 0, @@ -769,6 +784,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, radix_tree_tag_clear(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); + spin_unlock(&fs_info->fs_roots_radix_lock); btrfs_free_log(trans, root); btrfs_update_reloc_root(trans, root); @@ -783,10 +799,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, err = btrfs_update_root(trans, fs_info->tree_root, &root->root_key, &root->root_item); + spin_lock(&fs_info->fs_roots_radix_lock); if (err) break; } } + spin_unlock(&fs_info->fs_roots_radix_lock); return err; } @@ -972,7 +990,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, parent = dget_parent(dentry); parent_inode = parent->d_inode; parent_root = BTRFS_I(parent_inode)->root; - record_root_in_trans(trans, parent_root); + btrfs_record_root_in_trans(trans, parent_root); /* * insert the directory item @@ -990,7 +1008,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, parent_root, parent_inode); BUG_ON(ret); - record_root_in_trans(trans, root); + btrfs_record_root_in_trans(trans, root); btrfs_set_root_last_snapshot(&root->root_item, trans->transid); memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); btrfs_check_and_init_root_item(new_root_item); @@ -1080,20 +1098,20 @@ static void update_super_roots(struct btrfs_root *root) int btrfs_transaction_in_commit(struct btrfs_fs_info *info) { int ret = 0; - spin_lock(&info->new_trans_lock); + spin_lock(&info->trans_lock); if (info->running_transaction) ret = info->running_transaction->in_commit; - spin_unlock(&info->new_trans_lock); + spin_unlock(&info->trans_lock); return ret; } int btrfs_transaction_blocked(struct btrfs_fs_info *info) { int ret = 0; - spin_lock(&info->new_trans_lock); + spin_lock(&info->trans_lock); if (info->running_transaction) ret = info->running_transaction->blocked; - spin_unlock(&info->new_trans_lock); + spin_unlock(&info->trans_lock); return ret; } @@ -1117,9 +1135,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, &wait); break; } - mutex_unlock(&root->fs_info->trans_mutex); schedule(); - mutex_lock(&root->fs_info->trans_mutex); finish_wait(&root->fs_info->transaction_blocked_wait, &wait); } } @@ -1145,9 +1161,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, &wait); break; } - mutex_unlock(&root->fs_info->trans_mutex); schedule(); - mutex_lock(&root->fs_info->trans_mutex); finish_wait(&root->fs_info->transaction_wait, &wait); } @@ -1193,22 +1207,18 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, } /* take transaction reference */ - mutex_lock(&root->fs_info->trans_mutex); cur_trans = trans->transaction; atomic_inc(&cur_trans->use_count); - mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); schedule_delayed_work(&ac->work, 0); /* wait for transaction to start and unblock */ - mutex_lock(&root->fs_info->trans_mutex); if (wait_for_unblock) wait_current_trans_commit_start_and_unblock(root, cur_trans); else wait_current_trans_commit_start(root, cur_trans); put_transaction(cur_trans); - mutex_unlock(&root->fs_info->trans_mutex); return 0; } @@ -1252,38 +1262,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_run_delayed_refs(trans, root, 0); BUG_ON(ret); - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&cur_trans->commit_lock); if (cur_trans->in_commit) { + spin_unlock(&cur_trans->commit_lock); atomic_inc(&cur_trans->use_count); - mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); ret = wait_for_commit(root, cur_trans); BUG_ON(ret); - mutex_lock(&root->fs_info->trans_mutex); put_transaction(cur_trans); - mutex_unlock(&root->fs_info->trans_mutex); return 0; } trans->transaction->in_commit = 1; trans->transaction->blocked = 1; + spin_unlock(&cur_trans->commit_lock); wake_up(&root->fs_info->transaction_blocked_wait); + spin_lock(&root->fs_info->trans_lock); if (cur_trans->list.prev != &root->fs_info->trans_list) { prev_trans = list_entry(cur_trans->list.prev, struct btrfs_transaction, list); if (!prev_trans->commit_done) { atomic_inc(&prev_trans->use_count); - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); wait_for_commit(root, prev_trans); - mutex_lock(&root->fs_info->trans_mutex); put_transaction(prev_trans); + } else { + spin_unlock(&root->fs_info->trans_lock); } + } else { + spin_unlock(&root->fs_info->trans_lock); } if (now < cur_trans->start_time || now - cur_trans->start_time < 1) @@ -1291,12 +1304,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, do { int snap_pending = 0; + joined = cur_trans->num_joined; if (!list_empty(&trans->transaction->pending_snapshots)) snap_pending = 1; WARN_ON(cur_trans != trans->transaction); - mutex_unlock(&root->fs_info->trans_mutex); if (flush_on_commit || snap_pending) { btrfs_start_delalloc_inodes(root, 1); @@ -1316,14 +1329,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, prepare_to_wait(&cur_trans->writer_wait, &wait, TASK_UNINTERRUPTIBLE); - smp_mb(); if (atomic_read(&cur_trans->num_writers) > 1) schedule_timeout(MAX_SCHEDULE_TIMEOUT); else if (should_grow) schedule_timeout(1); - mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); + spin_lock(&root->fs_info->trans_lock); + root->fs_info->trans_no_join = 1; + spin_unlock(&root->fs_info->trans_lock); } while (atomic_read(&cur_trans->num_writers) > 1 || (should_grow && cur_trans->num_joined != joined)); @@ -1364,9 +1378,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_prepare_extent_commit(trans, root); cur_trans = root->fs_info->running_transaction; - spin_lock(&root->fs_info->new_trans_lock); - root->fs_info->running_transaction = NULL; - spin_unlock(&root->fs_info->new_trans_lock); btrfs_set_root_node(&root->fs_info->tree_root->root_item, root->fs_info->tree_root->node); @@ -1387,10 +1398,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, sizeof(root->fs_info->super_copy)); trans->transaction->blocked = 0; + spin_lock(&root->fs_info->trans_lock); + root->fs_info->running_transaction = NULL; + root->fs_info->trans_no_join = 0; + spin_unlock(&root->fs_info->trans_lock); wake_up(&root->fs_info->transaction_wait); - mutex_unlock(&root->fs_info->trans_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root, 0); @@ -1403,22 +1417,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_finish_extent_commit(trans, root); - mutex_lock(&root->fs_info->trans_mutex); - cur_trans->commit_done = 1; root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); + spin_lock(&root->fs_info->trans_lock); list_del_init(&cur_trans->list); + spin_unlock(&root->fs_info->trans_lock); + put_transaction(cur_trans); put_transaction(cur_trans); trace_btrfs_transaction_commit(root); - mutex_unlock(&root->fs_info->trans_mutex); - if (current->journal_info == trans) current->journal_info = NULL; @@ -1438,9 +1451,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) LIST_HEAD(list); struct btrfs_fs_info *fs_info = root->fs_info; - mutex_lock(&fs_info->trans_mutex); + spin_lock(&fs_info->trans_lock); list_splice_init(&fs_info->dead_roots, &list); - mutex_unlock(&fs_info->trans_mutex); + spin_unlock(&fs_info->trans_lock); while (!list_empty(&list)) { root = list_entry(list.next, struct btrfs_root, root_list); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 154314f80f8..11c6efcd4ed 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -28,10 +28,12 @@ struct btrfs_transaction { * transaction can end */ atomic_t num_writers; + atomic_t use_count; unsigned long num_joined; + + spinlock_t commit_lock; int in_commit; - atomic_t use_count; int commit_done; int blocked; struct list_head list; -- cgit v1.2.3-70-g09d2 From fcb80c2affd63237cff5b34cba5756be7c976a5a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 May 2011 10:40:22 -0400 Subject: Btrfs: fix how we do space reservation for truncate The ceph guys keep running into problems where we have space reserved in our orphan block rsv when freeing it up. This is because they tend to do snapshots alot, so their truncates tend to use a bunch of space, so when we go to do things like update the inode we have to steal reservation space in order to make the reservation happen. This happens because truncate can use as much space as it freaking feels like, but we still have to hold space for removing the orphan item and updating the inode, which will definitely always happen. So in order to fix this we need to split all of the reservation stuf up. So with this patch we have 1) The orphan block reserve which only holds the space for deleting our orphan item when everything is over. 2) The truncate block reserve which gets allocated and used specifically for the space that the truncate will use on a per truncate basis. 3) The transaction will always have 1 item's worth of data reserved so we can update the inode normally. Hopefully this will make the ceph problem go away. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 3 ++ fs/btrfs/extent-tree.c | 46 +++++++++++++++----- fs/btrfs/inode.c | 111 +++++++++++++++++++++++++++++++++++++------------ 3 files changed, 123 insertions(+), 37 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 522a39b0033..f31aed7fedd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2224,6 +2224,9 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, void btrfs_block_rsv_release(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes); +int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_block_rsv *rsv); int btrfs_set_block_group_ro(struct btrfs_root *root, struct btrfs_block_group_cache *cache); int btrfs_set_block_group_rw(struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ca599654ce1..a2ca561c70f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3980,6 +3980,37 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) 3 * num_items; } +int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_block_rsv *rsv) +{ + struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; + u64 num_bytes; + int ret; + + /* + * Truncate should be freeing data, but give us 2 items just in case it + * needs to use some space. We may want to be smarter about this in the + * future. + */ + num_bytes = calc_trans_metadata_size(root, 2); + + /* We already have enough bytes, just return */ + if (rsv->reserved >= num_bytes) + return 0; + + num_bytes -= rsv->reserved; + + /* + * You should have reserved enough space before hand to do this, so this + * should not fail. + */ + ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); + BUG_ON(ret); + + return 0; +} + int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root, int num_items) @@ -4020,23 +4051,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; /* - * one for deleting orphan item, one for updating inode and - * two for calling btrfs_truncate_inode_items. - * - * btrfs_truncate_inode_items is a delete operation, it frees - * more space than it uses in most cases. So two units of - * metadata space should be enough for calling it many times. - * If all of the metadata space is used, we can commit - * transaction and use space it freed. + * We need to hold space in order to delete our orphan item once we've + * added it, so this takes the reservation so we can release it later + * when we are truly done with the orphan item. */ - u64 num_bytes = calc_trans_metadata_size(root, 4); + u64 num_bytes = calc_trans_metadata_size(root, 1); return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); } void btrfs_orphan_release_metadata(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 num_bytes = calc_trans_metadata_size(root, 4); + u64 num_bytes = calc_trans_metadata_size(root, 1); btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e47bdf0fb75..bc12ba23db5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6591,6 +6591,7 @@ out: static int btrfs_truncate(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_block_rsv *rsv; int ret; int err = 0; struct btrfs_trans_handle *trans; @@ -6604,28 +6605,83 @@ static int btrfs_truncate(struct inode *inode) btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_ordered_update_i_size(inode, inode->i_size, NULL); - trans = btrfs_start_transaction(root, 5); - if (IS_ERR(trans)) - return PTR_ERR(trans); + /* + * Yes ladies and gentelment, this is indeed ugly. The fact is we have + * 3 things going on here + * + * 1) We need to reserve space for our orphan item and the space to + * delete our orphan item. Lord knows we don't want to have a dangling + * orphan item because we didn't reserve space to remove it. + * + * 2) We need to reserve space to update our inode. + * + * 3) We need to have something to cache all the space that is going to + * be free'd up by the truncate operation, but also have some slack + * space reserved in case it uses space during the truncate (thank you + * very much snapshotting). + * + * And we need these to all be seperate. The fact is we can use alot of + * space doing the truncate, and we have no earthly idea how much space + * we will use, so we need the truncate reservation to be seperate so it + * doesn't end up using space reserved for updating the inode or + * removing the orphan item. We also need to be able to stop the + * transaction and start a new one, which means we need to be able to + * update the inode several times, and we have no idea of knowing how + * many times that will be, so we can't just reserve 1 item for the + * entirety of the opration, so that has to be done seperately as well. + * Then there is the orphan item, which does indeed need to be held on + * to for the whole operation, and we need nobody to touch this reserved + * space except the orphan code. + * + * So that leaves us with + * + * 1) root->orphan_block_rsv - for the orphan deletion. + * 2) rsv - for the truncate reservation, which we will steal from the + * transaction reservation. + * 3) fs_info->trans_block_rsv - this will have 1 items worth left for + * updating the inode. + */ + rsv = btrfs_alloc_block_rsv(root); + if (!rsv) + return -ENOMEM; + btrfs_add_durable_block_rsv(root->fs_info, rsv); + + trans = btrfs_start_transaction(root, 4); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out; + } btrfs_set_trans_block_group(trans, inode); + /* + * Reserve space for the truncate process. Truncate should be adding + * space, but if there are snapshots it may end up using space. + */ + ret = btrfs_truncate_reserve_metadata(trans, root, rsv); + BUG_ON(ret); + ret = btrfs_orphan_add(trans, inode); if (ret) { btrfs_end_transaction(trans, root); - return ret; + goto out; } nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); - /* Now start a transaction for the truncate */ - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); + /* + * Ok so we've already migrated our bytes over for the truncate, so here + * just reserve the one slot we need for updating the inode. + */ + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out; + } btrfs_set_trans_block_group(trans, inode); - trans->block_rsv = root->orphan_block_rsv; + trans->block_rsv = rsv; /* * setattr is responsible for setting the ordered_data_close flag, @@ -6649,24 +6705,18 @@ static int btrfs_truncate(struct inode *inode) while (1) { if (!trans) { - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, inode); - trans->block_rsv = root->orphan_block_rsv; - } + trans = btrfs_start_transaction(root, 3); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out; + } - ret = btrfs_block_rsv_check(trans, root, - root->orphan_block_rsv, 0, 5); - if (ret == -EAGAIN) { - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; - trans = NULL; - continue; - } else if (ret) { - err = ret; - break; + ret = btrfs_truncate_reserve_metadata(trans, root, + rsv); + BUG_ON(ret); + + btrfs_set_trans_block_group(trans, inode); + trans->block_rsv = rsv; } ret = btrfs_truncate_inode_items(trans, root, inode, @@ -6677,6 +6727,7 @@ static int btrfs_truncate(struct inode *inode) break; } + trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); if (ret) { err = ret; @@ -6690,6 +6741,7 @@ static int btrfs_truncate(struct inode *inode) } if (ret == 0 && inode->i_nlink > 0) { + trans->block_rsv = root->orphan_block_rsv; ret = btrfs_orphan_del(trans, inode); if (ret) err = ret; @@ -6701,15 +6753,20 @@ static int btrfs_truncate(struct inode *inode) ret = btrfs_orphan_del(NULL, inode); } + trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); if (ret && !err) err = ret; nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); + btrfs_btree_balance_dirty(root, nr); + +out: + btrfs_free_block_rsv(root, rsv); + if (ret && !err) err = ret; - btrfs_btree_balance_dirty(root, nr); return err; } -- cgit v1.2.3-70-g09d2 From af60bed24eb0e3b6d93eaa6bb395a5721e6c09a8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 4 May 2011 11:11:17 -0400 Subject: Btrfs: set range_start to the right start in count_range_bits In count_range_bits we are adjusting total_bytes based on the range we are searching for, but we don't adjust the range start according to the range we are searching for, which makes for weird results. For example, if the range [0-8192] is set DELALLOC, but I search for 4096-8192, I will get back 4096 for the number of bytes found, but the range_start will be 0, which makes it look like the range is [0-4096]. So instead set range_start = max(cur_start, state->start). This makes everything come out right. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ba41da59e31..b5f6f227a97 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1480,7 +1480,7 @@ u64 count_range_bits(struct extent_io_tree *tree, if (total_bytes >= max_bytes) break; if (!found) { - *start = state->start; + *start = max(cur_start, state->start); found = 1; } last = state->end; -- cgit v1.2.3-70-g09d2 From cb25c2ea6a79702ab7895b873c6c43e0d3bc3c72 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 11 May 2011 12:17:34 -0400 Subject: Btrfs: map the node block when looking for readahead targets If we have particularly full nodes, we could call btrfs_node_blockptr up to 32 times, which is 32 pairs of kmap/kunmap, which _sucks_. So go ahead and map the extent buffer while we look for readahead targets. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 84d7ca1fe0b..009bcf7f1e4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1229,6 +1229,7 @@ static void reada_for_search(struct btrfs_root *root, u64 search; u64 target; u64 nread = 0; + u64 gen; int direction = path->reada; struct extent_buffer *eb; u32 nr; @@ -1256,6 +1257,15 @@ static void reada_for_search(struct btrfs_root *root, nritems = btrfs_header_nritems(node); nr = slot; while (1) { + if (!node->map_token) { + unsigned long offset = btrfs_node_key_ptr_offset(nr); + map_private_extent_buffer(node, offset, + sizeof(struct btrfs_key_ptr), + &node->map_token, + &node->kaddr, + &node->map_start, + &node->map_len, KM_USER1); + } if (direction < 0) { if (nr == 0) break; @@ -1273,14 +1283,23 @@ static void reada_for_search(struct btrfs_root *root, search = btrfs_node_blockptr(node, nr); if ((search <= target && target - search <= 65536) || (search > target && search - target <= 65536)) { - readahead_tree_block(root, search, blocksize, - btrfs_node_ptr_generation(node, nr)); + gen = btrfs_node_ptr_generation(node, nr); + if (node->map_token) { + unmap_extent_buffer(node, node->map_token, + KM_USER1); + node->map_token = NULL; + } + readahead_tree_block(root, search, blocksize, gen); nread += blocksize; } nscan++; if ((nread > 65536 || nscan > 32)) break; } + if (node->map_token) { + unmap_extent_buffer(node, node->map_token, KM_USER1); + node->map_token = NULL; + } } /* -- cgit v1.2.3-70-g09d2 From 7e2355ba1a11649f0b212a29fdb9f47476f1248e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 11 May 2011 12:25:37 -0400 Subject: Btrfs: don't look at the extent buffer level 3 times in a row We have a bit of debugging in btrfs_search_slot to make sure the level of the cow block is the same as the original block we were cow'ing. I don't think I've ever seen this tripped, so kill it. This saves us 2 kmap's per level in our search. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 009bcf7f1e4..f7a0a64b868 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1672,9 +1672,6 @@ again: } cow_done: BUG_ON(!cow && ins_len); - if (level != btrfs_header_level(b)) - WARN_ON(1); - level = btrfs_header_level(b); p->nodes[level] = b; if (!p->skip_locking) -- cgit v1.2.3-70-g09d2 From d82a6f1d7e8b61ed5996334d0db66651bb43641d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 11 May 2011 15:26:06 -0400 Subject: Btrfs: kill BTRFS_I(inode)->block_group Originally this was going to be used as a way to give hints to the allocator, but frankly we can get much better hints elsewhere and it's not even used at all for anything usefull. In addition to be completely useless, when we initialize an inode we try and find a freeish block group to set as the inodes block group, and with a completely full 40gb fs this takes _forever_, so I imagine with say 1tb fs this is just unbearable. So just axe the thing altoghether, we don't need it and it saves us 8 bytes in the inode and saves us 500 microseconds per inode lookup in my testcase. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/btrfs_inode.h | 3 -- fs/btrfs/ctree.h | 3 +- fs/btrfs/extent-tree.c | 10 ++---- fs/btrfs/inode.c | 87 ++++++-------------------------------------------- fs/btrfs/ioctl.c | 3 +- fs/btrfs/transaction.c | 1 - fs/btrfs/transaction.h | 14 -------- fs/btrfs/xattr.c | 2 -- 8 files changed, 13 insertions(+), 110 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 57c3bb2884c..4bc852d3b83 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -120,9 +120,6 @@ struct btrfs_inode { */ u64 index_cnt; - /* the start of block group preferred for allocations. */ - u64 block_group; - /* the fsync log has some corner cases that mean we have to check * directories to see if any unlinks have been done before * the directory was logged. See tree-log.c for all the diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f31aed7fedd..0f8c489bcc0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2512,8 +2512,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, - struct btrfs_root *new_root, - u64 new_dirid, u64 alloc_hint); + struct btrfs_root *new_root, u64 new_dirid); int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a2ca561c70f..9f0a4e3bd8a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5319,6 +5319,7 @@ checks: btrfs_add_free_space(block_group, offset, search_start - offset); BUG_ON(offset > search_start); + btrfs_put_block_group(block_group); break; loop: failed_cluster_refill = false; @@ -5411,14 +5412,7 @@ loop: ret = -ENOSPC; } else if (!ins->objectid) { ret = -ENOSPC; - } - - /* we found what we needed */ - if (ins->objectid) { - if (!(data & BTRFS_BLOCK_GROUP_DATA)) - trans->block_group = block_group->key.objectid; - - btrfs_put_block_group(block_group); + } else if (ins->objectid) { ret = 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bc12ba23db5..dd5938a7de2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -136,7 +136,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, return -ENOMEM; path->leave_spinning = 1; - btrfs_set_trans_block_group(trans, inode); key.objectid = inode->i_ino; key.offset = start; @@ -422,7 +421,6 @@ again: if (start == 0) { trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; /* lets try to make an inline extent */ @@ -781,7 +779,6 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(root == root->fs_info->tree_root); trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; num_bytes = (end - start + blocksize) & ~(blocksize - 1); @@ -1502,8 +1499,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, { struct btrfs_ordered_sum *sum; - btrfs_set_trans_block_group(trans, inode); - list_for_each_entry(sum, list, list) { btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); @@ -1722,7 +1717,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) else trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); @@ -1739,7 +1733,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) else trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) @@ -2495,7 +2488,6 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; int maybe_acls; - u64 alloc_group_block; u32 rdev; int ret; @@ -2539,8 +2531,6 @@ static void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->index_cnt = (u64)-1; BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); - alloc_group_block = btrfs_inode_block_group(leaf, inode_item); - /* * try to precache a NULL acl entry for files that don't have * any xattrs or acls @@ -2549,8 +2539,6 @@ static void btrfs_read_locked_inode(struct inode *inode) if (!maybe_acls) cache_no_acl(inode); - BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, - alloc_group_block, 0); btrfs_free_path(path); inode_item = NULL; @@ -2630,7 +2618,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); - btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); + btrfs_set_inode_block_group(leaf, item, 0); if (leaf->map_token) { unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); @@ -2971,8 +2959,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, @@ -3068,8 +3054,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { err = btrfs_unlink_subvol(trans, root, dir, BTRFS_I(inode)->location.objectid, @@ -3649,7 +3633,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) err = PTR_ERR(trans); break; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, inode, cur_offset, cur_offset + hole_size, @@ -3785,7 +3768,6 @@ void btrfs_evict_inode(struct inode *inode) while (1) { trans = btrfs_start_transaction(root, 0); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = root->orphan_block_rsv; ret = btrfs_block_rsv_check(trans, root, @@ -4383,7 +4365,6 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, inode); if (nolock) ret = btrfs_end_transaction_nolock(trans, root); else @@ -4409,7 +4390,6 @@ void btrfs_dirty_inode(struct inode *inode) trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); ret = btrfs_update_inode(trans, root, inode); if (ret && ret == -ENOSPC) { @@ -4424,7 +4404,6 @@ void btrfs_dirty_inode(struct inode *inode) } return; } - btrfs_set_trans_block_group(trans, inode); ret = btrfs_update_inode(trans, root, inode); if (ret) { @@ -4519,8 +4498,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, const char *name, int name_len, - u64 ref_objectid, u64 objectid, - u64 alloc_hint, int mode, u64 *index) + u64 ref_objectid, u64 objectid, int mode, + u64 *index) { struct inode *inode; struct btrfs_inode_item *inode_item; @@ -4567,8 +4546,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 0; else owner = 1; - BTRFS_I(inode)->block_group = - btrfs_find_block_group(root, 0, alloc_hint, owner); key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -4729,11 +4706,9 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, mode, &index); + mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_unlock; @@ -4745,7 +4720,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; @@ -4754,8 +4728,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); btrfs_update_inode(trans, root, inode); } - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); @@ -4791,11 +4763,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, mode, &index); + mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_unlock; @@ -4807,7 +4777,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; @@ -4818,8 +4787,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); @@ -4866,8 +4833,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_inc_nlink(inode); inode->i_ctime = CURRENT_TIME; - - btrfs_set_trans_block_group(trans, dir); ihold(inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); @@ -4876,7 +4841,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; } else { struct dentry *parent = dget_parent(dentry); - btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); BUG_ON(err); btrfs_log_new_name(trans, inode, NULL, parent); @@ -4917,12 +4881,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) trans = btrfs_start_transaction(root, 5); if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, S_IFDIR | mode, - &index); + S_IFDIR | mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fail; @@ -4936,7 +4898,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; - btrfs_set_trans_block_group(trans, inode); btrfs_i_size_write(inode, 0); err = btrfs_update_inode(trans, root, inode); @@ -4950,8 +4911,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); drop_on_err = 0; - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); out_fail: nr = trans->blocks_used; @@ -6652,8 +6611,6 @@ static int btrfs_truncate(struct inode *inode) goto out; } - btrfs_set_trans_block_group(trans, inode); - /* * Reserve space for the truncate process. Truncate should be adding * space, but if there are snapshots it may end up using space. @@ -6680,7 +6637,6 @@ static int btrfs_truncate(struct inode *inode) err = PTR_ERR(trans); goto out; } - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = rsv; /* @@ -6715,7 +6671,6 @@ static int btrfs_truncate(struct inode *inode) rsv); BUG_ON(ret); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = rsv; } @@ -6775,15 +6730,14 @@ out: * create a new subvolume directory/inode (helper for the ioctl). */ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, - struct btrfs_root *new_root, - u64 new_dirid, u64 alloc_hint) + struct btrfs_root *new_root, u64 new_dirid) { struct inode *inode; int err; u64 index = 0; inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, - new_dirid, alloc_hint, S_IFDIR | 0700, &index); + new_dirid, S_IFDIR | 0700, &index); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &btrfs_dir_inode_operations; @@ -6893,21 +6847,6 @@ void btrfs_destroy_inode(struct inode *inode) spin_unlock(&root->fs_info->ordered_extent_lock); } - if (root == root->fs_info->tree_root) { - struct btrfs_block_group_cache *block_group; - - block_group = btrfs_lookup_block_group(root->fs_info, - BTRFS_I(inode)->block_group); - if (block_group && block_group->inode == inode) { - spin_lock(&block_group->lock); - block_group->inode = NULL; - spin_unlock(&block_group->lock); - btrfs_put_block_group(block_group); - } else if (block_group) { - btrfs_put_block_group(block_group); - } - } - spin_lock(&root->orphan_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", @@ -7091,8 +7030,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_notrans; } - btrfs_set_trans_block_group(trans, new_dir); - if (dest != root) btrfs_record_root_in_trans(trans, dest); @@ -7331,12 +7268,9 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, - &index); + S_IFLNK|S_IRWXUGO, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_unlock; @@ -7348,7 +7282,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; @@ -7359,8 +7292,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); if (drop_inode) goto out_unlock; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a578620e06a..8e90ccf4b76 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -413,8 +413,7 @@ static noinline int create_subvol(struct btrfs_root *root, btrfs_record_root_in_trans(trans, new_root); - ret = btrfs_create_subvol_root(trans, new_root, new_dirid, - BTRFS_I(dir)->block_group); + ret = btrfs_create_subvol_root(trans, new_root, new_dirid); /* * insert the directory item */ diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 43816f8b23e..f4ea695325b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -241,7 +241,6 @@ again: h->transid = cur_trans->transid; h->transaction = cur_trans; h->blocks_used = 0; - h->block_group = 0; h->bytes_reserved = 0; h->delayed_ref_updates = 0; h->use_count = 1; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 11c6efcd4ed..da7289e06a8 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -47,7 +47,6 @@ struct btrfs_transaction { struct btrfs_trans_handle { u64 transid; - u64 block_group; u64 bytes_reserved; unsigned long use_count; unsigned long blocks_reserved; @@ -70,19 +69,6 @@ struct btrfs_pending_snapshot { struct list_head list; }; -static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, - struct inode *inode) -{ - trans->block_group = BTRFS_I(inode)->block_group; -} - -static inline void btrfs_update_inode_block_group( - struct btrfs_trans_handle *trans, - struct inode *inode) -{ - BTRFS_I(inode)->block_group = trans->block_group; -} - static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, struct inode *inode) { diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index cfd660550de..72ab0295ca7 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -158,8 +158,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, inode); - ret = do_setxattr(trans, inode, name, value, size, flags); if (ret) goto out; -- cgit v1.2.3-70-g09d2 From 589d8ade83f07c0f11c8191c0ca309f34d7a2c14 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 11 May 2011 17:30:53 -0400 Subject: Btrfs: try not to sleep as much when doing slow caching When the fs is super full and we unmount the fs, we could get stuck in this thing where unmount is waiting for the caching kthread to make progress and the caching kthread keeps scheduling because we're in the middle of a commit. So instead just let the caching kthread keep going and only yeild if need_resched(). This makes my horrible umount case go from taking up to 10 minutes to taking less than 20 seconds. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9f0a4e3bd8a..96be6245031 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -378,15 +378,18 @@ again: if (ret) break; - caching_ctl->progress = last; - btrfs_release_path(extent_root, path); - up_read(&fs_info->extent_commit_sem); - mutex_unlock(&caching_ctl->mutex); - if (btrfs_transaction_in_commit(fs_info)) - schedule_timeout(1); - else + if (need_resched() || + btrfs_next_leaf(extent_root, path)) { + caching_ctl->progress = last; + btrfs_release_path(extent_root, path); + up_read(&fs_info->extent_commit_sem); + mutex_unlock(&caching_ctl->mutex); cond_resched(); - goto again; + goto again; + } + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + continue; } if (key.objectid < block_group->key.objectid) { -- cgit v1.2.3-70-g09d2 From 026fd317828500524cdc7e5ff9e8e7923abb2868 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 May 2011 10:32:11 -0400 Subject: Btrfs: don't always do readahead Our readahead is sort of sloppy, and really isn't always needed. For example if ls is doing a stating ls (which is the default) it's going to stat in non-disk order, so if say you have a directory with a stupid amount of files, readahead is going to do nothing but waste time in the case of doing the stat. Taking the unconditional readahead out made my test go from 57 minutes to 36 minutes. This means that everywhere we do loop through the tree we want to make sure we do set path->reada properly, so I went through and found all of the places where we loop through the path and set reada to 1. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 2 -- fs/btrfs/extent-tree.c | 3 ++- fs/btrfs/inode.c | 14 ++++++++++++-- fs/btrfs/relocation.c | 6 ++++++ 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f7a0a64b868..f61c16c1481 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -48,8 +48,6 @@ struct btrfs_path *btrfs_alloc_path(void) { struct btrfs_path *path; path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); - if (path) - path->reada = 1; return path; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 96be6245031..1ba2cc58eab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -347,7 +347,7 @@ static int caching_kthread(void *data) */ path->skip_locking = 1; path->search_commit_root = 1; - path->reada = 2; + path->reada = 1; key.objectid = last; key.offset = 0; @@ -8556,6 +8556,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); if (cache_gen != 0 && diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dd5938a7de2..6228a304b54 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4242,7 +4242,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, filp->f_pos = 2; } path = btrfs_alloc_path(); - path->reada = 2; + if (!path) + return -ENOMEM; + path->reada = 1; btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; @@ -5043,7 +5045,15 @@ again: if (!path) { path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + err = -ENOMEM; + goto out; + } + /* + * Chances are we'll be called again, so go ahead and do + * readahead + */ + path->reada = 1; } ret = btrfs_lookup_file_extent(trans, root, path, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 09c30d37d43..5872b41581f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -676,6 +676,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, err = -ENOMEM; goto out; } + path1->reada = 1; + path2->reada = 2; node = alloc_backref_node(cache); if (!node) { @@ -1996,6 +1998,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; reloc_root = root->reloc_root; root_item = &reloc_root->root_item; @@ -3297,6 +3300,7 @@ static int find_data_references(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; root = read_fs_root(rc->extent_root->fs_info, ref_root); if (IS_ERR(root)) { @@ -3665,6 +3669,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; ret = prepare_to_relocate(rc); if (ret) { @@ -4090,6 +4095,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = -1; key.objectid = BTRFS_TREE_RELOC_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; -- cgit v1.2.3-70-g09d2 From cca1c81f43e26ab60c0d1090fb90992358d69bdf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 May 2011 11:07:12 -0400 Subject: Btrfs: don't try to allocate from a block group that doesn't have enough space If we have a very large filesystem, we can spend a lot of time in find_free_extent just trying to allocate from empty block groups. So instead check to see if the block group even has enough space for the allocation, and if not go on to the next block group. Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1ba2cc58eab..c8c318494de 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5159,6 +5159,14 @@ have_block_group: if (unlikely(block_group->ro)) goto loop; + spin_lock(&block_group->tree_lock); + if (cached && + block_group->free_space < num_bytes + empty_size) { + spin_unlock(&block_group->tree_lock); + goto loop; + } + spin_unlock(&block_group->tree_lock); + /* * Ok we want to try and use the cluster allocator, so lets look * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will -- cgit v1.2.3-70-g09d2 From 207dde8289d9b005b665cb9d8d2bb9464256101d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 May 2011 14:49:23 -0400 Subject: Btrfs: check for duplicate entries in the free space cache If there are duplicate entries in the free space cache, discard the entire cache and load it the old fashioned way. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 63731a1fb0a..d634a7e4220 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -420,7 +420,14 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, spin_lock(&block_group->tree_lock); ret = link_free_space(block_group, e); spin_unlock(&block_group->tree_lock); - BUG_ON(ret); + if (ret) { + printk(KERN_ERR "Duplicate entries in " + "free space cache, dumping\n"); + kunmap(page); + unlock_page(page); + page_cache_release(page); + goto free_cache; + } } else { e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); if (!e->bitmap) { @@ -437,6 +444,14 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, recalculate_thresholds(block_group); spin_unlock(&block_group->tree_lock); list_add_tail(&e->list, &bitmaps); + if (ret) { + printk(KERN_ERR "Duplicate entries in " + "free space cache, dumping\n"); + kunmap(page); + unlock_page(page); + page_cache_release(page); + goto free_cache; + } } num_entries--; @@ -909,10 +924,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset, * logically. */ if (bitmap) { - WARN_ON(info->bitmap); + if (info->bitmap) { + WARN_ON_ONCE(1); + return -EEXIST; + } p = &(*p)->rb_right; } else { - WARN_ON(!info->bitmap); + if (!info->bitmap) { + WARN_ON_ONCE(1); + return -EEXIST; + } p = &(*p)->rb_left; } } -- cgit v1.2.3-70-g09d2 From d90c732122a1f6d0efe388a8a204f67f144b2eb3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 17 May 2011 09:50:54 -0400 Subject: Btrfs: leave spinning on lookup and map the leaf On lookup we only want to read the inode item, so leave the path spinning. Also we're just wholesale reading the leaf off, so map the leaf so we don't do a bunch of kmap/kunmaps. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6228a304b54..dc8fb2b3a14 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2493,6 +2493,7 @@ static void btrfs_read_locked_inode(struct inode *inode) path = btrfs_alloc_path(); BUG_ON(!path); + path->leave_spinning = 1; memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); @@ -2502,6 +2503,12 @@ static void btrfs_read_locked_inode(struct inode *inode) leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); + if (!leaf->map_token) + map_private_extent_buffer(leaf, (unsigned long)inode_item, + sizeof(struct btrfs_inode_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); inode->i_mode = btrfs_inode_mode(leaf, inode_item); inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); @@ -2539,6 +2546,11 @@ static void btrfs_read_locked_inode(struct inode *inode) if (!maybe_acls) cache_no_acl(inode); + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + btrfs_free_path(path); inode_item = NULL; -- cgit v1.2.3-70-g09d2 From 6e9101aeec39961308176e0f59e73ac5d37d243a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 24 May 2011 05:43:18 +0200 Subject: watchdog: Fix non-standard prototype of get_softlockup_thresh() This build warning slipped through: kernel/watchdog.c:102: warning: function declaration isn't a prototype As reported by Stephen Rothwell. Also address an unused variable warning that GCC 4.6.0 reports: we cannot do anything about failed watchdog ops during CPU hotplug (it's not serious enough to return an error from the notifier), so ignore them. Reported-by: Stephen Rothwell Cc: Mandeep Singh Baines Cc: Marcin Slusarz Cc: Don Zickus Cc: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20110524134129.8da27016.sfr@canb.auug.org.au Signed-off-by: Ingo Molnar LKML-Reference: <20110517071642.GF22305@elte.hu> --- kernel/watchdog.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6e63097fa73..3d0c56ad479 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -98,7 +98,7 @@ __setup("nosoftlockup", nosoftlockup_setup); * the thresholds with a factor: we make the soft threshold twice the amount of * time the hard threshold is. */ -static int get_softlockup_thresh() +static int get_softlockup_thresh(void) { return watchdog_thresh * 2; } @@ -415,15 +415,13 @@ static void watchdog_nmi_disable(int cpu) { return; } #endif /* CONFIG_HARDLOCKUP_DETECTOR */ /* prepare/enable/disable routines */ -static int watchdog_prepare_cpu(int cpu) +static void watchdog_prepare_cpu(int cpu) { struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); WARN_ON(per_cpu(softlockup_watchdog, cpu)); hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = watchdog_timer_fn; - - return 0; } static int watchdog_enable(int cpu) @@ -542,17 +540,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; - int err = 0; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - err = watchdog_prepare_cpu(hotcpu); + watchdog_prepare_cpu(hotcpu); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: if (watchdog_enabled) - err = watchdog_enable(hotcpu); + watchdog_enable(hotcpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: -- cgit v1.2.3-70-g09d2 From a3793a0d86e2d65dda6beb86fa295be25b238159 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 22 Feb 2011 09:57:44 +0000 Subject: sh: switch ap325rxa to dynamically manage the platform camera Use soc_camera_platform helper functions to dynamically manage the camera device. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- arch/sh/boards/mach-ap325rxa/setup.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index 618bd566cf5..969421f64a1 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -359,37 +359,31 @@ static struct soc_camera_link camera_link = { .priv = &camera_info, }; -static void dummy_release(struct device *dev) +static struct platform_device *camera_device; + +static void ap325rxa_camera_release(struct device *dev) { + soc_camera_platform_release(&camera_device); } -static struct platform_device camera_device = { - .name = "soc_camera_platform", - .dev = { - .platform_data = &camera_info, - .release = dummy_release, - }, -}; - static int ap325rxa_camera_add(struct soc_camera_link *icl, struct device *dev) { - if (icl != &camera_link || camera_probe() <= 0) - return -ENODEV; + int ret = soc_camera_platform_add(icl, dev, &camera_device, &camera_link, + ap325rxa_camera_release, 0); + if (ret < 0) + return ret; - camera_info.dev = dev; + ret = camera_probe(); + if (ret < 0) + soc_camera_platform_del(icl, camera_device, &camera_link); - return platform_device_register(&camera_device); + return ret; } static void ap325rxa_camera_del(struct soc_camera_link *icl) { - if (icl != &camera_link) - return; - - platform_device_unregister(&camera_device); - memset(&camera_device.dev.kobj, 0, - sizeof(camera_device.dev.kobj)); + soc_camera_platform_del(icl, camera_device, &camera_link); } #endif /* CONFIG_I2C */ -- cgit v1.2.3-70-g09d2 From 52b96c2592e8fdb93231c9644b68112518916a57 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 15 Apr 2011 18:30:55 +0000 Subject: sh: add MMCIF runtime PM support on ecovec Signed-off-by: Guennadi Liakhovetski Cc: Simon Horman Cc: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/mach-ecovec24/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index bb13d0e1b96..3a32741cc0a 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -885,6 +885,9 @@ static struct platform_device sh_mmcif_device = { }, .num_resources = ARRAY_SIZE(sh_mmcif_resources), .resource = sh_mmcif_resources, + .archdata = { + .hwblk_id = HWBLK_MMC, + }, }; #endif -- cgit v1.2.3-70-g09d2 From 0f0ebd980e0e8a2fd33ab3ef0d5b0cffbcddd8a1 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 24 May 2011 17:25:23 +0900 Subject: sh: arch/sh/kernel/process_32.c needs linux/prefetch.h. Trivial build fix for certain configurations that don't grab linux/prefetch.h via alternate means (specifically SH-2 and SH-3 parts). Signed-off-by: Paul Mundt --- arch/sh/kernel/process_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 762a13984bb..b473f0c06fb 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From 4e2b1084b0fb79c963b73586815e1ef034dc2b57 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 24 May 2011 17:33:51 +0900 Subject: sh: Update shmin to reflect PIO dependency. shmin uses __set_io_port_base() for legacy I/O mapping that ethernet and other SuperI/O functions depend on. Ensure that PIO support is built in until the board is updated for MMIO properly. Signed-off-by: Paul Mundt --- arch/sh/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index b44e37753b9..a164c9e3c73 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -167,7 +167,7 @@ config ARCH_HAS_CPU_IDLE_WAIT config NO_IOPORT def_bool !PCI - depends on !SH_CAYMAN && !SH_SH4202_MICRODEV + depends on !SH_CAYMAN && !SH_SH4202_MICRODEV && !SH_SHMIN config IO_TRAPPED bool -- cgit v1.2.3-70-g09d2 From d819437156fd99da61d4e1402b2dbfc5cc472265 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Mon, 23 May 2011 10:22:40 -0400 Subject: oprofile, powerpc: Handle events that raise an exception without overflowing Commit 0837e3242c73566fc1c0196b4ec61779c25ffc93 fixes a situation on POWER7 where events can roll back if a specualtive event doesn't actually complete. This can raise a performance monitor exception. We need to catch this to ensure that we reset the PMC. In all cases the PMC will be less than 256 cycles from overflow. This patch lifts Anton's fix for the problem in perf and applies it to oprofile as well. Signed-off-by: Eric B Munson Cc: # as far back as it applies cleanly Tested-by: Maynard Johnson Signed-off-by: Robert Richter --- arch/powerpc/oprofile/op_model_power4.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index 8ee51a252cf..e6bec74be13 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -261,6 +261,28 @@ static int get_kernel(unsigned long pc, unsigned long mmcra) return is_kernel; } +static bool pmc_overflow(unsigned long val) +{ + if ((int)val < 0) + return true; + + /* + * Events on POWER7 can roll back if a speculative event doesn't + * eventually complete. Unfortunately in some rare cases they will + * raise a performance monitor exception. We need to catch this to + * ensure we reset the PMC. In all cases the PMC will be 256 or less + * cycles from overflow. + * + * We only do this if the first pass fails to find any overflowing + * PMCs because a user might set a period of less than 256 and we + * don't want to mistakenly reset them. + */ + if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) + return true; + + return false; +} + static void power4_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) { @@ -281,7 +303,7 @@ static void power4_handle_interrupt(struct pt_regs *regs, for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) { val = classic_ctr_read(i); - if (val < 0) { + if (pmc_overflow(val)) { if (oprofile_running && ctr[i].enabled) { oprofile_add_ext_sample(pc, regs, i, is_kernel); classic_ctr_write(i, reset_value[i]); -- cgit v1.2.3-70-g09d2 From b76a06e08d94b2a63e47837dfe46bbbf0a3af6c2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 8 May 2011 19:32:36 -0400 Subject: oprofile: Use linux/mutex.h The oprofile code is still including asm/mutex.h instead of linux/mutex.h. Signed-off-by: Anton Blanchard Signed-off-by: Robert Richter --- drivers/oprofile/event_buffer.h | 2 +- drivers/oprofile/oprof.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/oprofile/event_buffer.h b/drivers/oprofile/event_buffer.h index 4e70749f8d1..a8d5bb3cba8 100644 --- a/drivers/oprofile/event_buffer.h +++ b/drivers/oprofile/event_buffer.h @@ -11,7 +11,7 @@ #define EVENT_BUFFER_H #include -#include +#include int alloc_event_buffer(void); diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index f9bda64fcd1..dccd8636095 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "oprof.h" #include "event_buffer.h" -- cgit v1.2.3-70-g09d2 From b779260b097dec295e8798277ed08ec5ecd3b2c1 Mon Sep 17 00:00:00 2001 From: Joseph Cihula Date: Tue, 3 May 2011 00:08:37 -0700 Subject: intel-iommu: fix VT-d PMR disable for TXT on S3 resume This patch is a follow on to https://lkml.org/lkml/2011/3/21/239, which was merged as commit 51a63e67da6056c13b5b597dcc9e1b3bd7ceaa55. This patch adds support for S3, as pointed out by Chris Wright. Signed-off-by: Joseph Cihula Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index d552d2c7784..4e4e0202e59 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -141,6 +141,12 @@ static struct intel_iommu **g_iommus; static void __init check_tylersburg_isoch(void); static int rwbf_quirk; +/* + * set to 1 to panic kernel if can't successfully enable VT-d + * (used when kernel is launched w/ TXT) + */ +static int force_on = 0; + /* * 0: Present * 1-11: Reserved @@ -2217,7 +2223,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw) return 0; } -static int __init init_dmars(int force_on) +static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; struct dmar_rmrr_unit *rmrr; @@ -3117,7 +3123,17 @@ static int init_iommu_hw(void) if (iommu->qi) dmar_reenable_qi(iommu); - for_each_active_iommu(iommu, drhd) { + for_each_iommu(iommu, drhd) { + if (drhd->ignored) { + /* + * we always have to disable PMRs or DMA may fail on + * this device + */ + if (force_on) + iommu_disable_protect_mem_regions(iommu); + continue; + } + iommu_flush_write_buffer(iommu); iommu_set_root_entry(iommu); @@ -3126,7 +3142,8 @@ static int init_iommu_hw(void) DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - iommu_enable_translation(iommu); + if (iommu_enable_translation(iommu)) + return 1; iommu_disable_protect_mem_regions(iommu); } @@ -3193,7 +3210,10 @@ static void iommu_resume(void) unsigned long flag; if (init_iommu_hw()) { - WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); + if (force_on) + panic("tboot: IOMMU setup failed, DMAR can not resume!\n"); + else + WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); return; } @@ -3270,7 +3290,6 @@ static struct notifier_block device_nb = { int __init intel_iommu_init(void) { int ret = 0; - int force_on = 0; /* VT-d is required for a TXT/tboot launch, so enforce that */ force_on = tboot_force_iommu(); @@ -3308,7 +3327,7 @@ int __init intel_iommu_init(void) init_no_remapping_devices(); - ret = init_dmars(force_on); + ret = init_dmars(); if (ret) { if (force_on) panic("tboot: Failed to initialize DMARs\n"); -- cgit v1.2.3-70-g09d2 From b3a530e4e734cab7043a3ab7d135f539042443a7 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 15 May 2011 12:34:55 +0200 Subject: intel-iommu: Remove obsolete comment from detect_intel_iommu Since cacd4213d8, this comment no longer applies. Signed-off-by: Jan Kiszka Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 12e02bf92c4..3dc9befa5ae 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -698,12 +698,7 @@ int __init detect_intel_iommu(void) { #ifdef CONFIG_INTR_REMAP struct acpi_table_dmar *dmar; - /* - * for now we will disable dma-remapping when interrupt - * remapping is enabled. - * When support for queued invalidation for IOTLB invalidation - * is added, we will not need this any more. - */ + dmar = (struct acpi_table_dmar *) dmar_tbl; if (ret && cpu_has_x2apic && dmar->flags & 0x1) printk(KERN_INFO -- cgit v1.2.3-70-g09d2 From 7b668357810ecb5fdda4418689d50f5d95aea6a8 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 24 May 2011 12:02:41 +0100 Subject: intel-iommu: Flush unmaps at domain_exit We typically batch unmaps to be lazily flushed out at regular intervals. When we destroy a domain, we need to force a flush of these lazy unmaps to be sure none reference the domain we're about to free. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=35062 Signed-off-by: Alex Williamson Signed-off-by: David Woodhouse Cc: stable@kernel.org --- drivers/pci/intel-iommu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 4e4e0202e59..395f253c049 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1422,6 +1422,10 @@ static void domain_exit(struct dmar_domain *domain) if (!domain) return; + /* Flush any lazy unmaps that may reference this domain */ + if (!intel_iommu_strict) + flush_unmaps_timeout(0); + domain_remove_dev_info(domain); /* destroy iovas */ put_iova_domain(&domain->iovad); -- cgit v1.2.3-70-g09d2 From 7ccafc5f75c87853f3c49845d5a884f2376e03ce Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 May 2011 16:29:26 -0700 Subject: x86, cpufeature: Update CPU feature RDRND to RDRAND The Intel manual changed the name of the CPUID bit to match the instruction name. We should follow suit for sanity's sake. (See Intel SDM Volume 2, Table 3-20 "Feature Information Returned in the ECX Register".) [ hpa: we can only do this at this time because there are currently no CPUs with this feature on the market, hence this is pre-hardware enabling. However, Cc:'ing stable so that stable can present a consistent ABI. ] Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/20110524232926.GA27728@outflux.net Signed-off-by: H. Peter Anvin Cc: Fenghua Yu Cc: v2.6.36-39 --- arch/x86/include/asm/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 5dc6acc98db..71cc3800712 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -125,7 +125,7 @@ #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ #define X86_FEATURE_F16C (4*32+29) /* 16-bit fp conversions */ -#define X86_FEATURE_RDRND (4*32+30) /* The RDRAND instruction */ +#define X86_FEATURE_RDRAND (4*32+30) /* The RDRAND instruction */ #define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -- cgit v1.2.3-70-g09d2 From 1c7fcbed1adee44708dafb65ac40b186c203d7e8 Mon Sep 17 00:00:00 2001 From: Damian Date: Tue, 24 May 2011 07:01:22 +0000 Subject: sh_mobile_meram: MERAM platform data for LCDC Based on the patch by Takanari Hayama Add the necessary platform data to add MERAM functionality to LCDC Includes platform data for both the AP4EVB and mackerel Signed-off-by: Damian Hobson-Garcia Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 56 +++++++++++++++++++++++++++++++ arch/arm/mach-shmobile/board-mackerel.c | 58 +++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 1e35fa976d6..c9aad5eefcc 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -249,6 +249,29 @@ static int slot_cn7_get_cd(struct platform_device *pdev) { return !gpio_get_value(GPIO_PORT41); } +/* MERAM */ +static struct sh_mobile_meram_info meram_info = { + .addr_mode = SH_MOBILE_MERAM_MODE1, +}; + +static struct resource meram_resources[] = { + [0] = { + .name = "MERAM", + .start = 0xe8000000, + .end = 0xe81fffff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device meram_device = { + .name = "sh_mobile_meram", + .id = 0, + .num_resources = ARRAY_SIZE(meram_resources), + .resource = meram_resources, + .dev = { + .platform_data = &meram_info, + }, +}; /* SH_MMCIF */ static struct resource sh_mmcif_resources[] = { @@ -431,13 +454,29 @@ const static struct fb_videomode ap4evb_lcdc_modes[] = { #endif }, }; +static struct sh_mobile_meram_cfg lcd_meram_cfg = { + .icb[0] = { + .marker_icb = 28, + .cache_icb = 24, + .meram_offset = 0x0, + .meram_size = 0x40, + }, + .icb[1] = { + .marker_icb = 29, + .cache_icb = 25, + .meram_offset = 0x40, + .meram_size = 0x40, + }, +}; static struct sh_mobile_lcdc_info lcdc_info = { + .meram_dev = &meram_info, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .lcd_cfg = ap4evb_lcdc_modes, .num_cfg = ARRAY_SIZE(ap4evb_lcdc_modes), + .meram_cfg = &lcd_meram_cfg, } }; @@ -708,15 +747,31 @@ static struct platform_device fsi_device = { static struct platform_device fsi_ak4643_device = { .name = "sh_fsi2_a_ak4643", }; +static struct sh_mobile_meram_cfg hdmi_meram_cfg = { + .icb[0] = { + .marker_icb = 30, + .cache_icb = 26, + .meram_offset = 0x80, + .meram_size = 0x100, + }, + .icb[1] = { + .marker_icb = 31, + .cache_icb = 27, + .meram_offset = 0x180, + .meram_size = 0x100, + }, +}; static struct sh_mobile_lcdc_info sh_mobile_lcdc1_info = { .clock_source = LCDC_CLK_EXTERNAL, + .meram_dev = &meram_info, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .interface_type = RGB24, .clock_divider = 1, .flags = LCDC_FLAGS_DWPOL, + .meram_cfg = &hdmi_meram_cfg, } }; @@ -945,6 +1000,7 @@ static struct platform_device *ap4evb_devices[] __initdata = { &csi2_device, &ceu_device, &ap4evb_camera, + &meram_device, }; static void __init hdmi_init_pm_clock(void) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index 7da2ca24229..a165a9e70ee 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -279,6 +279,30 @@ static struct platform_device smc911x_device = { }, }; +/* MERAM */ +static struct sh_mobile_meram_info mackerel_meram_info = { + .addr_mode = SH_MOBILE_MERAM_MODE1, +}; + +static struct resource meram_resources[] = { + [0] = { + .name = "MERAM", + .start = 0xe8000000, + .end = 0xe81fffff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device meram_device = { + .name = "sh_mobile_meram", + .id = 0, + .num_resources = ARRAY_SIZE(meram_resources), + .resource = meram_resources, + .dev = { + .platform_data = &mackerel_meram_info, + }, +}; + /* LCDC */ static struct fb_videomode mackerel_lcdc_modes[] = { { @@ -307,7 +331,23 @@ static int mackerel_get_brightness(void *board_data) return gpio_get_value(GPIO_PORT31); } +static struct sh_mobile_meram_cfg lcd_meram_cfg = { + .icb[0] = { + .marker_icb = 28, + .cache_icb = 24, + .meram_offset = 0x0, + .meram_size = 0x40, + }, + .icb[1] = { + .marker_icb = 29, + .cache_icb = 25, + .meram_offset = 0x40, + .meram_size = 0x40, + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { + .meram_dev = &mackerel_meram_info, .clock_source = LCDC_CLK_BUS, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, @@ -327,6 +367,7 @@ static struct sh_mobile_lcdc_info lcdc_info = { .name = "sh_mobile_lcdc_bl", .max_brightness = 1, }, + .meram_cfg = &lcd_meram_cfg, } }; @@ -353,8 +394,23 @@ static struct platform_device lcdc_device = { }, }; +static struct sh_mobile_meram_cfg hdmi_meram_cfg = { + .icb[0] = { + .marker_icb = 30, + .cache_icb = 26, + .meram_offset = 0x80, + .meram_size = 0x100, + }, + .icb[1] = { + .marker_icb = 31, + .cache_icb = 27, + .meram_offset = 0x180, + .meram_size = 0x100, + }, +}; /* HDMI */ static struct sh_mobile_lcdc_info hdmi_lcdc_info = { + .meram_dev = &mackerel_meram_info, .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, @@ -362,6 +418,7 @@ static struct sh_mobile_lcdc_info hdmi_lcdc_info = { .interface_type = RGB24, .clock_divider = 1, .flags = LCDC_FLAGS_DWPOL, + .meram_cfg = &hdmi_meram_cfg, } }; @@ -949,6 +1006,7 @@ static struct platform_device *mackerel_devices[] __initdata = { &mackerel_camera, &hdmi_lcdc_device, &hdmi_device, + &meram_device, }; /* Keypad Initialization */ -- cgit v1.2.3-70-g09d2 From 54525552c6ccfd867e819845da14be994e303218 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 24 May 2011 10:23:59 +0000 Subject: sh: mark DMA slave ID 0 as invalid This makes it possible to leave DMA slave IDs in the platform data at default 0 value without hitting DMA channel allocation error paths. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- arch/sh/include/cpu-sh4/cpu/sh7722.h | 1 + arch/sh/include/cpu-sh4/cpu/sh7724.h | 1 + arch/sh/include/cpu-sh4/cpu/sh7757.h | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/sh/include/cpu-sh4/cpu/sh7722.h b/arch/sh/include/cpu-sh4/cpu/sh7722.h index 7a5b8a331b4..bd0622788d6 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7722.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7722.h @@ -236,6 +236,7 @@ enum { }; enum { + SHDMA_SLAVE_INVALID, SHDMA_SLAVE_SCIF0_TX, SHDMA_SLAVE_SCIF0_RX, SHDMA_SLAVE_SCIF1_TX, diff --git a/arch/sh/include/cpu-sh4/cpu/sh7724.h b/arch/sh/include/cpu-sh4/cpu/sh7724.h index 7eb43599942..3daef8ecbc6 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7724.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7724.h @@ -285,6 +285,7 @@ enum { }; enum { + SHDMA_SLAVE_INVALID, SHDMA_SLAVE_SCIF0_TX, SHDMA_SLAVE_SCIF0_RX, SHDMA_SLAVE_SCIF1_TX, diff --git a/arch/sh/include/cpu-sh4/cpu/sh7757.h b/arch/sh/include/cpu-sh4/cpu/sh7757.h index 05b8196c775..41f9f8b9db7 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7757.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7757.h @@ -252,6 +252,7 @@ enum { }; enum { + SHDMA_SLAVE_INVALID, SHDMA_SLAVE_SDHI_TX, SHDMA_SLAVE_SDHI_RX, SHDMA_SLAVE_MMCIF_TX, -- cgit v1.2.3-70-g09d2 From 2a919596c16b4333af851ff473ebf96e289ab90c Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 11 May 2011 12:50:28 -0500 Subject: x86, UV: Add support for SGI UV2 hub chip This patch adds support for a new version of the SGI UV hub chip. The hub chip is the node controller that connects multiple blades into a larger coherent SSI. For the most part, UV2 is compatible with UV1. The majority of the changes are in the addresses of MMRs and in a few cases, the contents of MMRs. These changes are the result in changes in the system topology such as node configuration, processor types, maximum nodes, physical address sizes, etc. Signed-off-by: Jack Steiner Link: http://lkml.kernel.org/r/20110511175028.GA18006@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 42 +- arch/x86/include/asm/uv/uv_hub.h | 71 ++- arch/x86/include/asm/uv/uv_mmrs.h | 1012 +++++++++++++++++++++++++++++------- arch/x86/kernel/apic/x2apic_uv_x.c | 40 +- arch/x86/platform/uv/tlb_uv.c | 132 ++++- arch/x86/platform/uv/uv_time.c | 16 +- 6 files changed, 1075 insertions(+), 238 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 130f1eeee5f..0652a5a9fd6 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -44,7 +44,10 @@ #define UV_ACT_STATUS_SIZE 2 #define UV_DISTRIBUTION_SIZE 256 #define UV_SW_ACK_NPENDING 8 -#define UV_NET_ENDPOINT_INTD 0x38 +#define UV1_NET_ENDPOINT_INTD 0x38 +#define UV2_NET_ENDPOINT_INTD 0x28 +#define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \ + UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD) #define UV_DESC_BASE_PNODE_SHIFT 49 #define UV_PAYLOADQ_PNODE_SHIFT 49 #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" @@ -53,10 +56,22 @@ #define UV_BAU_TUNABLES_FILE "bau_tunables" #define WHITESPACE " \t\n" #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) -#define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 -#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 -#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x0000000009UL + + /* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */ +/* + * UV2: Bit 19 selects between + * (0): 10 microsecond timebase and + * (1): 80 microseconds + * we're using 655us, similar to UV1: 65 units of 10us + */ +#define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL) +#define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (65*10UL) + +#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD (is_uv1_hub() ? \ + UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \ + UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD) + #define BAU_MISC_CONTROL_MULT_MASK 3 #define UVH_AGING_PRESCALE_SEL 0x000000b000UL @@ -76,6 +91,16 @@ #define DESC_STATUS_ACTIVE 1 #define DESC_STATUS_DESTINATION_TIMEOUT 2 #define DESC_STATUS_SOURCE_TIMEOUT 3 +/* + * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2 + * values 1 and 5 will not occur + */ +#define UV2H_DESC_IDLE 0 +#define UV2H_DESC_DEST_TIMEOUT 2 +#define UV2H_DESC_DEST_STRONG_NACK 3 +#define UV2H_DESC_BUSY 4 +#define UV2H_DESC_SOURCE_TIMEOUT 6 +#define UV2H_DESC_DEST_PUT_ERR 7 /* * delay for 'plugged' timeout retries, in microseconds @@ -96,6 +121,15 @@ #define UV_LB_SUBNODEID 0x10 +/* these two are the same for UV1 and UV2: */ +#define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT +#define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK +/* 4 bits of software ack period */ +#define UV2_ACK_MASK 0x7UL +#define UV2_ACK_UNITS_SHFT 3 +#define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT +#define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT + /* * number of entries in the destination side payload queue */ diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 4298002d0c8..f26544a1521 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -77,8 +77,9 @@ * * 1111110000000000 * 5432109876543210 - * pppppppppplc0cch Nehalem-EX - * ppppppppplcc0cch Westmere-EX + * pppppppppplc0cch Nehalem-EX (12 bits in hdw reg) + * ppppppppplcc0cch Westmere-EX (12 bits in hdw reg) + * pppppppppppcccch SandyBridge (15 bits in hdw reg) * sssssssssss * * p = pnode bits @@ -87,7 +88,7 @@ * h = hyperthread * s = bits that are in the SOCKET_ID CSR * - * Note: Processor only supports 12 bits in the APICID register. The ACPI + * Note: Processor may support fewer bits in the APICID register. The ACPI * tables hold all 16 bits. Software needs to be aware of this. * * Unless otherwise specified, all references to APICID refer to @@ -138,6 +139,8 @@ struct uv_hub_info_s { unsigned long global_mmr_base; unsigned long gpa_mask; unsigned int gnode_extra; + unsigned char hub_revision; + unsigned char apic_pnode_shift; unsigned long gnode_upper; unsigned long lowmem_remap_top; unsigned long lowmem_remap_base; @@ -149,13 +152,31 @@ struct uv_hub_info_s { unsigned char m_val; unsigned char n_val; struct uv_scir_s scir; - unsigned char apic_pnode_shift; }; DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) +/* + * Hub revisions less than UV2_HUB_REVISION_BASE are UV1 hubs. All UV2 + * hubs have revision numbers greater than or equal to UV2_HUB_REVISION_BASE. + * This is a software convention - NOT the hardware revision numbers in + * the hub chip. + */ +#define UV1_HUB_REVISION_BASE 1 +#define UV2_HUB_REVISION_BASE 3 + +static inline int is_uv1_hub(void) +{ + return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE; +} + +static inline int is_uv2_hub(void) +{ + return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE; +} + union uvh_apicid { unsigned long v; struct uvh_apicid_s { @@ -180,11 +201,25 @@ union uvh_apicid { #define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) #define UV_PNODE_TO_NASID(p) (UV_PNODE_TO_GNODE(p) << 1) -#define UV_LOCAL_MMR_BASE 0xf4000000UL -#define UV_GLOBAL_MMR32_BASE 0xf8000000UL +#define UV1_LOCAL_MMR_BASE 0xf4000000UL +#define UV1_GLOBAL_MMR32_BASE 0xf8000000UL +#define UV1_LOCAL_MMR_SIZE (64UL * 1024 * 1024) +#define UV1_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024) + +#define UV2_LOCAL_MMR_BASE 0xfa000000UL +#define UV2_GLOBAL_MMR32_BASE 0xfc000000UL +#define UV2_LOCAL_MMR_SIZE (32UL * 1024 * 1024) +#define UV2_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024) + +#define UV_LOCAL_MMR_BASE (is_uv1_hub() ? UV1_LOCAL_MMR_BASE \ + : UV2_LOCAL_MMR_BASE) +#define UV_GLOBAL_MMR32_BASE (is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE \ + : UV2_GLOBAL_MMR32_BASE) +#define UV_LOCAL_MMR_SIZE (is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \ + UV2_LOCAL_MMR_SIZE) +#define UV_GLOBAL_MMR32_SIZE (is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\ + UV2_GLOBAL_MMR32_SIZE) #define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) -#define UV_LOCAL_MMR_SIZE (64UL * 1024 * 1024) -#define UV_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024) #define UV_GLOBAL_GRU_MMR_BASE 0x4000000 @@ -300,6 +335,17 @@ static inline int uv_apicid_to_pnode(int apicid) return (apicid >> uv_hub_info->apic_pnode_shift); } +/* + * Convert an apicid to the socket number on the blade + */ +static inline int uv_apicid_to_socket(int apicid) +{ + if (is_uv1_hub()) + return (apicid >> (uv_hub_info->apic_pnode_shift - 1)) & 1; + else + return 0; +} + /* * Access global MMRs using the low memory MMR32 space. This region supports * faster MMR access but not all MMRs are accessible in this space. @@ -519,14 +565,13 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) /* * Get the minimum revision number of the hub chips within the partition. - * 1 - initial rev 1.0 silicon - * 2 - rev 2.0 production silicon + * 1 - UV1 rev 1.0 initial silicon + * 2 - UV1 rev 2.0 production silicon + * 3 - UV2 rev 1.0 initial silicon */ static inline int uv_get_min_hub_revision_id(void) { - extern int uv_min_hub_revision_id; - - return uv_min_hub_revision_id; + return uv_hub_info->hub_revision; } #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index f5bb64a823d..4be52c86344 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -11,13 +11,64 @@ #ifndef _ASM_X86_UV_UV_MMRS_H #define _ASM_X86_UV_UV_MMRS_H +/* + * This file contains MMR definitions for both UV1 & UV2 hubs. + * + * In general, MMR addresses and structures are identical on both hubs. + * These MMRs are identified as: + * #define UVH_xxx
+ * union uvh_xxx { + * unsigned long v; + * struct uvh_int_cmpd_s { + * } s; + * }; + * + * If the MMR exists on both hub type but has different addresses or + * contents, the MMR definition is similar to: + * #define UV1H_xxx + * #define UV2H_xxx + * #define UVH_xxx (is_uv1_hub() ? UV1H_xxx : UV2H_xxx) + * union uvh_xxx { + * unsigned long v; + * struct uv1h_int_cmpd_s { (Common fields only) + * } s; + * struct uv1h_int_cmpd_s { (Full UV1 definition) + * } s1; + * struct uv2h_int_cmpd_s { (Full UV2 definition) + * } s2; + * }; + * + * Only essential difference are enumerated. For example, if the address is + * the same for both UV1 & UV2, only a single #define is generated. Likewise, + * if the contents is the same for both hubs, only the "s" structure is + * generated. + * + * If the MMR exists on ONLY 1 type of hub, no generic definition is + * generated: + * #define UVnH_xxx + * union uvnh_xxx { + * unsigned long v; + * struct uvh_int_cmpd_s { + * } sn; + * }; + */ + #define UV_MMR_ENABLE (1UL << 63) +#define UV1_HUB_PART_NUMBER 0x88a5 +#define UV2_HUB_PART_NUMBER 0x8eb8 + +/* Compat: if this #define is present, UV headers support UV2 */ +#define UV2_HUB_IS_SUPPORTED 1 + +/* KABI compat: if this #define is present, KABI hacks are present */ +#define UV2_HUB_KABI_HACKS 1 + /* ========================================================================= */ /* UVH_BAU_DATA_BROADCAST */ /* ========================================================================= */ #define UVH_BAU_DATA_BROADCAST 0x61688UL -#define UVH_BAU_DATA_BROADCAST_32 0x0440 +#define UVH_BAU_DATA_BROADCAST_32 0x440 #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL @@ -34,7 +85,7 @@ union uvh_bau_data_broadcast_u { /* UVH_BAU_DATA_CONFIG */ /* ========================================================================= */ #define UVH_BAU_DATA_CONFIG 0x61680UL -#define UVH_BAU_DATA_CONFIG_32 0x0438 +#define UVH_BAU_DATA_CONFIG_32 0x438 #define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 #define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL @@ -73,125 +124,245 @@ union uvh_bau_data_config_u { /* UVH_EVENT_OCCURRED0 */ /* ========================================================================= */ #define UVH_EVENT_OCCURRED0 0x70000UL -#define UVH_EVENT_OCCURRED0_32 0x005e8 - -#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 -#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL -#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 -#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL -#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 -#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL -#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3 -#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL -#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4 -#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL -#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5 -#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL -#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6 -#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL -#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 -#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL -#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 -#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL -#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 -#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL -#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 -#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL -#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 -#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL -#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 -#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL -#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 -#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL -#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 -#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL -#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 -#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL -#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 -#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL -#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 -#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL -#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 -#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL -#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 -#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL -#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 -#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL -#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 -#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL -#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 -#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL -#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 -#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL -#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 -#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL -#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 -#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL -#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 -#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL -#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43 -#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL -#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 -#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL -#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45 -#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL -#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 -#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL -#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 -#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL -#define UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 -#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL -#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 -#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL -#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 -#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL -#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51 -#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL -#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52 -#define UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL -#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53 -#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL -#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54 -#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL -#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55 -#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL -#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 -#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL +#define UVH_EVENT_OCCURRED0_32 0x5e8 + +#define UV1H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 +#define UV1H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL +#define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 +#define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL +#define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 +#define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL +#define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3 +#define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL +#define UV1H_EVENT_OCCURRED0_RH_HCERR_SHFT 4 +#define UV1H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL +#define UV1H_EVENT_OCCURRED0_XN_HCERR_SHFT 5 +#define UV1H_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL +#define UV1H_EVENT_OCCURRED0_SI_HCERR_SHFT 6 +#define UV1H_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL +#define UV1H_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 +#define UV1H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL +#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 +#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL +#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 +#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL +#define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 +#define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL +#define UV1H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UV1H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 +#define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL +#define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 +#define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL +#define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 +#define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL +#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 +#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL +#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 +#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL +#define UV1H_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 +#define UV1H_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL +#define UV1H_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 +#define UV1H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL +#define UV1H_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 +#define UV1H_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL +#define UV1H_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 +#define UV1H_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL +#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 +#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL +#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 +#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL +#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 +#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL +#define UV1H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 +#define UV1H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL +#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 +#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL +#define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 +#define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL +#define UV1H_EVENT_OCCURRED0_LTC_INT_SHFT 43 +#define UV1H_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL +#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 +#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL +#define UV1H_EVENT_OCCURRED0_IPI_INT_SHFT 45 +#define UV1H_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL +#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 +#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL +#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 +#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL +#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 +#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL +#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 +#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL +#define UV1H_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 +#define UV1H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL +#define UV1H_EVENT_OCCURRED0_RTC0_SHFT 51 +#define UV1H_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL +#define UV1H_EVENT_OCCURRED0_RTC1_SHFT 52 +#define UV1H_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL +#define UV1H_EVENT_OCCURRED0_RTC2_SHFT 53 +#define UV1H_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL +#define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54 +#define UV1H_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL +#define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55 +#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL +#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 +#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL + +#define UV2H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 +#define UV2H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL +#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UV2H_EVENT_OCCURRED0_RH_HCERR_SHFT 2 +#define UV2H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL +#define UV2H_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 +#define UV2H_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL +#define UV2H_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 +#define UV2H_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL +#define UV2H_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 +#define UV2H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL +#define UV2H_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 +#define UV2H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL +#define UV2H_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 +#define UV2H_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL +#define UV2H_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 +#define UV2H_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL +#define UV2H_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 +#define UV2H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UV2H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UV2H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 +#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL +#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 +#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL +#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 +#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL +#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 +#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL +#define UV2H_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 +#define UV2H_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL +#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + union uvh_event_occurred0_u { unsigned long v; - struct uvh_event_occurred0_s { + struct uv1h_event_occurred0_s { unsigned long lb_hcerr : 1; /* RW, W1C */ unsigned long gr0_hcerr : 1; /* RW, W1C */ unsigned long gr1_hcerr : 1; /* RW, W1C */ @@ -250,14 +421,76 @@ union uvh_event_occurred0_u { unsigned long bau_data : 1; /* RW, W1C */ unsigned long power_management_req : 1; /* RW, W1C */ unsigned long rsvd_57_63 : 7; /* */ - } s; + } s1; + struct uv2h_event_occurred0_s { + unsigned long lb_hcerr : 1; /* RW */ + unsigned long qp_hcerr : 1; /* RW */ + unsigned long rh_hcerr : 1; /* RW */ + unsigned long lh0_hcerr : 1; /* RW */ + unsigned long lh1_hcerr : 1; /* RW */ + unsigned long gr0_hcerr : 1; /* RW */ + unsigned long gr1_hcerr : 1; /* RW */ + unsigned long ni0_hcerr : 1; /* RW */ + unsigned long ni1_hcerr : 1; /* RW */ + unsigned long lb_aoerr0 : 1; /* RW */ + unsigned long qp_aoerr0 : 1; /* RW */ + unsigned long rh_aoerr0 : 1; /* RW */ + unsigned long lh0_aoerr0 : 1; /* RW */ + unsigned long lh1_aoerr0 : 1; /* RW */ + unsigned long gr0_aoerr0 : 1; /* RW */ + unsigned long gr1_aoerr0 : 1; /* RW */ + unsigned long xb_aoerr0 : 1; /* RW */ + unsigned long rt_aoerr0 : 1; /* RW */ + unsigned long ni0_aoerr0 : 1; /* RW */ + unsigned long ni1_aoerr0 : 1; /* RW */ + unsigned long lb_aoerr1 : 1; /* RW */ + unsigned long qp_aoerr1 : 1; /* RW */ + unsigned long rh_aoerr1 : 1; /* RW */ + unsigned long lh0_aoerr1 : 1; /* RW */ + unsigned long lh1_aoerr1 : 1; /* RW */ + unsigned long gr0_aoerr1 : 1; /* RW */ + unsigned long gr1_aoerr1 : 1; /* RW */ + unsigned long xb_aoerr1 : 1; /* RW */ + unsigned long rt_aoerr1 : 1; /* RW */ + unsigned long ni0_aoerr1 : 1; /* RW */ + unsigned long ni1_aoerr1 : 1; /* RW */ + unsigned long system_shutdown_int : 1; /* RW */ + unsigned long lb_irq_int_0 : 1; /* RW */ + unsigned long lb_irq_int_1 : 1; /* RW */ + unsigned long lb_irq_int_2 : 1; /* RW */ + unsigned long lb_irq_int_3 : 1; /* RW */ + unsigned long lb_irq_int_4 : 1; /* RW */ + unsigned long lb_irq_int_5 : 1; /* RW */ + unsigned long lb_irq_int_6 : 1; /* RW */ + unsigned long lb_irq_int_7 : 1; /* RW */ + unsigned long lb_irq_int_8 : 1; /* RW */ + unsigned long lb_irq_int_9 : 1; /* RW */ + unsigned long lb_irq_int_10 : 1; /* RW */ + unsigned long lb_irq_int_11 : 1; /* RW */ + unsigned long lb_irq_int_12 : 1; /* RW */ + unsigned long lb_irq_int_13 : 1; /* RW */ + unsigned long lb_irq_int_14 : 1; /* RW */ + unsigned long lb_irq_int_15 : 1; /* RW */ + unsigned long l1_nmi_int : 1; /* RW */ + unsigned long stop_clock : 1; /* RW */ + unsigned long asic_to_l1 : 1; /* RW */ + unsigned long l1_to_asic : 1; /* RW */ + unsigned long la_seq_trigger : 1; /* RW */ + unsigned long ipi_int : 1; /* RW */ + unsigned long extio_int0 : 1; /* RW */ + unsigned long extio_int1 : 1; /* RW */ + unsigned long extio_int2 : 1; /* RW */ + unsigned long extio_int3 : 1; /* RW */ + unsigned long profile_int : 1; /* RW */ + unsigned long rsvd_59_63 : 5; /* */ + } s2; }; /* ========================================================================= */ /* UVH_EVENT_OCCURRED0_ALIAS */ /* ========================================================================= */ #define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL -#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0 +#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 /* ========================================================================= */ /* UVH_GR0_TLB_INT0_CONFIG */ @@ -432,8 +665,16 @@ union uvh_int_cmpb_u { /* ========================================================================= */ #define UVH_INT_CMPC 0x22100UL -#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 -#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL +#define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0 +#define UV2H_INT_CMPC_REAL_TIME_CMPC_SHFT 0 +#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT (is_uv1_hub() ? \ + UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT : \ + UV2H_INT_CMPC_REAL_TIME_CMPC_SHFT) +#define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0xffffffffffffffUL +#define UV2H_INT_CMPC_REAL_TIME_CMPC_MASK 0xffffffffffffffUL +#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK (is_uv1_hub() ? \ + UV1H_INT_CMPC_REAL_TIME_CMPC_MASK : \ + UV2H_INT_CMPC_REAL_TIME_CMPC_MASK) union uvh_int_cmpc_u { unsigned long v; @@ -448,8 +689,16 @@ union uvh_int_cmpc_u { /* ========================================================================= */ #define UVH_INT_CMPD 0x22180UL -#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 -#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL +#define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0 +#define UV2H_INT_CMPD_REAL_TIME_CMPD_SHFT 0 +#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT (is_uv1_hub() ? \ + UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT : \ + UV2H_INT_CMPD_REAL_TIME_CMPD_SHFT) +#define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0xffffffffffffffUL +#define UV2H_INT_CMPD_REAL_TIME_CMPD_MASK 0xffffffffffffffUL +#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK (is_uv1_hub() ? \ + UV1H_INT_CMPD_REAL_TIME_CMPD_MASK : \ + UV2H_INT_CMPD_REAL_TIME_CMPD_MASK) union uvh_int_cmpd_u { unsigned long v; @@ -463,7 +712,7 @@ union uvh_int_cmpd_u { /* UVH_IPI_INT */ /* ========================================================================= */ #define UVH_IPI_INT 0x60500UL -#define UVH_IPI_INT_32 0x0348 +#define UVH_IPI_INT_32 0x348 #define UVH_IPI_INT_VECTOR_SHFT 0 #define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL @@ -493,7 +742,7 @@ union uvh_ipi_int_u { /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x009c0 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL @@ -515,7 +764,7 @@ union uvh_lb_bau_intd_payload_queue_first_u { /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x009c8 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL @@ -533,7 +782,7 @@ union uvh_lb_bau_intd_payload_queue_last_u { /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x009d0 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL @@ -551,7 +800,7 @@ union uvh_lb_bau_intd_payload_queue_tail_u { /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x0a68 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL @@ -585,6 +834,7 @@ union uvh_lb_bau_intd_payload_queue_tail_u { #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL + union uvh_lb_bau_intd_software_acknowledge_u { unsigned long v; struct uvh_lb_bau_intd_software_acknowledge_s { @@ -612,13 +862,13 @@ union uvh_lb_bau_intd_software_acknowledge_u { /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 /* ========================================================================= */ /* UVH_LB_BAU_MISC_CONTROL */ /* ========================================================================= */ #define UVH_LB_BAU_MISC_CONTROL 0x320170UL -#define UVH_LB_BAU_MISC_CONTROL_32 0x00a10 +#define UVH_LB_BAU_MISC_CONTROL_32 0xa10 #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL @@ -628,8 +878,8 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL +#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 +#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 @@ -650,8 +900,86 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL + +#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 +#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL +#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 +#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL +#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 +#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL +#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 +#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL +#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 +#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL +#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 +#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL +#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 +#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL +#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 +#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL +#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 +#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL +#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 +#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL +#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 +#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL +#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 +#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL +#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 +#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL +#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 +#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL +#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 +#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL +#define UV1H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 +#define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL + +#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 +#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL +#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 +#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL +#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 +#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL +#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 +#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL +#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 +#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL +#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 +#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL +#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 +#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL +#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 +#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL +#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 +#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL +#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 +#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL +#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 +#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL +#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 +#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL +#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 +#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL +#define UV2H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 +#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL union uvh_lb_bau_misc_control_u { unsigned long v; @@ -660,7 +988,25 @@ union uvh_lb_bau_misc_control_u { unsigned long apic_mode : 1; /* RW */ unsigned long force_broadcast : 1; /* RW */ unsigned long force_lock_nop : 1; /* RW */ - unsigned long csi_agent_presence_vector : 3; /* RW */ + unsigned long qpi_agent_presence_vector : 3; /* RW */ + unsigned long descriptor_fetch_mode : 1; /* RW */ + unsigned long enable_intd_soft_ack_mode : 1; /* RW */ + unsigned long intd_soft_ack_timeout_period : 4; /* RW */ + unsigned long enable_dual_mapping_mode : 1; /* RW */ + unsigned long vga_io_port_decode_enable : 1; /* RW */ + unsigned long vga_io_port_16_bit_decode : 1; /* RW */ + unsigned long suppress_dest_registration : 1; /* RW */ + unsigned long programmed_initial_priority : 3; /* RW */ + unsigned long use_incoming_priority : 1; /* RW */ + unsigned long enable_programmed_initial_priority : 1; /* RW */ + unsigned long rsvd_29_63 : 35; + } s; + struct uv1h_lb_bau_misc_control_s { + unsigned long rejection_delay : 8; /* RW */ + unsigned long apic_mode : 1; /* RW */ + unsigned long force_broadcast : 1; /* RW */ + unsigned long force_lock_nop : 1; /* RW */ + unsigned long qpi_agent_presence_vector : 3; /* RW */ unsigned long descriptor_fetch_mode : 1; /* RW */ unsigned long enable_intd_soft_ack_mode : 1; /* RW */ unsigned long intd_soft_ack_timeout_period : 4; /* RW */ @@ -673,14 +1019,40 @@ union uvh_lb_bau_misc_control_u { unsigned long enable_programmed_initial_priority : 1; /* RW */ unsigned long rsvd_29_47 : 19; /* */ unsigned long fun : 16; /* RW */ - } s; + } s1; + struct uv2h_lb_bau_misc_control_s { + unsigned long rejection_delay : 8; /* RW */ + unsigned long apic_mode : 1; /* RW */ + unsigned long force_broadcast : 1; /* RW */ + unsigned long force_lock_nop : 1; /* RW */ + unsigned long qpi_agent_presence_vector : 3; /* RW */ + unsigned long descriptor_fetch_mode : 1; /* RW */ + unsigned long enable_intd_soft_ack_mode : 1; /* RW */ + unsigned long intd_soft_ack_timeout_period : 4; /* RW */ + unsigned long enable_dual_mapping_mode : 1; /* RW */ + unsigned long vga_io_port_decode_enable : 1; /* RW */ + unsigned long vga_io_port_16_bit_decode : 1; /* RW */ + unsigned long suppress_dest_registration : 1; /* RW */ + unsigned long programmed_initial_priority : 3; /* RW */ + unsigned long use_incoming_priority : 1; /* RW */ + unsigned long enable_programmed_initial_priority : 1; /* RW */ + unsigned long enable_automatic_apic_mode_selection : 1; /* RW */ + unsigned long apic_mode_status : 1; /* RO */ + unsigned long suppress_interrupts_to_self : 1; /* RW */ + unsigned long enable_lock_based_system_flush : 1; /* RW */ + unsigned long enable_extended_sb_status : 1; /* RW */ + unsigned long suppress_int_prio_udt_to_self : 1; /* RW */ + unsigned long use_legacy_descriptor_formats : 1; /* RW */ + unsigned long rsvd_36_47 : 12; /* */ + unsigned long fun : 16; /* RW */ + } s2; }; /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ /* ========================================================================= */ #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x009a8 +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL @@ -703,7 +1075,7 @@ union uvh_lb_bau_sb_activation_control_u { /* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ /* ========================================================================= */ #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x009b0 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL @@ -719,7 +1091,7 @@ union uvh_lb_bau_sb_activation_status_0_u { /* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ /* ========================================================================= */ #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x009b8 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL @@ -735,7 +1107,7 @@ union uvh_lb_bau_sb_activation_status_1_u { /* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ /* ========================================================================= */ #define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x009a0 +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL @@ -753,23 +1125,6 @@ union uvh_lb_bau_sb_descriptor_base_u { } s; }; -/* ========================================================================= */ -/* UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK */ -/* ========================================================================= */ -#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL -#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0 - -#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 -#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL - -union uvh_lb_target_physical_apic_id_mask_u { - unsigned long v; - struct uvh_lb_target_physical_apic_id_mask_s { - unsigned long bit_enables : 32; /* RW */ - unsigned long rsvd_32_63 : 32; /* */ - } s; -}; - /* ========================================================================= */ /* UVH_NODE_ID */ /* ========================================================================= */ @@ -785,14 +1140,48 @@ union uvh_lb_target_physical_apic_id_mask_u { #define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL #define UVH_NODE_ID_NODE_ID_SHFT 32 #define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UVH_NODE_ID_NODES_PER_BIT_SHFT 48 -#define UVH_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL -#define UVH_NODE_ID_NI_PORT_SHFT 56 -#define UVH_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL + +#define UV1H_NODE_ID_FORCE1_SHFT 0 +#define UV1H_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UV1H_NODE_ID_MANUFACTURER_SHFT 1 +#define UV1H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UV1H_NODE_ID_PART_NUMBER_SHFT 12 +#define UV1H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UV1H_NODE_ID_REVISION_SHFT 28 +#define UV1H_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UV1H_NODE_ID_NODE_ID_SHFT 32 +#define UV1H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UV1H_NODE_ID_NODES_PER_BIT_SHFT 48 +#define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL +#define UV1H_NODE_ID_NI_PORT_SHFT 56 +#define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL + +#define UV2H_NODE_ID_FORCE1_SHFT 0 +#define UV2H_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UV2H_NODE_ID_MANUFACTURER_SHFT 1 +#define UV2H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UV2H_NODE_ID_PART_NUMBER_SHFT 12 +#define UV2H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UV2H_NODE_ID_REVISION_SHFT 28 +#define UV2H_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UV2H_NODE_ID_NODE_ID_SHFT 32 +#define UV2H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UV2H_NODE_ID_NODES_PER_BIT_SHFT 50 +#define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL +#define UV2H_NODE_ID_NI_PORT_SHFT 57 +#define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL union uvh_node_id_u { unsigned long v; struct uvh_node_id_s { + unsigned long force1 : 1; /* RO */ + unsigned long manufacturer : 11; /* RO */ + unsigned long part_number : 16; /* RO */ + unsigned long revision : 4; /* RO */ + unsigned long node_id : 15; /* RW */ + unsigned long rsvd_47_63 : 17; + } s; + struct uv1h_node_id_s { unsigned long force1 : 1; /* RO */ unsigned long manufacturer : 11; /* RO */ unsigned long part_number : 16; /* RO */ @@ -803,7 +1192,18 @@ union uvh_node_id_u { unsigned long rsvd_55 : 1; /* */ unsigned long ni_port : 4; /* RO */ unsigned long rsvd_60_63 : 4; /* */ - } s; + } s1; + struct uv2h_node_id_s { + unsigned long force1 : 1; /* RO */ + unsigned long manufacturer : 11; /* RO */ + unsigned long part_number : 16; /* RO */ + unsigned long revision : 4; /* RO */ + unsigned long node_id : 15; /* RW */ + unsigned long rsvd_47_49 : 3; /* */ + unsigned long nodes_per_bit : 7; /* RO */ + unsigned long ni_port : 5; /* RO */ + unsigned long rsvd_62_63 : 2; /* */ + } s2; }; /* ========================================================================= */ @@ -954,18 +1354,38 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { #define UVH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL #define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 #define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL -#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12 -#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL + +#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 +#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL +#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 +#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL +#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12 +#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL + +#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 +#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL +#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 +#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL union uvh_rh_gam_config_mmr_u { unsigned long v; struct uvh_rh_gam_config_mmr_s { + unsigned long m_skt : 6; /* RW */ + unsigned long n_skt : 4; /* RW */ + unsigned long rsvd_10_63 : 54; + } s; + struct uv1h_rh_gam_config_mmr_s { unsigned long m_skt : 6; /* RW */ unsigned long n_skt : 4; /* RW */ unsigned long rsvd_10_11: 2; /* */ unsigned long mmiol_cfg : 1; /* RW */ unsigned long rsvd_13_63: 51; /* */ - } s; + } s1; + struct uv2h_rh_gam_config_mmr_s { + unsigned long m_skt : 6; /* RW */ + unsigned long n_skt : 4; /* RW */ + unsigned long rsvd_10_63: 54; /* */ + } s2; }; /* ========================================================================= */ @@ -975,16 +1395,32 @@ union uvh_rh_gam_config_mmr_u { #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL union uvh_rh_gam_gru_overlay_config_mmr_u { unsigned long v; struct uvh_rh_gam_gru_overlay_config_mmr_s { + unsigned long rsvd_0_27: 28; /* */ + unsigned long base : 18; /* RW */ + unsigned long rsvd_46_62 : 17; + unsigned long enable : 1; /* RW */ + } s; + struct uv1h_rh_gam_gru_overlay_config_mmr_s { unsigned long rsvd_0_27: 28; /* */ unsigned long base : 18; /* RW */ unsigned long rsvd_46_47: 2; /* */ @@ -993,7 +1429,15 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { unsigned long n_gru : 4; /* RW */ unsigned long rsvd_56_62: 7; /* */ unsigned long enable : 1; /* RW */ - } s; + } s1; + struct uv2h_rh_gam_gru_overlay_config_mmr_s { + unsigned long rsvd_0_27: 28; /* */ + unsigned long base : 18; /* RW */ + unsigned long rsvd_46_51: 6; /* */ + unsigned long n_gru : 4; /* RW */ + unsigned long rsvd_56_62: 7; /* */ + unsigned long enable : 1; /* RW */ + } s2; }; /* ========================================================================= */ @@ -1001,25 +1445,42 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { /* ========================================================================= */ #define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 27 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff8000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL union uvh_rh_gam_mmioh_overlay_config_mmr_u { unsigned long v; - struct uvh_rh_gam_mmioh_overlay_config_mmr_s { + struct uv1h_rh_gam_mmioh_overlay_config_mmr_s { unsigned long rsvd_0_29: 30; /* */ unsigned long base : 16; /* RW */ unsigned long m_io : 6; /* RW */ unsigned long n_io : 4; /* RW */ unsigned long rsvd_56_62: 7; /* */ unsigned long enable : 1; /* RW */ - } s; + } s1; + struct uv2h_rh_gam_mmioh_overlay_config_mmr_s { + unsigned long rsvd_0_26: 27; /* */ + unsigned long base : 19; /* RW */ + unsigned long m_io : 6; /* RW */ + unsigned long n_io : 4; /* RW */ + unsigned long rsvd_56_62: 7; /* */ + unsigned long enable : 1; /* RW */ + } s2; }; /* ========================================================================= */ @@ -1029,20 +1490,40 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u { #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL union uvh_rh_gam_mmr_overlay_config_mmr_u { unsigned long v; struct uvh_rh_gam_mmr_overlay_config_mmr_s { + unsigned long rsvd_0_25: 26; /* */ + unsigned long base : 20; /* RW */ + unsigned long rsvd_46_62 : 17; + unsigned long enable : 1; /* RW */ + } s; + struct uv1h_rh_gam_mmr_overlay_config_mmr_s { unsigned long rsvd_0_25: 26; /* */ unsigned long base : 20; /* RW */ unsigned long dual_hub : 1; /* RW */ unsigned long rsvd_47_62: 16; /* */ unsigned long enable : 1; /* RW */ - } s; + } s1; + struct uv2h_rh_gam_mmr_overlay_config_mmr_s { + unsigned long rsvd_0_25: 26; /* */ + unsigned long base : 20; /* RW */ + unsigned long rsvd_46_62: 17; /* */ + unsigned long enable : 1; /* RW */ + } s2; }; /* ========================================================================= */ @@ -1103,10 +1584,11 @@ union uvh_rtc1_int_config_u { /* UVH_SCRATCH5 */ /* ========================================================================= */ #define UVH_SCRATCH5 0x2d0200UL -#define UVH_SCRATCH5_32 0x00778 +#define UVH_SCRATCH5_32 0x778 #define UVH_SCRATCH5_SCRATCH5_SHFT 0 #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + union uvh_scratch5_u { unsigned long v; struct uvh_scratch5_s { @@ -1114,4 +1596,154 @@ union uvh_scratch5_u { } s; }; +/* ========================================================================= */ +/* UV2H_EVENT_OCCURRED2 */ +/* ========================================================================= */ +#define UV2H_EVENT_OCCURRED2 0x70100UL +#define UV2H_EVENT_OCCURRED2_32 0xb68 + +#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0 +#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL +#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1 +#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL +#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2 +#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL +#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3 +#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL +#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4 +#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL +#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5 +#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL +#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6 +#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL +#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7 +#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL +#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8 +#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL +#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9 +#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL +#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10 +#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL +#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11 +#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL +#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12 +#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL +#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13 +#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL +#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14 +#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL +#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15 +#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL +#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16 +#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL +#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17 +#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL +#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18 +#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL +#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19 +#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL +#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20 +#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL +#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21 +#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL +#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22 +#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL +#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23 +#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL +#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24 +#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL +#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25 +#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL +#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26 +#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL +#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27 +#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL +#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28 +#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL +#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29 +#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL +#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30 +#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL +#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31 +#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL + +union uv2h_event_occurred2_u { + unsigned long v; + struct uv2h_event_occurred2_s { + unsigned long rtc_0 : 1; /* RW */ + unsigned long rtc_1 : 1; /* RW */ + unsigned long rtc_2 : 1; /* RW */ + unsigned long rtc_3 : 1; /* RW */ + unsigned long rtc_4 : 1; /* RW */ + unsigned long rtc_5 : 1; /* RW */ + unsigned long rtc_6 : 1; /* RW */ + unsigned long rtc_7 : 1; /* RW */ + unsigned long rtc_8 : 1; /* RW */ + unsigned long rtc_9 : 1; /* RW */ + unsigned long rtc_10 : 1; /* RW */ + unsigned long rtc_11 : 1; /* RW */ + unsigned long rtc_12 : 1; /* RW */ + unsigned long rtc_13 : 1; /* RW */ + unsigned long rtc_14 : 1; /* RW */ + unsigned long rtc_15 : 1; /* RW */ + unsigned long rtc_16 : 1; /* RW */ + unsigned long rtc_17 : 1; /* RW */ + unsigned long rtc_18 : 1; /* RW */ + unsigned long rtc_19 : 1; /* RW */ + unsigned long rtc_20 : 1; /* RW */ + unsigned long rtc_21 : 1; /* RW */ + unsigned long rtc_22 : 1; /* RW */ + unsigned long rtc_23 : 1; /* RW */ + unsigned long rtc_24 : 1; /* RW */ + unsigned long rtc_25 : 1; /* RW */ + unsigned long rtc_26 : 1; /* RW */ + unsigned long rtc_27 : 1; /* RW */ + unsigned long rtc_28 : 1; /* RW */ + unsigned long rtc_29 : 1; /* RW */ + unsigned long rtc_30 : 1; /* RW */ + unsigned long rtc_31 : 1; /* RW */ + unsigned long rsvd_32_63: 32; /* */ + } s1; +}; + +/* ========================================================================= */ +/* UV2H_EVENT_OCCURRED2_ALIAS */ +/* ========================================================================= */ +#define UV2H_EVENT_OCCURRED2_ALIAS 0x70108UL +#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70 + +/* ========================================================================= */ +/* UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 */ +/* ========================================================================= */ +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 + +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL + +union uv2h_lb_bau_sb_activation_status_2_u { + unsigned long v; + struct uv2h_lb_bau_sb_activation_status_2_s { + unsigned long aux_error : 64; /* RW */ + } s1; +}; + +/* ========================================================================= */ +/* UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK */ +/* ========================================================================= */ +#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL +#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x9f0 + +#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 +#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL + +union uv1h_lb_target_physical_apic_id_mask_u { + unsigned long v; + struct uv1h_lb_target_physical_apic_id_mask_s { + unsigned long bit_enables : 32; /* RW */ + unsigned long rsvd_32_63 : 32; /* */ + } s1; +}; + + #endif /* __ASM_UV_MMRS_X86_H__ */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f450b683dfc..b511a011b7d 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -91,6 +91,10 @@ static int __init early_get_pnodeid(void) m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); uv_min_hub_revision_id = node_id.s.revision; + if (node_id.s.part_number == UV2_HUB_PART_NUMBER) + uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; + + uv_hub_info->hub_revision = uv_min_hub_revision_id; pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); return pnode; } @@ -112,17 +116,25 @@ static void __init early_get_apic_pnode_shift(void) */ static void __init uv_set_apicid_hibit(void) { - union uvh_lb_target_physical_apic_id_mask_u apicid_mask; + union uv1h_lb_target_physical_apic_id_mask_u apicid_mask; - apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK); - uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; + if (is_uv1_hub()) { + apicid_mask.v = + uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK); + uv_apicid_hibits = + apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK; + } } static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - int pnodeid; + int pnodeid, is_uv1, is_uv2; - if (!strcmp(oem_id, "SGI")) { + is_uv1 = !strcmp(oem_id, "SGI"); + is_uv2 = !strcmp(oem_id, "SGI2"); + if (is_uv1 || is_uv2) { + uv_hub_info->hub_revision = + is_uv1 ? UV1_HUB_REVISION_BASE : UV2_HUB_REVISION_BASE; pnodeid = early_get_pnodeid(); early_get_apic_pnode_shift(); x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; @@ -484,12 +496,19 @@ static __init void map_mmr_high(int max_pnode) static __init void map_mmioh_high(int max_pnode) { union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; - int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + int shift; mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); - if (mmioh.s.enable) - map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io, + if (is_uv1_hub() && mmioh.s1.enable) { + shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + map_high("MMIOH", mmioh.s1.base, shift, mmioh.s1.m_io, + max_pnode, map_uc); + } + if (is_uv2_hub() && mmioh.s2.enable) { + shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + map_high("MMIOH", mmioh.s2.base, shift, mmioh.s2.m_io, max_pnode, map_uc); + } } static __init void map_low_mmrs(void) @@ -736,13 +755,14 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask, pnode_io_mask; + printk(KERN_INFO "UV: Found %s hub\n", is_uv1_hub() ? "UV1" : "UV2"); map_low_mmrs(); m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); - n_io = mmioh.s.n_io; + n_io = is_uv1_hub() ? mmioh.s1.n_io : mmioh.s2.n_io; mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE; @@ -811,6 +831,8 @@ void __init uv_system_init(void) */ uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; + uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision; + pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); lcpu = uv_blade_info[blade].nr_possible_cpus; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index c58e0ea39ef..a9856c09c42 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -397,16 +397,13 @@ end_uvhub_quiesce(struct bau_control *hmaster) * Wait for completion of a broadcast software ack message * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP */ -static int uv_wait_completion(struct bau_desc *bau_desc, +static int uv1_wait_completion(struct bau_desc *bau_desc, unsigned long mmr_offset, int right_shift, int this_cpu, struct bau_control *bcp, struct bau_control *smaster, long try) { unsigned long descriptor_status; cycles_t ttime; struct ptc_stats *stat = bcp->statp; - struct bau_control *hmaster; - - hmaster = bcp->uvhub_master; /* spin on the status MMR, waiting for it to go idle */ while ((descriptor_status = (((unsigned long) @@ -414,16 +411,76 @@ static int uv_wait_completion(struct bau_desc *bau_desc, right_shift) & UV_ACT_STATUS_MASK)) != DESC_STATUS_IDLE) { /* - * Our software ack messages may be blocked because there are - * no swack resources available. As long as none of them - * has timed out hardware will NACK our message and its - * state will stay IDLE. + * Our software ack messages may be blocked because + * there are no swack resources available. As long + * as none of them has timed out hardware will NACK + * our message and its state will stay IDLE. */ if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { stat->s_stimeout++; return FLUSH_GIVEUP; } else if (descriptor_status == - DESC_STATUS_DESTINATION_TIMEOUT) { + DESC_STATUS_DESTINATION_TIMEOUT) { + stat->s_dtimeout++; + ttime = get_cycles(); + + /* + * Our retries may be blocked by all destination + * swack resources being consumed, and a timeout + * pending. In that case hardware returns the + * ERROR that looks like a destination timeout. + */ + if (cycles_2_us(ttime - bcp->send_message) < + timeout_us) { + bcp->conseccompletes = 0; + return FLUSH_RETRY_PLUGGED; + } + + bcp->conseccompletes = 0; + return FLUSH_RETRY_TIMEOUT; + } else { + /* + * descriptor_status is still BUSY + */ + cpu_relax(); + } + } + bcp->conseccompletes++; + return FLUSH_COMPLETE; +} + +static int uv2_wait_completion(struct bau_desc *bau_desc, + unsigned long mmr_offset, int right_shift, int this_cpu, + struct bau_control *bcp, struct bau_control *smaster, long try) +{ + unsigned long descriptor_status; + unsigned long descriptor_status2; + int cpu; + cycles_t ttime; + struct ptc_stats *stat = bcp->statp; + + /* UV2 has an extra bit of status */ + cpu = bcp->uvhub_cpu; + /* spin on the status MMR, waiting for it to go idle */ + descriptor_status = (((unsigned long)(uv_read_local_mmr + (mmr_offset)) >> right_shift) & UV_ACT_STATUS_MASK); + descriptor_status2 = (((unsigned long)uv_read_local_mmr + (UV2H_LB_BAU_SB_ACTIVATION_STATUS_2) >> cpu) & 0x1UL); + descriptor_status = (descriptor_status << 1) | + descriptor_status2; + while (descriptor_status != UV2H_DESC_IDLE) { + /* + * Our software ack messages may be blocked because + * there are no swack resources available. As long + * as none of them has timed out hardware will NACK + * our message and its state will stay IDLE. + */ + if ((descriptor_status == UV2H_DESC_SOURCE_TIMEOUT) || + (descriptor_status == UV2H_DESC_DEST_STRONG_NACK) || + (descriptor_status == UV2H_DESC_DEST_PUT_ERR)) { + stat->s_stimeout++; + return FLUSH_GIVEUP; + } else if (descriptor_status == UV2H_DESC_DEST_TIMEOUT) { stat->s_dtimeout++; ttime = get_cycles(); @@ -447,11 +504,31 @@ static int uv_wait_completion(struct bau_desc *bau_desc, */ cpu_relax(); } + descriptor_status = (((unsigned long)(uv_read_local_mmr + (mmr_offset)) >> right_shift) & + UV_ACT_STATUS_MASK); + descriptor_status2 = (((unsigned long)uv_read_local_mmr + (UV2H_LB_BAU_SB_ACTIVATION_STATUS_2) >> cpu) & + 0x1UL); + descriptor_status = (descriptor_status << 1) | + descriptor_status2; } bcp->conseccompletes++; return FLUSH_COMPLETE; } +static int uv_wait_completion(struct bau_desc *bau_desc, + unsigned long mmr_offset, int right_shift, int this_cpu, + struct bau_control *bcp, struct bau_control *smaster, long try) +{ + if (is_uv1_hub()) + return uv1_wait_completion(bau_desc, mmr_offset, right_shift, + this_cpu, bcp, smaster, try); + else + return uv2_wait_completion(bau_desc, mmr_offset, right_shift, + this_cpu, bcp, smaster, try); +} + static inline cycles_t sec_2_cycles(unsigned long sec) { @@ -585,7 +662,8 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc, struct bau_control *smaster = bcp->socket_master; struct bau_control *hmaster = bcp->uvhub_master; - if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, + if (is_uv1_hub() && + !atomic_inc_unless_ge(&hmaster->uvhub_lock, &hmaster->active_descriptor_count, hmaster->max_bau_concurrent)) { stat->s_throttles++; @@ -899,12 +977,17 @@ static void __init uv_enable_timeouts(void) uv_write_global_mmr64 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); /* + * UV1: * Subsequent reversals of the timebase bit (3) cause an * immediate timeout of one or all INTD resources as * indicated in bits 2:0 (7 causes all of them to timeout). */ mmr_image |= ((unsigned long)1 << UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); + if (is_uv2_hub()) { + mmr_image |= ((unsigned long)1 << UV2_LEG_SHFT); + mmr_image |= ((unsigned long)1 << UV2_EXT_SHFT); + } uv_write_global_mmr64 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); } @@ -1486,14 +1569,27 @@ calculate_destination_timeout(void) int ret; unsigned long ts_ns; - mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; - mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); - index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; - mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); - mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; - base = timeout_base_ns[index]; - ts_ns = base * mult1 * mult2; - ret = ts_ns / 1000; + if (is_uv1_hub()) { + mult1 = UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD & + BAU_MISC_CONTROL_MULT_MASK; + mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); + index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; + mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); + mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; + base = timeout_base_ns[index]; + ts_ns = base * mult1 * mult2; + ret = ts_ns / 1000; + } else { + /* 4 bits 0/1 for 10/80us, 3 bits of multiplier */ + mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); + mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT; + if (mmr_image & ((unsigned long)1 << UV2_ACK_UNITS_SHFT)) + mult1 = 80; + else + mult1 = 10; + base = mmr_image & UV2_ACK_MASK; + ret = mult1 * base; + } return ret; } diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 0eb90184515..9f29a01ee1b 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -99,8 +99,12 @@ static void uv_rtc_send_IPI(int cpu) /* Check for an RTC interrupt pending */ static int uv_intr_pending(int pnode) { - return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & - UVH_EVENT_OCCURRED0_RTC1_MASK; + if (is_uv1_hub()) + return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & + UV1H_EVENT_OCCURRED0_RTC1_MASK; + else + return uv_read_global_mmr64(pnode, UV2H_EVENT_OCCURRED2) & + UV2H_EVENT_OCCURRED2_RTC_1_MASK; } /* Setup interrupt and return non-zero if early expiration occurred. */ @@ -114,8 +118,12 @@ static int uv_setup_intr(int cpu, u64 expires) UVH_RTC1_INT_CONFIG_M_MASK); uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L); - uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, - UVH_EVENT_OCCURRED0_RTC1_MASK); + if (is_uv1_hub()) + uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, + UV1H_EVENT_OCCURRED0_RTC1_MASK); + else + uv_write_global_mmr64(pnode, UV2H_EVENT_OCCURRED2_ALIAS, + UV2H_EVENT_OCCURRED2_RTC_1_MASK); val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); -- cgit v1.2.3-70-g09d2 From f073cc8f39b48fdf4c8cd9520a6028fe69199b60 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Tue, 24 May 2011 13:07:36 -0500 Subject: x86, UV: Clean up uv_tlb.c SGI UV's uv_tlb.c driver has become rather hard to read, with overly large functions, non-standard coding style and (way) too long variable, constant and function names and non-obvious code flow sequences. This patch improves the readability and maintainability of the driver significantly, by doing the following strict code cleanups with no side effects: - Split long functions into shorter logical functions. - Shortened some variable and structure member names. - Added special functions for reads and writes of MMR regs with very long names. - Added the 'tunables' table to shortened tunables_write(). - Added the 'stat_description' table to shorten uv_ptc_proc_write(). - Pass fewer 'stat' arguments where it can be derived from the 'bcp' argument. - Function definitions consistent on one line, and inline in few (short) cases. - Moved some small structures and an atomic inline function to the header file. - Moved some local variables to the blocks where they are used. - Updated the copyright date. - Shortened uv_write_global_mmr64() etc. using some aliasing; no line breaks. Renamed many uv_.. functions that are not exported. - Aligned structure fields. [ note that not all structures are aligned the same way though; I'd like to keep the extensive commenting in some of them. ] - Shortened some long structure names. - Standard pass/fail exit from init_per_cpu() - Vertical alignment for mass initializations. - More separation between blocks of code. Tested on a 16-processor Altix UV. Signed-off-by: Cliff Wickman Cc: penberg@kernel.org Link: http://lkml.kernel.org/r/E1QOw12-0004MN-Lp@eag09.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 554 +++++++++------ arch/x86/platform/uv/tlb_uv.c | 1444 ++++++++++++++++++++------------------ 2 files changed, 1099 insertions(+), 899 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 0652a5a9fd6..a291c40efd4 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -5,7 +5,7 @@ * * SGI UV Broadcast Assist Unit definitions * - * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2008-2011 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_X86_UV_UV_BAU_H @@ -35,9 +35,9 @@ #define MAX_CPUS_PER_UVHUB 64 #define MAX_CPUS_PER_SOCKET 32 -#define UV_ADP_SIZE 64 /* hardware-provided max. */ -#define UV_CPUS_PER_ACT_STATUS 32 /* hardware-provided max. */ -#define UV_ITEMS_PER_DESCRIPTOR 8 +#define ADP_SZ 64 /* hardware-provided max. */ +#define UV_CPUS_PER_AS 32 /* hardware-provided max. */ +#define ITEMS_PER_DESC 8 /* the 'throttle' to prevent the hardware stay-busy bug */ #define MAX_BAU_CONCURRENT 3 #define UV_ACT_STATUS_MASK 0x3 @@ -48,7 +48,7 @@ #define UV2_NET_ENDPOINT_INTD 0x28 #define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \ UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD) -#define UV_DESC_BASE_PNODE_SHIFT 49 +#define UV_DESC_PSHIFT 49 #define UV_PAYLOADQ_PNODE_SHIFT 49 #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" #define UV_BAU_BASENAME "sgi_uv/bau_tunables" @@ -56,7 +56,8 @@ #define UV_BAU_TUNABLES_FILE "bau_tunables" #define WHITESPACE " \t\n" #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) - +#define cpubit_isset(cpu, bau_local_cpumask) \ + test_bit((cpu), (bau_local_cpumask).bits) /* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */ /* @@ -72,25 +73,37 @@ UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \ UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD) -#define BAU_MISC_CONTROL_MULT_MASK 3 +#define BAU_MISC_CONTROL_MULT_MASK 3 -#define UVH_AGING_PRESCALE_SEL 0x000000b000UL +#define UVH_AGING_PRESCALE_SEL 0x000000b000UL /* [30:28] URGENCY_7 an index into a table of times */ -#define BAU_URGENCY_7_SHIFT 28 -#define BAU_URGENCY_7_MASK 7 +#define BAU_URGENCY_7_SHIFT 28 +#define BAU_URGENCY_7_MASK 7 -#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL +#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL /* [45:40] BAU - BAU transaction timeout select - a multiplier */ -#define BAU_TRANS_SHIFT 40 -#define BAU_TRANS_MASK 0x3f +#define BAU_TRANS_SHIFT 40 +#define BAU_TRANS_MASK 0x3f + +/* + * shorten some awkward names + */ +#define AS_PUSH_SHIFT UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT +#define SOFTACK_MSHIFT UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT +#define SOFTACK_PSHIFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT +#define SOFTACK_TIMEOUT_PERIOD UV_INTD_SOFT_ACK_TIMEOUT_PERIOD +#define write_gmmr uv_write_global_mmr64 +#define write_lmmr uv_write_local_mmr +#define read_lmmr uv_read_local_mmr +#define read_gmmr uv_read_global_mmr64 /* * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 */ -#define DESC_STATUS_IDLE 0 -#define DESC_STATUS_ACTIVE 1 -#define DESC_STATUS_DESTINATION_TIMEOUT 2 -#define DESC_STATUS_SOURCE_TIMEOUT 3 +#define DS_IDLE 0 +#define DS_ACTIVE 1 +#define DS_DESTINATION_TIMEOUT 2 +#define DS_SOURCE_TIMEOUT 3 /* * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2 * values 1 and 5 will not occur @@ -111,22 +124,22 @@ * threshholds at which to use IPI to free resources */ /* after this # consecutive 'plugged' timeouts, use IPI to release resources */ -#define PLUGSB4RESET 100 +#define PLUGSB4RESET 100 /* after this many consecutive timeouts, use IPI to release resources */ -#define TIMEOUTSB4RESET 1 +#define TIMEOUTSB4RESET 1 /* at this number uses of IPI to release resources, giveup the request */ -#define IPI_RESET_LIMIT 1 +#define IPI_RESET_LIMIT 1 /* after this # consecutive successes, bump up the throttle if it was lowered */ -#define COMPLETE_THRESHOLD 5 +#define COMPLETE_THRESHOLD 5 -#define UV_LB_SUBNODEID 0x10 +#define UV_LB_SUBNODEID 0x10 /* these two are the same for UV1 and UV2: */ #define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT #define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK /* 4 bits of software ack period */ -#define UV2_ACK_MASK 0x7UL -#define UV2_ACK_UNITS_SHFT 3 +#define UV2_ACK_MASK 0x7UL +#define UV2_ACK_UNITS_SHFT 3 #define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT #define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT @@ -149,9 +162,16 @@ /* * tuning the action when the numalink network is extremely delayed */ -#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in microseconds */ -#define CONGESTED_REPS 10 /* long delays averaged over this many broadcasts */ -#define CONGESTED_PERIOD 30 /* time for the bau to be disabled, in seconds */ +#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in + microseconds */ +#define CONGESTED_REPS 10 /* long delays averaged over + this many broadcasts */ +#define CONGESTED_PERIOD 30 /* time for the bau to be + disabled, in seconds */ +/* see msg_type: */ +#define MSG_NOOP 0 +#define MSG_REGULAR 1 +#define MSG_RETRY 2 /* * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) @@ -163,8 +183,8 @@ * 'base_dest_nasid' field of the header corresponds to the * destination nodeID associated with that specified bit. */ -struct bau_target_uvhubmask { - unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; +struct bau_targ_hubmask { + unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; }; /* @@ -173,7 +193,7 @@ struct bau_target_uvhubmask { * enough bits for max. cpu's per uvhub) */ struct bau_local_cpumask { - unsigned long bits; + unsigned long bits; }; /* @@ -194,14 +214,14 @@ struct bau_local_cpumask { * The payload is software-defined for INTD transactions */ struct bau_msg_payload { - unsigned long address; /* signifies a page or all TLB's - of the cpu */ + unsigned long address; /* signifies a page or all + TLB's of the cpu */ /* 64 bits */ - unsigned short sending_cpu; /* filled in by sender */ + unsigned short sending_cpu; /* filled in by sender */ /* 16 bits */ - unsigned short acknowledge_count;/* filled in by destination */ + unsigned short acknowledge_count; /* filled in by destination */ /* 16 bits */ - unsigned int reserved1:32; /* not usable */ + unsigned int reserved1:32; /* not usable */ }; @@ -210,93 +230,96 @@ struct bau_msg_payload { * see table 4.2.3.0.1 in broacast_assist spec. */ struct bau_msg_header { - unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ + unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ /* bits 5:0 */ - unsigned int base_dest_nasid:15; /* nasid of the */ - /* bits 20:6 */ /* first bit in uvhub map */ - unsigned int command:8; /* message type */ + unsigned int base_dest_nasid:15; /* nasid of the first bit */ + /* bits 20:6 */ /* in uvhub map */ + unsigned int command:8; /* message type */ /* bits 28:21 */ - /* 0x38: SN3net EndPoint Message */ - unsigned int rsvd_1:3; /* must be zero */ + /* 0x38: SN3net EndPoint Message */ + unsigned int rsvd_1:3; /* must be zero */ /* bits 31:29 */ - /* int will align on 32 bits */ - unsigned int rsvd_2:9; /* must be zero */ + /* int will align on 32 bits */ + unsigned int rsvd_2:9; /* must be zero */ /* bits 40:32 */ - /* Suppl_A is 56-41 */ - unsigned int sequence:16;/* message sequence number */ - /* bits 56:41 */ /* becomes bytes 16-17 of msg */ - /* Address field (96:57) is never used as an - address (these are address bits 42:3) */ - - unsigned int rsvd_3:1; /* must be zero */ + /* Suppl_A is 56-41 */ + unsigned int sequence:16; /* message sequence number */ + /* bits 56:41 */ /* becomes bytes 16-17 of msg */ + /* Address field (96:57) is + never used as an address + (these are address bits + 42:3) */ + + unsigned int rsvd_3:1; /* must be zero */ /* bit 57 */ - /* address bits 27:4 are payload */ + /* address bits 27:4 are payload */ /* these next 24 (58-81) bits become bytes 12-14 of msg */ - /* bits 65:58 land in byte 12 */ - unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ + unsigned int replied_to:1; /* sent as 0 by the source to + byte 12 */ /* bit 58 */ - unsigned int msg_type:3; /* software type of the message*/ + unsigned int msg_type:3; /* software type of the + message */ /* bits 61:59 */ - unsigned int canceled:1; /* message canceled, resource to be freed*/ + unsigned int canceled:1; /* message canceled, resource + is to be freed*/ /* bit 62 */ - unsigned int payload_1a:1;/* not currently used */ + unsigned int payload_1a:1; /* not currently used */ /* bit 63 */ - unsigned int payload_1b:2;/* not currently used */ + unsigned int payload_1b:2; /* not currently used */ /* bits 65:64 */ /* bits 73:66 land in byte 13 */ - unsigned int payload_1ca:6;/* not currently used */ + unsigned int payload_1ca:6; /* not currently used */ /* bits 71:66 */ - unsigned int payload_1c:2;/* not currently used */ + unsigned int payload_1c:2; /* not currently used */ /* bits 73:72 */ /* bits 81:74 land in byte 14 */ - unsigned int payload_1d:6;/* not currently used */ + unsigned int payload_1d:6; /* not currently used */ /* bits 79:74 */ - unsigned int payload_1e:2;/* not currently used */ + unsigned int payload_1e:2; /* not currently used */ /* bits 81:80 */ - unsigned int rsvd_4:7; /* must be zero */ + unsigned int rsvd_4:7; /* must be zero */ /* bits 88:82 */ - unsigned int sw_ack_flag:1;/* software acknowledge flag */ + unsigned int swack_flag:1; /* software acknowledge flag */ /* bit 89 */ - /* INTD trasactions at destination are to - wait for software acknowledge */ - unsigned int rsvd_5:6; /* must be zero */ + /* INTD trasactions at + destination are to wait for + software acknowledge */ + unsigned int rsvd_5:6; /* must be zero */ /* bits 95:90 */ - unsigned int rsvd_6:5; /* must be zero */ + unsigned int rsvd_6:5; /* must be zero */ /* bits 100:96 */ - unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */ + unsigned int int_both:1; /* if 1, interrupt both sockets + on the uvhub */ /* bit 101*/ - unsigned int fairness:3;/* usually zero */ + unsigned int fairness:3; /* usually zero */ /* bits 104:102 */ - unsigned int multilevel:1; /* multi-level multicast format */ + unsigned int multilevel:1; /* multi-level multicast + format */ /* bit 105 */ - /* 0 for TLB: endpoint multi-unicast messages */ - unsigned int chaining:1;/* next descriptor is part of this activation*/ + /* 0 for TLB: endpoint multi-unicast messages */ + unsigned int chaining:1; /* next descriptor is part of + this activation*/ /* bit 106 */ - unsigned int rsvd_7:21; /* must be zero */ + unsigned int rsvd_7:21; /* must be zero */ /* bits 127:107 */ }; -/* see msg_type: */ -#define MSG_NOOP 0 -#define MSG_REGULAR 1 -#define MSG_RETRY 2 - /* * The activation descriptor: * The format of the message to send, plus all accompanying control * Should be 64 bytes */ struct bau_desc { - struct bau_target_uvhubmask distribution; + struct bau_targ_hubmask distribution; /* * message template, consisting of header and payload: */ - struct bau_msg_header header; - struct bau_msg_payload payload; + struct bau_msg_header header; + struct bau_msg_payload payload; }; /* * -payload-- ---------header------ @@ -315,59 +338,51 @@ struct bau_desc { * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17 * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120) * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from - * sw_ack_vector and payload_2) + * swack_vec and payload_2) * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload * operation." */ -struct bau_payload_queue_entry { - unsigned long address; /* signifies a page or all TLB's - of the cpu */ +struct bau_pq_entry { + unsigned long address; /* signifies a page or all TLB's + of the cpu */ /* 64 bits, bytes 0-7 */ - - unsigned short sending_cpu; /* cpu that sent the message */ + unsigned short sending_cpu; /* cpu that sent the message */ /* 16 bits, bytes 8-9 */ - - unsigned short acknowledge_count; /* filled in by destination */ + unsigned short acknowledge_count; /* filled in by destination */ /* 16 bits, bytes 10-11 */ - /* these next 3 bytes come from bits 58-81 of the message header */ - unsigned short replied_to:1; /* sent as 0 by the source */ - unsigned short msg_type:3; /* software message type */ - unsigned short canceled:1; /* sent as 0 by the source */ - unsigned short unused1:3; /* not currently using */ + unsigned short replied_to:1; /* sent as 0 by the source */ + unsigned short msg_type:3; /* software message type */ + unsigned short canceled:1; /* sent as 0 by the source */ + unsigned short unused1:3; /* not currently using */ /* byte 12 */ - - unsigned char unused2a; /* not currently using */ + unsigned char unused2a; /* not currently using */ /* byte 13 */ - unsigned char unused2; /* not currently using */ + unsigned char unused2; /* not currently using */ /* byte 14 */ - - unsigned char sw_ack_vector; /* filled in by the hardware */ + unsigned char swack_vec; /* filled in by the hardware */ /* byte 15 (bits 127:120) */ - - unsigned short sequence; /* message sequence number */ + unsigned short sequence; /* message sequence number */ /* bytes 16-17 */ - unsigned char unused4[2]; /* not currently using bytes 18-19 */ + unsigned char unused4[2]; /* not currently using bytes 18-19 */ /* bytes 18-19 */ - - int number_of_cpus; /* filled in at destination */ + int number_of_cpus; /* filled in at destination */ /* 32 bits, bytes 20-23 (aligned) */ - - unsigned char unused5[8]; /* not using */ + unsigned char unused5[8]; /* not using */ /* bytes 24-31 */ }; struct msg_desc { - struct bau_payload_queue_entry *msg; - int msg_slot; - int sw_ack_slot; - struct bau_payload_queue_entry *va_queue_first; - struct bau_payload_queue_entry *va_queue_last; + struct bau_pq_entry *msg; + int msg_slot; + int swack_slot; + struct bau_pq_entry *queue_first; + struct bau_pq_entry *queue_last; }; struct reset_args { - int sender; + int sender; }; /* @@ -375,112 +390,226 @@ struct reset_args { */ struct ptc_stats { /* sender statistics */ - unsigned long s_giveup; /* number of fall backs to IPI-style flushes */ - unsigned long s_requestor; /* number of shootdown requests */ - unsigned long s_stimeout; /* source side timeouts */ - unsigned long s_dtimeout; /* destination side timeouts */ - unsigned long s_time; /* time spent in sending side */ - unsigned long s_retriesok; /* successful retries */ - unsigned long s_ntargcpu; /* total number of cpu's targeted */ - unsigned long s_ntargself; /* times the sending cpu was targeted */ - unsigned long s_ntarglocals; /* targets of cpus on the local blade */ - unsigned long s_ntargremotes; /* targets of cpus on remote blades */ - unsigned long s_ntarglocaluvhub; /* targets of the local hub */ - unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */ - unsigned long s_ntarguvhub; /* total number of uvhubs targeted */ - unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/ - unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */ - unsigned long s_ntarguvhub4; /* number of times target hubs >= 4 */ - unsigned long s_ntarguvhub2; /* number of times target hubs >= 2 */ - unsigned long s_ntarguvhub1; /* number of times target hubs == 1 */ - unsigned long s_resets_plug; /* ipi-style resets from plug state */ - unsigned long s_resets_timeout; /* ipi-style resets from timeouts */ - unsigned long s_busy; /* status stayed busy past s/w timer */ - unsigned long s_throttles; /* waits in throttle */ - unsigned long s_retry_messages; /* retry broadcasts */ - unsigned long s_bau_reenabled; /* for bau enable/disable */ - unsigned long s_bau_disabled; /* for bau enable/disable */ + unsigned long s_giveup; /* number of fall backs to + IPI-style flushes */ + unsigned long s_requestor; /* number of shootdown + requests */ + unsigned long s_stimeout; /* source side timeouts */ + unsigned long s_dtimeout; /* destination side timeouts */ + unsigned long s_time; /* time spent in sending side */ + unsigned long s_retriesok; /* successful retries */ + unsigned long s_ntargcpu; /* total number of cpu's + targeted */ + unsigned long s_ntargself; /* times the sending cpu was + targeted */ + unsigned long s_ntarglocals; /* targets of cpus on the local + blade */ + unsigned long s_ntargremotes; /* targets of cpus on remote + blades */ + unsigned long s_ntarglocaluvhub; /* targets of the local hub */ + unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */ + unsigned long s_ntarguvhub; /* total number of uvhubs + targeted */ + unsigned long s_ntarguvhub16; /* number of times target + hubs >= 16*/ + unsigned long s_ntarguvhub8; /* number of times target + hubs >= 8 */ + unsigned long s_ntarguvhub4; /* number of times target + hubs >= 4 */ + unsigned long s_ntarguvhub2; /* number of times target + hubs >= 2 */ + unsigned long s_ntarguvhub1; /* number of times target + hubs == 1 */ + unsigned long s_resets_plug; /* ipi-style resets from plug + state */ + unsigned long s_resets_timeout; /* ipi-style resets from + timeouts */ + unsigned long s_busy; /* status stayed busy past + s/w timer */ + unsigned long s_throttles; /* waits in throttle */ + unsigned long s_retry_messages; /* retry broadcasts */ + unsigned long s_bau_reenabled; /* for bau enable/disable */ + unsigned long s_bau_disabled; /* for bau enable/disable */ /* destination statistics */ - unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ - unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ - unsigned long d_multmsg; /* interrupts with multiple messages */ - unsigned long d_nomsg; /* interrupts with no message */ - unsigned long d_time; /* time spent on destination side */ - unsigned long d_requestee; /* number of messages processed */ - unsigned long d_retries; /* number of retry messages processed */ - unsigned long d_canceled; /* number of messages canceled by retries */ - unsigned long d_nocanceled; /* retries that found nothing to cancel */ - unsigned long d_resets; /* number of ipi-style requests processed */ - unsigned long d_rcanceled; /* number of messages canceled by resets */ + unsigned long d_alltlb; /* times all tlb's on this + cpu were flushed */ + unsigned long d_onetlb; /* times just one tlb on this + cpu was flushed */ + unsigned long d_multmsg; /* interrupts with multiple + messages */ + unsigned long d_nomsg; /* interrupts with no message */ + unsigned long d_time; /* time spent on destination + side */ + unsigned long d_requestee; /* number of messages + processed */ + unsigned long d_retries; /* number of retry messages + processed */ + unsigned long d_canceled; /* number of messages canceled + by retries */ + unsigned long d_nocanceled; /* retries that found nothing + to cancel */ + unsigned long d_resets; /* number of ipi-style requests + processed */ + unsigned long d_rcanceled; /* number of messages canceled + by resets */ +}; + +struct tunables { + int *tunp; + int deflt; }; struct hub_and_pnode { - short uvhub; - short pnode; + short uvhub; + short pnode; +}; + +struct socket_desc { + short num_cpus; + short cpu_number[MAX_CPUS_PER_SOCKET]; +}; + +struct uvhub_desc { + unsigned short socket_mask; + short num_cpus; + short uvhub; + short pnode; + struct socket_desc socket[2]; }; + /* * one per-cpu; to locate the software tables */ struct bau_control { - struct bau_desc *descriptor_base; - struct bau_payload_queue_entry *va_queue_first; - struct bau_payload_queue_entry *va_queue_last; - struct bau_payload_queue_entry *bau_msg_head; - struct bau_control *uvhub_master; - struct bau_control *socket_master; - struct ptc_stats *statp; - unsigned long timeout_interval; - unsigned long set_bau_on_time; - atomic_t active_descriptor_count; - int plugged_tries; - int timeout_tries; - int ipi_attempts; - int conseccompletes; - int baudisabled; - int set_bau_off; - short cpu; - short osnode; - short uvhub_cpu; - short uvhub; - short cpus_in_socket; - short cpus_in_uvhub; - short partition_base_pnode; - unsigned short message_number; - unsigned short uvhub_quiesce; - short socket_acknowledge_count[DEST_Q_SIZE]; - cycles_t send_message; - spinlock_t uvhub_lock; - spinlock_t queue_lock; + struct bau_desc *descriptor_base; + struct bau_pq_entry *queue_first; + struct bau_pq_entry *queue_last; + struct bau_pq_entry *bau_msg_head; + struct bau_control *uvhub_master; + struct bau_control *socket_master; + struct ptc_stats *statp; + unsigned long timeout_interval; + unsigned long set_bau_on_time; + atomic_t active_descriptor_count; + int plugged_tries; + int timeout_tries; + int ipi_attempts; + int conseccompletes; + int baudisabled; + int set_bau_off; + short cpu; + short osnode; + short uvhub_cpu; + short uvhub; + short cpus_in_socket; + short cpus_in_uvhub; + short partition_base_pnode; + unsigned short message_number; + unsigned short uvhub_quiesce; + short socket_acknowledge_count[DEST_Q_SIZE]; + cycles_t send_message; + spinlock_t uvhub_lock; + spinlock_t queue_lock; /* tunables */ - int max_bau_concurrent; - int max_bau_concurrent_constant; - int plugged_delay; - int plugsb4reset; - int timeoutsb4reset; - int ipi_reset_limit; - int complete_threshold; - int congested_response_us; - int congested_reps; - int congested_period; - cycles_t period_time; - long period_requests; - struct hub_and_pnode *target_hub_and_pnode; + int max_concurr; + int max_concurr_const; + int plugged_delay; + int plugsb4reset; + int timeoutsb4reset; + int ipi_reset_limit; + int complete_threshold; + int cong_response_us; + int cong_reps; + int cong_period; + cycles_t period_time; + long period_requests; + struct hub_and_pnode *thp; }; -static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) +static unsigned long read_mmr_uv2_status(void) +{ + return read_lmmr(UV2H_LB_BAU_SB_ACTIVATION_STATUS_2); +} + +static void write_mmr_data_broadcast(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image); +} + +static void write_mmr_descriptor_base(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image); +} + +static void write_mmr_activation(unsigned long index) +{ + write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); +} + +static void write_gmmr_activation(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image); +} + +static void write_mmr_payload_first(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image); +} + +static void write_mmr_payload_tail(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image); +} + +static void write_mmr_payload_last(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image); +} + +static void write_mmr_misc_control(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); +} + +static unsigned long read_mmr_misc_control(int pnode) +{ + return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL); +} + +static void write_mmr_sw_ack(unsigned long mr) +{ + uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); +} + +static unsigned long read_mmr_sw_ack(void) +{ + return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); +} + +static unsigned long read_gmmr_sw_ack(int pnode) +{ + return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); +} + +static void write_mmr_data_config(int pnode, unsigned long mr) +{ + uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr); +} + +static inline int bau_uvhub_isset(int uvhub, struct bau_targ_hubmask *dstp) { return constant_test_bit(uvhub, &dstp->bits[0]); } -static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp) +static inline void bau_uvhub_set(int pnode, struct bau_targ_hubmask *dstp) { __set_bit(pnode, &dstp->bits[0]); } -static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, +static inline void bau_uvhubs_clear(struct bau_targ_hubmask *dstp, int nbits) { bitmap_zero(&dstp->bits[0], nbits); } -static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp) +static inline int bau_uvhub_weight(struct bau_targ_hubmask *dstp) { return bitmap_weight((unsigned long *)&dstp->bits[0], UV_DISTRIBUTION_SIZE); @@ -491,9 +620,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) bitmap_zero(&dstp->bits, nbits); } -#define cpubit_isset(cpu, bau_local_cpumask) \ - test_bit((cpu), (bau_local_cpumask).bits) - extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); @@ -501,7 +627,7 @@ struct atomic_short { short counter; }; -/** +/* * atomic_read_short - read a short atomic variable * @v: pointer of type atomic_short * @@ -512,14 +638,14 @@ static inline int atomic_read_short(const struct atomic_short *v) return v->counter; } -/** - * atomic_add_short_return - add and return a short int +/* + * atom_asr - add and return a short int * @i: short value to add * @v: pointer of type atomic_short * * Atomically adds @i to @v and returns @i + @v */ -static inline int atomic_add_short_return(short i, struct atomic_short *v) +static inline int atom_asr(short i, struct atomic_short *v) { short __i = i; asm volatile(LOCK_PREFIX "xaddw %0, %1" @@ -528,4 +654,26 @@ static inline int atomic_add_short_return(short i, struct atomic_short *v) return i + __i; } +/* + * conditionally add 1 to *v, unless *v is >= u + * return 0 if we cannot add 1 to *v because it is >= u + * return 1 if we can add 1 to *v because it is < u + * the add is atomic + * + * This is close to atomic_add_unless(), but this allows the 'u' value + * to be lowered below the current 'v'. atomic_add_unless can only stop + * on equal. + */ +static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) +{ + spin_lock(lock); + if (atomic_read(v) >= u) { + spin_unlock(lock); + return 0; + } + atomic_inc(v); + spin_unlock(lock); + return 1; +} + #endif /* _ASM_X86_UV_UV_BAU_H */ diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index a9856c09c42..68e467f69fe 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1,7 +1,7 @@ /* * SGI UltraViolet TLB flush routines. * - * (c) 2008-2010 Cliff Wickman , SGI. + * (c) 2008-2011 Cliff Wickman , SGI. * * This code is released under the GNU General Public License version 2 or * later. @@ -35,6 +35,7 @@ static int timeout_base_ns[] = { 5242880, 167772160 }; + static int timeout_us; static int nobau; static int baudisabled; @@ -42,20 +43,70 @@ static spinlock_t disable_lock; static cycles_t congested_cycles; /* tunables: */ -static int max_bau_concurrent = MAX_BAU_CONCURRENT; -static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT; -static int plugged_delay = PLUGGED_DELAY; -static int plugsb4reset = PLUGSB4RESET; -static int timeoutsb4reset = TIMEOUTSB4RESET; -static int ipi_reset_limit = IPI_RESET_LIMIT; -static int complete_threshold = COMPLETE_THRESHOLD; -static int congested_response_us = CONGESTED_RESPONSE_US; -static int congested_reps = CONGESTED_REPS; -static int congested_period = CONGESTED_PERIOD; +static int max_concurr = MAX_BAU_CONCURRENT; +static int max_concurr_const = MAX_BAU_CONCURRENT; +static int plugged_delay = PLUGGED_DELAY; +static int plugsb4reset = PLUGSB4RESET; +static int timeoutsb4reset = TIMEOUTSB4RESET; +static int ipi_reset_limit = IPI_RESET_LIMIT; +static int complete_threshold = COMPLETE_THRESHOLD; +static int congested_respns_us = CONGESTED_RESPONSE_US; +static int congested_reps = CONGESTED_REPS; +static int congested_period = CONGESTED_PERIOD; + +static struct tunables tunables[] = { + {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ + {&plugged_delay, PLUGGED_DELAY}, + {&plugsb4reset, PLUGSB4RESET}, + {&timeoutsb4reset, TIMEOUTSB4RESET}, + {&ipi_reset_limit, IPI_RESET_LIMIT}, + {&complete_threshold, COMPLETE_THRESHOLD}, + {&congested_respns_us, CONGESTED_RESPONSE_US}, + {&congested_reps, CONGESTED_REPS}, + {&congested_period, CONGESTED_PERIOD} +}; + static struct dentry *tunables_dir; static struct dentry *tunables_file; -static int __init setup_nobau(char *arg) +/* these correspond to the statistics printed by ptc_seq_show() */ +static char *stat_description[] = { + "sent: number of shootdown messages sent", + "stime: time spent sending messages", + "numuvhubs: number of hubs targeted with shootdown", + "numuvhubs16: number times 16 or more hubs targeted", + "numuvhubs8: number times 8 or more hubs targeted", + "numuvhubs4: number times 4 or more hubs targeted", + "numuvhubs2: number times 2 or more hubs targeted", + "numuvhubs1: number times 1 hub targeted", + "numcpus: number of cpus targeted with shootdown", + "dto: number of destination timeouts", + "retries: destination timeout retries sent", + "rok: : destination timeouts successfully retried", + "resetp: ipi-style resource resets for plugs", + "resett: ipi-style resource resets for timeouts", + "giveup: fall-backs to ipi-style shootdowns", + "sto: number of source timeouts", + "bz: number of stay-busy's", + "throt: number times spun in throttle", + "swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE", + "recv: shootdown messages received", + "rtime: time spent processing messages", + "all: shootdown all-tlb messages", + "one: shootdown one-tlb messages", + "mult: interrupts that found multiple messages", + "none: interrupts that found no messages", + "retry: number of retry messages processed", + "canc: number messages canceled by retries", + "nocan: number retries that found nothing to cancel", + "reset: number of ipi-style reset requests processed", + "rcan: number messages canceled by reset requests", + "disable: number times use of the BAU was disabled", + "enable: number times use of the BAU was re-enabled" +}; + +static int __init +setup_nobau(char *arg) { nobau = 1; return 0; @@ -63,7 +114,7 @@ static int __init setup_nobau(char *arg) early_param("nobau", setup_nobau); /* base pnode in this partition */ -static int uv_partition_base_pnode __read_mostly; +static int uv_base_pnode __read_mostly; /* position of pnode (which is nasid>>1): */ static int uv_nshift __read_mostly; static unsigned long uv_mmask __read_mostly; @@ -109,60 +160,52 @@ static int __init uvhub_to_first_apicid(int uvhub) * clear of the Timeout bit (as well) will free the resource. No reply will * be sent (the hardware will only do one reply per message). */ -static inline void uv_reply_to_message(struct msg_desc *mdp, - struct bau_control *bcp) +static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp) { unsigned long dw; - struct bau_payload_queue_entry *msg; + struct bau_pq_entry *msg; msg = mdp->msg; if (!msg->canceled) { - dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | - msg->sw_ack_vector; - uv_write_local_mmr( - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); + dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec; + write_mmr_sw_ack(dw); } msg->replied_to = 1; - msg->sw_ack_vector = 0; + msg->swack_vec = 0; } /* * Process the receipt of a RETRY message */ -static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, - struct bau_control *bcp) +static void bau_process_retry_msg(struct msg_desc *mdp, + struct bau_control *bcp) { int i; int cancel_count = 0; - int slot2; unsigned long msg_res; unsigned long mmr = 0; - struct bau_payload_queue_entry *msg; - struct bau_payload_queue_entry *msg2; - struct ptc_stats *stat; + struct bau_pq_entry *msg = mdp->msg; + struct bau_pq_entry *msg2; + struct ptc_stats *stat = bcp->statp; - msg = mdp->msg; - stat = bcp->statp; stat->d_retries++; /* * cancel any message from msg+1 to the retry itself */ for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { - if (msg2 > mdp->va_queue_last) - msg2 = mdp->va_queue_first; + if (msg2 > mdp->queue_last) + msg2 = mdp->queue_first; if (msg2 == msg) break; - /* same conditions for cancellation as uv_do_reset */ + /* same conditions for cancellation as do_reset */ if ((msg2->replied_to == 0) && (msg2->canceled == 0) && - (msg2->sw_ack_vector) && ((msg2->sw_ack_vector & - msg->sw_ack_vector) == 0) && + (msg2->swack_vec) && ((msg2->swack_vec & + msg->swack_vec) == 0) && (msg2->sending_cpu == msg->sending_cpu) && (msg2->msg_type != MSG_NOOP)) { - slot2 = msg2 - mdp->va_queue_first; - mmr = uv_read_local_mmr - (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); - msg_res = msg2->sw_ack_vector; + mmr = read_mmr_sw_ack(); + msg_res = msg2->swack_vec; /* * This is a message retry; clear the resources held * by the previous message only if they timed out. @@ -170,6 +213,7 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, * situation to report. */ if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { + unsigned long mr; /* * is the resource timed out? * make everyone ignore the cancelled message. @@ -177,10 +221,8 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, msg2->canceled = 1; stat->d_canceled++; cancel_count++; - uv_write_local_mmr( - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, - (msg_res << UV_SW_ACK_NPENDING) | - msg_res); + mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; + write_mmr_sw_ack(mr); } } } @@ -192,20 +234,19 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, * Do all the things a cpu should do for a TLB shootdown message. * Other cpu's may come here at the same time for this message. */ -static void uv_bau_process_message(struct msg_desc *mdp, - struct bau_control *bcp) +static void bau_process_message(struct msg_desc *mdp, + struct bau_control *bcp) { - int msg_ack_count; short socket_ack_count = 0; - struct ptc_stats *stat; - struct bau_payload_queue_entry *msg; + short *sp; + struct atomic_short *asp; + struct ptc_stats *stat = bcp->statp; + struct bau_pq_entry *msg = mdp->msg; struct bau_control *smaster = bcp->socket_master; /* * This must be a normal message, or retry of a normal message */ - msg = mdp->msg; - stat = bcp->statp; if (msg->address == TLB_FLUSH_ALL) { local_flush_tlb(); stat->d_alltlb++; @@ -222,30 +263,32 @@ static void uv_bau_process_message(struct msg_desc *mdp, * cpu number. */ if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) - uv_bau_process_retry_msg(mdp, bcp); + bau_process_retry_msg(mdp, bcp); /* - * This is a sw_ack message, so we have to reply to it. + * This is a swack message, so we have to reply to it. * Count each responding cpu on the socket. This avoids * pinging the count's cache line back and forth between * the sockets. */ - socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) - &smaster->socket_acknowledge_count[mdp->msg_slot]); + sp = &smaster->socket_acknowledge_count[mdp->msg_slot]; + asp = (struct atomic_short *)sp; + socket_ack_count = atom_asr(1, asp); if (socket_ack_count == bcp->cpus_in_socket) { + int msg_ack_count; /* * Both sockets dump their completed count total into * the message's count. */ smaster->socket_acknowledge_count[mdp->msg_slot] = 0; - msg_ack_count = atomic_add_short_return(socket_ack_count, - (struct atomic_short *)&msg->acknowledge_count); + asp = (struct atomic_short *)&msg->acknowledge_count; + msg_ack_count = atom_asr(socket_ack_count, asp); if (msg_ack_count == bcp->cpus_in_uvhub) { /* * All cpus in uvhub saw it; reply */ - uv_reply_to_message(mdp, bcp); + reply_to_message(mdp, bcp); } } @@ -268,62 +311,51 @@ static int uvhub_to_first_cpu(int uvhub) * Last resort when we get a large number of destination timeouts is * to clear resources held by a given cpu. * Do this with IPI so that all messages in the BAU message queue - * can be identified by their nonzero sw_ack_vector field. + * can be identified by their nonzero swack_vec field. * * This is entered for a single cpu on the uvhub. * The sender want's this uvhub to free a specific message's - * sw_ack resources. + * swack resources. */ -static void -uv_do_reset(void *ptr) +static void do_reset(void *ptr) { int i; - int slot; - int count = 0; - unsigned long mmr; - unsigned long msg_res; - struct bau_control *bcp; - struct reset_args *rap; - struct bau_payload_queue_entry *msg; - struct ptc_stats *stat; + struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id()); + struct reset_args *rap = (struct reset_args *)ptr; + struct bau_pq_entry *msg; + struct ptc_stats *stat = bcp->statp; - bcp = &per_cpu(bau_control, smp_processor_id()); - rap = (struct reset_args *)ptr; - stat = bcp->statp; stat->d_resets++; - /* * We're looking for the given sender, and - * will free its sw_ack resource. + * will free its swack resource. * If all cpu's finally responded after the timeout, its * message 'replied_to' was set. */ - for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { - /* uv_do_reset: same conditions for cancellation as - uv_bau_process_retry_msg() */ + for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { + unsigned long msg_res; + /* do_reset: same conditions for cancellation as + bau_process_retry_msg() */ if ((msg->replied_to == 0) && (msg->canceled == 0) && (msg->sending_cpu == rap->sender) && - (msg->sw_ack_vector) && + (msg->swack_vec) && (msg->msg_type != MSG_NOOP)) { + unsigned long mmr; + unsigned long mr; /* * make everyone else ignore this message */ msg->canceled = 1; - slot = msg - bcp->va_queue_first; - count++; /* * only reset the resource if it is still pending */ - mmr = uv_read_local_mmr - (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); - msg_res = msg->sw_ack_vector; + mmr = read_mmr_sw_ack(); + msg_res = msg->swack_vec; + mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; if (mmr & msg_res) { stat->d_rcanceled++; - uv_write_local_mmr( - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, - (msg_res << UV_SW_ACK_NPENDING) | - msg_res); + write_mmr_sw_ack(mr); } } } @@ -334,39 +366,38 @@ uv_do_reset(void *ptr) * Use IPI to get all target uvhubs to release resources held by * a given sending cpu number. */ -static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, - int sender) +static void reset_with_ipi(struct bau_targ_hubmask *distribution, int sender) { int uvhub; - int cpu; + int maskbits; cpumask_t mask; struct reset_args reset_args; reset_args.sender = sender; - cpus_clear(mask); /* find a single cpu for each uvhub in this distribution mask */ - for (uvhub = 0; - uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; - uvhub++) { + maskbits = sizeof(struct bau_targ_hubmask) * BITSPERBYTE; + for (uvhub = 0; uvhub < maskbits; uvhub++) { + int cpu; if (!bau_uvhub_isset(uvhub, distribution)) continue; /* find a cpu for this uvhub */ cpu = uvhub_to_first_cpu(uvhub); cpu_set(cpu, mask); } - /* IPI all cpus; Preemption is already disabled */ - smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); + + /* IPI all cpus; preemption is already disabled */ + smp_call_function_many(&mask, do_reset, (void *)&reset_args, 1); return; } -static inline unsigned long -cycles_2_us(unsigned long long cyc) +static inline unsigned long cycles_2_us(unsigned long long cyc) { unsigned long long ns; unsigned long us; - ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) - >> CYC2NS_SCALE_FACTOR; + int cpu = smp_processor_id(); + + ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR; us = ns / 1000; return us; } @@ -376,21 +407,27 @@ cycles_2_us(unsigned long long cyc) * leaves uvhub_quiesce set so that no new broadcasts are started by * bau_flush_send_and_wait() */ -static inline void -quiesce_local_uvhub(struct bau_control *hmaster) +static inline void quiesce_local_uvhub(struct bau_control *hmaster) { - atomic_add_short_return(1, (struct atomic_short *) - &hmaster->uvhub_quiesce); + atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce); } /* * mark this quiet-requestor as done */ -static inline void -end_uvhub_quiesce(struct bau_control *hmaster) +static inline void end_uvhub_quiesce(struct bau_control *hmaster) { - atomic_add_short_return(-1, (struct atomic_short *) - &hmaster->uvhub_quiesce); + atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce); +} + +static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift) +{ + unsigned long descriptor_status; + + descriptor_status = uv_read_local_mmr(mmr_offset); + descriptor_status >>= right_shift; + descriptor_status &= UV_ACT_STATUS_MASK; + return descriptor_status; } /* @@ -398,31 +435,28 @@ end_uvhub_quiesce(struct bau_control *hmaster) * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP */ static int uv1_wait_completion(struct bau_desc *bau_desc, - unsigned long mmr_offset, int right_shift, int this_cpu, - struct bau_control *bcp, struct bau_control *smaster, long try) + unsigned long mmr_offset, int right_shift, + struct bau_control *bcp, long try) { unsigned long descriptor_status; - cycles_t ttime; + cycles_t ttm; struct ptc_stats *stat = bcp->statp; + descriptor_status = uv1_read_status(mmr_offset, right_shift); /* spin on the status MMR, waiting for it to go idle */ - while ((descriptor_status = (((unsigned long) - uv_read_local_mmr(mmr_offset) >> - right_shift) & UV_ACT_STATUS_MASK)) != - DESC_STATUS_IDLE) { + while ((descriptor_status != DS_IDLE)) { /* * Our software ack messages may be blocked because * there are no swack resources available. As long * as none of them has timed out hardware will NACK * our message and its state will stay IDLE. */ - if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { + if (descriptor_status == DS_SOURCE_TIMEOUT) { stat->s_stimeout++; return FLUSH_GIVEUP; - } else if (descriptor_status == - DESC_STATUS_DESTINATION_TIMEOUT) { + } else if (descriptor_status == DS_DESTINATION_TIMEOUT) { stat->s_dtimeout++; - ttime = get_cycles(); + ttm = get_cycles(); /* * Our retries may be blocked by all destination @@ -430,8 +464,7 @@ static int uv1_wait_completion(struct bau_desc *bau_desc, * pending. In that case hardware returns the * ERROR that looks like a destination timeout. */ - if (cycles_2_us(ttime - bcp->send_message) < - timeout_us) { + if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { bcp->conseccompletes = 0; return FLUSH_RETRY_PLUGGED; } @@ -444,93 +477,106 @@ static int uv1_wait_completion(struct bau_desc *bau_desc, */ cpu_relax(); } + descriptor_status = uv1_read_status(mmr_offset, right_shift); } bcp->conseccompletes++; return FLUSH_COMPLETE; } -static int uv2_wait_completion(struct bau_desc *bau_desc, - unsigned long mmr_offset, int right_shift, int this_cpu, - struct bau_control *bcp, struct bau_control *smaster, long try) +/* + * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register. + */ +static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu) { unsigned long descriptor_status; unsigned long descriptor_status2; - int cpu; - cycles_t ttime; + + descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK); + descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL; + descriptor_status = (descriptor_status << 1) | descriptor_status2; + return descriptor_status; +} + +static int uv2_wait_completion(struct bau_desc *bau_desc, + unsigned long mmr_offset, int right_shift, + struct bau_control *bcp, long try) +{ + unsigned long descriptor_stat; + cycles_t ttm; + int cpu = bcp->uvhub_cpu; struct ptc_stats *stat = bcp->statp; - /* UV2 has an extra bit of status */ - cpu = bcp->uvhub_cpu; + descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu); + /* spin on the status MMR, waiting for it to go idle */ - descriptor_status = (((unsigned long)(uv_read_local_mmr - (mmr_offset)) >> right_shift) & UV_ACT_STATUS_MASK); - descriptor_status2 = (((unsigned long)uv_read_local_mmr - (UV2H_LB_BAU_SB_ACTIVATION_STATUS_2) >> cpu) & 0x1UL); - descriptor_status = (descriptor_status << 1) | - descriptor_status2; - while (descriptor_status != UV2H_DESC_IDLE) { + while (descriptor_stat != UV2H_DESC_IDLE) { /* * Our software ack messages may be blocked because * there are no swack resources available. As long * as none of them has timed out hardware will NACK * our message and its state will stay IDLE. */ - if ((descriptor_status == UV2H_DESC_SOURCE_TIMEOUT) || - (descriptor_status == UV2H_DESC_DEST_STRONG_NACK) || - (descriptor_status == UV2H_DESC_DEST_PUT_ERR)) { + if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) || + (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) || + (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) { stat->s_stimeout++; return FLUSH_GIVEUP; - } else if (descriptor_status == UV2H_DESC_DEST_TIMEOUT) { + } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { stat->s_dtimeout++; - ttime = get_cycles(); - + ttm = get_cycles(); /* * Our retries may be blocked by all destination * swack resources being consumed, and a timeout * pending. In that case hardware returns the * ERROR that looks like a destination timeout. */ - if (cycles_2_us(ttime - bcp->send_message) < - timeout_us) { + if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { bcp->conseccompletes = 0; return FLUSH_RETRY_PLUGGED; } - bcp->conseccompletes = 0; return FLUSH_RETRY_TIMEOUT; } else { /* - * descriptor_status is still BUSY + * descriptor_stat is still BUSY */ cpu_relax(); } - descriptor_status = (((unsigned long)(uv_read_local_mmr - (mmr_offset)) >> right_shift) & - UV_ACT_STATUS_MASK); - descriptor_status2 = (((unsigned long)uv_read_local_mmr - (UV2H_LB_BAU_SB_ACTIVATION_STATUS_2) >> cpu) & - 0x1UL); - descriptor_status = (descriptor_status << 1) | - descriptor_status2; + descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu); } bcp->conseccompletes++; return FLUSH_COMPLETE; } -static int uv_wait_completion(struct bau_desc *bau_desc, - unsigned long mmr_offset, int right_shift, int this_cpu, - struct bau_control *bcp, struct bau_control *smaster, long try) +/* + * There are 2 status registers; each and array[32] of 2 bits. Set up for + * which register to read and position in that register based on cpu in + * current hub. + */ +static int wait_completion(struct bau_desc *bau_desc, + struct bau_control *bcp, long try) { + int right_shift; + unsigned long mmr_offset; + int cpu = bcp->uvhub_cpu; + + if (cpu < UV_CPUS_PER_AS) { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; + right_shift = cpu * UV_ACT_STATUS_SIZE; + } else { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; + right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE); + } + if (is_uv1_hub()) return uv1_wait_completion(bau_desc, mmr_offset, right_shift, - this_cpu, bcp, smaster, try); + bcp, try); else return uv2_wait_completion(bau_desc, mmr_offset, right_shift, - this_cpu, bcp, smaster, try); + bcp, try); } -static inline cycles_t -sec_2_cycles(unsigned long sec) +static inline cycles_t sec_2_cycles(unsigned long sec) { unsigned long ns; cycles_t cyc; @@ -541,63 +587,50 @@ sec_2_cycles(unsigned long sec) } /* - * conditionally add 1 to *v, unless *v is >= u - * return 0 if we cannot add 1 to *v because it is >= u - * return 1 if we can add 1 to *v because it is < u - * the add is atomic - * - * This is close to atomic_add_unless(), but this allows the 'u' value - * to be lowered below the current 'v'. atomic_add_unless can only stop - * on equal. - */ -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) -{ - spin_lock(lock); - if (atomic_read(v) >= u) { - spin_unlock(lock); - return 0; - } - atomic_inc(v); - spin_unlock(lock); - return 1; -} - -/* - * Our retries are blocked by all destination swack resources being + * Our retries are blocked by all destination sw ack resources being * in use, and a timeout is pending. In that case hardware immediately * returns the ERROR that looks like a destination timeout. */ -static void -destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp, +static void destination_plugged(struct bau_desc *bau_desc, + struct bau_control *bcp, struct bau_control *hmaster, struct ptc_stats *stat) { udelay(bcp->plugged_delay); bcp->plugged_tries++; + if (bcp->plugged_tries >= bcp->plugsb4reset) { bcp->plugged_tries = 0; + quiesce_local_uvhub(hmaster); + spin_lock(&hmaster->queue_lock); - uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); + reset_with_ipi(&bau_desc->distribution, bcp->cpu); spin_unlock(&hmaster->queue_lock); + end_uvhub_quiesce(hmaster); + bcp->ipi_attempts++; stat->s_resets_plug++; } } -static void -destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, - struct bau_control *hmaster, struct ptc_stats *stat) +static void destination_timeout(struct bau_desc *bau_desc, + struct bau_control *bcp, struct bau_control *hmaster, + struct ptc_stats *stat) { - hmaster->max_bau_concurrent = 1; + hmaster->max_concurr = 1; bcp->timeout_tries++; if (bcp->timeout_tries >= bcp->timeoutsb4reset) { bcp->timeout_tries = 0; + quiesce_local_uvhub(hmaster); + spin_lock(&hmaster->queue_lock); - uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); + reset_with_ipi(&bau_desc->distribution, bcp->cpu); spin_unlock(&hmaster->queue_lock); + end_uvhub_quiesce(hmaster); + bcp->ipi_attempts++; stat->s_resets_timeout++; } @@ -607,34 +640,104 @@ destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, * Completions are taking a very long time due to a congested numalink * network. */ -static void -disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) +static void disable_for_congestion(struct bau_control *bcp, + struct ptc_stats *stat) { - int tcpu; - struct bau_control *tbcp; - /* let only one cpu do this disabling */ spin_lock(&disable_lock); + if (!baudisabled && bcp->period_requests && ((bcp->period_time / bcp->period_requests) > congested_cycles)) { + int tcpu; + struct bau_control *tbcp; /* it becomes this cpu's job to turn on the use of the BAU again */ baudisabled = 1; bcp->set_bau_off = 1; - bcp->set_bau_on_time = get_cycles() + - sec_2_cycles(bcp->congested_period); + bcp->set_bau_on_time = get_cycles(); + bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period); stat->s_bau_disabled++; for_each_present_cpu(tcpu) { tbcp = &per_cpu(bau_control, tcpu); - tbcp->baudisabled = 1; + tbcp->baudisabled = 1; } } + spin_unlock(&disable_lock); } -/** - * uv_flush_send_and_wait - * +static void count_max_concurr(int stat, struct bau_control *bcp, + struct bau_control *hmaster) +{ + bcp->plugged_tries = 0; + bcp->timeout_tries = 0; + if (stat != FLUSH_COMPLETE) + return; + if (bcp->conseccompletes <= bcp->complete_threshold) + return; + if (hmaster->max_concurr >= hmaster->max_concurr_const) + return; + hmaster->max_concurr++; +} + +static void record_send_stats(cycles_t time1, cycles_t time2, + struct bau_control *bcp, struct ptc_stats *stat, + int completion_status, int try) +{ + cycles_t elapsed; + + if (time2 > time1) { + elapsed = time2 - time1; + stat->s_time += elapsed; + + if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { + bcp->period_requests++; + bcp->period_time += elapsed; + if ((elapsed > congested_cycles) && + (bcp->period_requests > bcp->cong_reps)) + disable_for_congestion(bcp, stat); + } + } else + stat->s_requestor--; + + if (completion_status == FLUSH_COMPLETE && try > 1) + stat->s_retriesok++; + else if (completion_status == FLUSH_GIVEUP) + stat->s_giveup++; +} + +/* + * Because of a uv1 hardware bug only a limited number of concurrent + * requests can be made. + */ +static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) +{ + spinlock_t *lock = &hmaster->uvhub_lock; + atomic_t *v; + + v = &hmaster->active_descriptor_count; + if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) { + stat->s_throttles++; + do { + cpu_relax(); + } while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)); + } +} + +/* + * Handle the completion status of a message send. + */ +static void handle_cmplt(int completion_status, struct bau_desc *bau_desc, + struct bau_control *bcp, struct bau_control *hmaster, + struct ptc_stats *stat) +{ + if (completion_status == FLUSH_RETRY_PLUGGED) + destination_plugged(bau_desc, bcp, hmaster, stat); + else if (completion_status == FLUSH_RETRY_TIMEOUT) + destination_timeout(bau_desc, bcp, hmaster, stat); +} + +/* * Send a broadcast and wait for it to complete. * * The flush_mask contains the cpus the broadcast is to be sent to including @@ -645,45 +748,23 @@ disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) * returned to the kernel. */ int uv_flush_send_and_wait(struct bau_desc *bau_desc, - struct cpumask *flush_mask, struct bau_control *bcp) + struct cpumask *flush_mask, struct bau_control *bcp) { - int right_shift; - int completion_status = 0; int seq_number = 0; + int completion_stat = 0; long try = 0; - int cpu = bcp->uvhub_cpu; - int this_cpu = bcp->cpu; - unsigned long mmr_offset; unsigned long index; cycles_t time1; cycles_t time2; - cycles_t elapsed; struct ptc_stats *stat = bcp->statp; - struct bau_control *smaster = bcp->socket_master; struct bau_control *hmaster = bcp->uvhub_master; - if (is_uv1_hub() && - !atomic_inc_unless_ge(&hmaster->uvhub_lock, - &hmaster->active_descriptor_count, - hmaster->max_bau_concurrent)) { - stat->s_throttles++; - do { - cpu_relax(); - } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, - &hmaster->active_descriptor_count, - hmaster->max_bau_concurrent)); - } + if (is_uv1_hub()) + uv1_throttle(hmaster, stat); + while (hmaster->uvhub_quiesce) cpu_relax(); - if (cpu < UV_CPUS_PER_ACT_STATUS) { - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; - right_shift = cpu * UV_ACT_STATUS_SIZE; - } else { - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; - right_shift = - ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); - } time1 = get_cycles(); do { if (try == 0) { @@ -693,64 +774,134 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc, bau_desc->header.msg_type = MSG_RETRY; stat->s_retry_messages++; } + bau_desc->header.sequence = seq_number; - index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | - bcp->uvhub_cpu; + index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu; bcp->send_message = get_cycles(); - uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); + + write_mmr_activation(index); + try++; - completion_status = uv_wait_completion(bau_desc, mmr_offset, - right_shift, this_cpu, bcp, smaster, try); + completion_stat = wait_completion(bau_desc, bcp, try); + + handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); - if (completion_status == FLUSH_RETRY_PLUGGED) { - destination_plugged(bau_desc, bcp, hmaster, stat); - } else if (completion_status == FLUSH_RETRY_TIMEOUT) { - destination_timeout(bau_desc, bcp, hmaster, stat); - } if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { bcp->ipi_attempts = 0; - completion_status = FLUSH_GIVEUP; + completion_stat = FLUSH_GIVEUP; break; } cpu_relax(); - } while ((completion_status == FLUSH_RETRY_PLUGGED) || - (completion_status == FLUSH_RETRY_TIMEOUT)); + } while ((completion_stat == FLUSH_RETRY_PLUGGED) || + (completion_stat == FLUSH_RETRY_TIMEOUT)); + time2 = get_cycles(); - bcp->plugged_tries = 0; - bcp->timeout_tries = 0; - if ((completion_status == FLUSH_COMPLETE) && - (bcp->conseccompletes > bcp->complete_threshold) && - (hmaster->max_bau_concurrent < - hmaster->max_bau_concurrent_constant)) - hmaster->max_bau_concurrent++; + + count_max_concurr(completion_stat, bcp, hmaster); + while (hmaster->uvhub_quiesce) cpu_relax(); + atomic_dec(&hmaster->active_descriptor_count); - if (time2 > time1) { - elapsed = time2 - time1; - stat->s_time += elapsed; - if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { - bcp->period_requests++; - bcp->period_time += elapsed; - if ((elapsed > congested_cycles) && - (bcp->period_requests > bcp->congested_reps)) { - disable_for_congestion(bcp, stat); + + record_send_stats(time1, time2, bcp, stat, completion_stat, try); + + if (completion_stat == FLUSH_GIVEUP) + return 1; + return 0; +} + +/* + * The BAU is disabled. When the disabled time period has expired, the cpu + * that disabled it must re-enable it. + * Return 0 if it is re-enabled for all cpus. + */ +static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) +{ + int tcpu; + struct bau_control *tbcp; + + if (bcp->set_bau_off) { + if (get_cycles() >= bcp->set_bau_on_time) { + stat->s_bau_reenabled++; + baudisabled = 0; + for_each_present_cpu(tcpu) { + tbcp = &per_cpu(bau_control, tcpu); + tbcp->baudisabled = 0; + tbcp->period_requests = 0; + tbcp->period_time = 0; } + return 0; } + } + return -1; +} + +static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs, + int remotes, struct bau_desc *bau_desc) +{ + stat->s_requestor++; + stat->s_ntargcpu += remotes + locals; + stat->s_ntargremotes += remotes; + stat->s_ntarglocals += locals; + + /* uvhub statistics */ + hubs = bau_uvhub_weight(&bau_desc->distribution); + if (locals) { + stat->s_ntarglocaluvhub++; + stat->s_ntargremoteuvhub += (hubs - 1); } else - stat->s_requestor--; - if (completion_status == FLUSH_COMPLETE && try > 1) - stat->s_retriesok++; - else if (completion_status == FLUSH_GIVEUP) { - stat->s_giveup++; - return 1; + stat->s_ntargremoteuvhub += hubs; + + stat->s_ntarguvhub += hubs; + + if (hubs >= 16) + stat->s_ntarguvhub16++; + else if (hubs >= 8) + stat->s_ntarguvhub8++; + else if (hubs >= 4) + stat->s_ntarguvhub4++; + else if (hubs >= 2) + stat->s_ntarguvhub2++; + else + stat->s_ntarguvhub1++; +} + +/* + * Translate a cpu mask to the uvhub distribution mask in the BAU + * activation descriptor. + */ +static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp, + struct bau_desc *bau_desc, int *localsp, int *remotesp) +{ + int cpu; + int pnode; + int cnt = 0; + struct hub_and_pnode *hpp; + + for_each_cpu(cpu, flush_mask) { + /* + * The distribution vector is a bit map of pnodes, relative + * to the partition base pnode (and the partition base nasid + * in the header). + * Translate cpu to pnode and hub using a local memory array. + */ + hpp = &bcp->socket_master->thp[cpu]; + pnode = hpp->pnode - bcp->partition_base_pnode; + bau_uvhub_set(pnode, &bau_desc->distribution); + cnt++; + if (hpp->uvhub == bcp->uvhub) + (*localsp)++; + else + (*remotesp)++; } + if (!cnt) + return 1; return 0; } -/** - * uv_flush_tlb_others - globally purge translation cache of a virtual - * address or all TLB's +/* + * globally purge translation cache of a virtual address or all TLB's * @cpumask: mask of all cpu's in which the address is to be removed * @mm: mm_struct containing virtual address range * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) @@ -774,20 +925,16 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc, * done. The returned pointer is valid till preemption is re-enabled. */ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long va, unsigned int cpu) + struct mm_struct *mm, unsigned long va, + unsigned int cpu) { int locals = 0; int remotes = 0; int hubs = 0; - int tcpu; - int tpnode; struct bau_desc *bau_desc; struct cpumask *flush_mask; struct ptc_stats *stat; struct bau_control *bcp; - struct bau_control *tbcp; - struct hub_and_pnode *hpp; /* kernel was booted 'nobau' */ if (nobau) @@ -798,20 +945,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, /* bau was disabled due to slow response */ if (bcp->baudisabled) { - /* the cpu that disabled it must re-enable it */ - if (bcp->set_bau_off) { - if (get_cycles() >= bcp->set_bau_on_time) { - stat->s_bau_reenabled++; - baudisabled = 0; - for_each_present_cpu(tcpu) { - tbcp = &per_cpu(bau_control, tcpu); - tbcp->baudisabled = 0; - tbcp->period_requests = 0; - tbcp->period_time = 0; - } - } - } - return cpumask; + if (check_enable(bcp, stat)) + return cpumask; } /* @@ -822,59 +957,20 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); /* don't actually do a shootdown of the local cpu */ cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); + if (cpu_isset(cpu, *cpumask)) stat->s_ntargself++; bau_desc = bcp->descriptor_base; - bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; + bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu; bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); - - for_each_cpu(tcpu, flush_mask) { - /* - * The distribution vector is a bit map of pnodes, relative - * to the partition base pnode (and the partition base nasid - * in the header). - * Translate cpu to pnode and hub using an array stored - * in local memory. - */ - hpp = &bcp->socket_master->target_hub_and_pnode[tcpu]; - tpnode = hpp->pnode - bcp->partition_base_pnode; - bau_uvhub_set(tpnode, &bau_desc->distribution); - if (hpp->uvhub == bcp->uvhub) - locals++; - else - remotes++; - } - if ((locals + remotes) == 0) + if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) return NULL; - stat->s_requestor++; - stat->s_ntargcpu += remotes + locals; - stat->s_ntargremotes += remotes; - stat->s_ntarglocals += locals; - remotes = bau_uvhub_weight(&bau_desc->distribution); - /* uvhub statistics */ - hubs = bau_uvhub_weight(&bau_desc->distribution); - if (locals) { - stat->s_ntarglocaluvhub++; - stat->s_ntargremoteuvhub += (hubs - 1); - } else - stat->s_ntargremoteuvhub += hubs; - stat->s_ntarguvhub += hubs; - if (hubs >= 16) - stat->s_ntarguvhub16++; - else if (hubs >= 8) - stat->s_ntarguvhub8++; - else if (hubs >= 4) - stat->s_ntarguvhub4++; - else if (hubs >= 2) - stat->s_ntarguvhub2++; - else - stat->s_ntarguvhub1++; + record_send_statistics(stat, locals, hubs, remotes, bau_desc); bau_desc->payload.address = va; bau_desc->payload.sending_cpu = cpu; - /* * uv_flush_send_and_wait returns 0 if all cpu's were messaged, * or 1 if it gave up and the original cpumask should be returned. @@ -903,26 +999,31 @@ void uv_bau_message_interrupt(struct pt_regs *regs) { int count = 0; cycles_t time_start; - struct bau_payload_queue_entry *msg; + struct bau_pq_entry *msg; struct bau_control *bcp; struct ptc_stats *stat; struct msg_desc msgdesc; time_start = get_cycles(); + bcp = &per_cpu(bau_control, smp_processor_id()); stat = bcp->statp; - msgdesc.va_queue_first = bcp->va_queue_first; - msgdesc.va_queue_last = bcp->va_queue_last; + + msgdesc.queue_first = bcp->queue_first; + msgdesc.queue_last = bcp->queue_last; + msg = bcp->bau_msg_head; - while (msg->sw_ack_vector) { + while (msg->swack_vec) { count++; - msgdesc.msg_slot = msg - msgdesc.va_queue_first; - msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1; + + msgdesc.msg_slot = msg - msgdesc.queue_first; + msgdesc.swack_slot = ffs(msg->swack_vec) - 1; msgdesc.msg = msg; - uv_bau_process_message(&msgdesc, bcp); + bau_process_message(&msgdesc, bcp); + msg++; - if (msg > msgdesc.va_queue_last) - msg = msgdesc.va_queue_first; + if (msg > msgdesc.queue_last) + msg = msgdesc.queue_first; bcp->bau_msg_head = msg; } stat->d_time += (get_cycles() - time_start); @@ -930,18 +1031,17 @@ void uv_bau_message_interrupt(struct pt_regs *regs) stat->d_nomsg++; else if (count > 1) stat->d_multmsg++; + ack_APIC_irq(); } /* - * uv_enable_timeouts - * - * Each target uvhub (i.e. a uvhub that has no cpu's) needs to have + * Each target uvhub (i.e. a uvhub that has cpu's) needs to have * shootdown message timeouts enabled. The timeout does not cause * an interrupt, but causes an error message to be returned to * the sender. */ -static void __init uv_enable_timeouts(void) +static void __init enable_timeouts(void) { int uvhub; int nuvhubs; @@ -955,52 +1055,44 @@ static void __init uv_enable_timeouts(void) continue; pnode = uv_blade_to_pnode(uvhub); - mmr_image = - uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); + mmr_image = read_mmr_misc_control(pnode); /* * Set the timeout period and then lock it in, in three * steps; captures and locks in the period. * * To program the period, the SOFT_ACK_MODE must be off. */ - mmr_image &= ~((unsigned long)1 << - UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); - uv_write_global_mmr64 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); + mmr_image &= ~(1L << SOFTACK_MSHIFT); + write_mmr_misc_control(pnode, mmr_image); /* * Set the 4-bit period. */ - mmr_image &= ~((unsigned long)0xf << - UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); - mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << - UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); - uv_write_global_mmr64 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); + mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT); + mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT); + write_mmr_misc_control(pnode, mmr_image); /* * UV1: * Subsequent reversals of the timebase bit (3) cause an * immediate timeout of one or all INTD resources as * indicated in bits 2:0 (7 causes all of them to timeout). */ - mmr_image |= ((unsigned long)1 << - UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); + mmr_image |= (1L << SOFTACK_MSHIFT); if (is_uv2_hub()) { - mmr_image |= ((unsigned long)1 << UV2_LEG_SHFT); - mmr_image |= ((unsigned long)1 << UV2_EXT_SHFT); + mmr_image |= (1L << UV2_LEG_SHFT); + mmr_image |= (1L << UV2_EXT_SHFT); } - uv_write_global_mmr64 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); + write_mmr_misc_control(pnode, mmr_image); } } -static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) +static void *ptc_seq_start(struct seq_file *file, loff_t *offset) { if (*offset < num_possible_cpus()) return offset; return NULL; } -static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) +static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) { (*offset)++; if (*offset < num_possible_cpus()) @@ -1008,12 +1100,11 @@ static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) return NULL; } -static void uv_ptc_seq_stop(struct seq_file *file, void *data) +static void ptc_seq_stop(struct seq_file *file, void *data) { } -static inline unsigned long long -microsec_2_cycles(unsigned long microsec) +static inline unsigned long long usec_2_cycles(unsigned long microsec) { unsigned long ns; unsigned long long cyc; @@ -1024,29 +1115,27 @@ microsec_2_cycles(unsigned long microsec) } /* - * Display the statistics thru /proc. + * Display the statistics thru /proc/sgi_uv/ptc_statistics * 'data' points to the cpu number + * Note: see the descriptions in stat_description[]. */ -static int uv_ptc_seq_show(struct seq_file *file, void *data) +static int ptc_seq_show(struct seq_file *file, void *data) { struct ptc_stats *stat; int cpu; cpu = *(loff_t *)data; - if (!cpu) { seq_printf(file, "# cpu sent stime self locals remotes ncpus localhub "); seq_printf(file, "remotehub numuvhubs numuvhubs16 numuvhubs8 "); seq_printf(file, - "numuvhubs4 numuvhubs2 numuvhubs1 dto "); + "numuvhubs4 numuvhubs2 numuvhubs1 dto retries rok "); seq_printf(file, - "retries rok resetp resett giveup sto bz throt "); + "resetp resett giveup sto bz throt swack recv rtime "); seq_printf(file, - "sw_ack recv rtime all "); - seq_printf(file, - "one mult none retry canc nocan reset rcan "); + "all one mult none retry canc nocan reset rcan "); seq_printf(file, "disable enable\n"); } @@ -1073,8 +1162,7 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) /* destination side statistics */ seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", - uv_read_global_mmr64(uv_cpu_to_pnode(cpu), - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), + read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), stat->d_requestee, cycles_2_us(stat->d_time), stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, stat->d_nomsg, stat->d_retries, stat->d_canceled, @@ -1083,7 +1171,6 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) seq_printf(file, "%ld %ld\n", stat->s_bau_disabled, stat->s_bau_reenabled); } - return 0; } @@ -1091,18 +1178,18 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) * Display the tunables thru debugfs */ static ssize_t tunables_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { char *buf; int ret; buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", - "max_bau_concurrent plugged_delay plugsb4reset", + "max_concur plugged_delay plugsb4reset", "timeoutsb4reset ipi_reset_limit complete_threshold", "congested_response_us congested_reps congested_period", - max_bau_concurrent, plugged_delay, plugsb4reset, + max_concurr, plugged_delay, plugsb4reset, timeoutsb4reset, ipi_reset_limit, complete_threshold, - congested_response_us, congested_reps, congested_period); + congested_respns_us, congested_reps, congested_period); if (!buf) return -ENOMEM; @@ -1113,13 +1200,16 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf, } /* - * -1: resetf the statistics + * handle a write to /proc/sgi_uv/ptc_statistics + * -1: reset the statistics * 0: display meaning of the statistics */ -static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, - size_t count, loff_t *data) +static ssize_t ptc_proc_write(struct file *file, const char __user *user, + size_t count, loff_t *data) { int cpu; + int i; + int elements; long input_arg; char optstr[64]; struct ptc_stats *stat; @@ -1129,79 +1219,18 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, if (copy_from_user(optstr, user, count)) return -EFAULT; optstr[count - 1] = '\0'; + if (strict_strtol(optstr, 10, &input_arg) < 0) { printk(KERN_DEBUG "%s is invalid\n", optstr); return -EINVAL; } if (input_arg == 0) { + elements = sizeof(stat_description)/sizeof(*stat_description); printk(KERN_DEBUG "# cpu: cpu number\n"); printk(KERN_DEBUG "Sender statistics:\n"); - printk(KERN_DEBUG - "sent: number of shootdown messages sent\n"); - printk(KERN_DEBUG - "stime: time spent sending messages\n"); - printk(KERN_DEBUG - "numuvhubs: number of hubs targeted with shootdown\n"); - printk(KERN_DEBUG - "numuvhubs16: number times 16 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs8: number times 8 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs4: number times 4 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs2: number times 2 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs1: number times 1 hub targeted\n"); - printk(KERN_DEBUG - "numcpus: number of cpus targeted with shootdown\n"); - printk(KERN_DEBUG - "dto: number of destination timeouts\n"); - printk(KERN_DEBUG - "retries: destination timeout retries sent\n"); - printk(KERN_DEBUG - "rok: : destination timeouts successfully retried\n"); - printk(KERN_DEBUG - "resetp: ipi-style resource resets for plugs\n"); - printk(KERN_DEBUG - "resett: ipi-style resource resets for timeouts\n"); - printk(KERN_DEBUG - "giveup: fall-backs to ipi-style shootdowns\n"); - printk(KERN_DEBUG - "sto: number of source timeouts\n"); - printk(KERN_DEBUG - "bz: number of stay-busy's\n"); - printk(KERN_DEBUG - "throt: number times spun in throttle\n"); - printk(KERN_DEBUG "Destination side statistics:\n"); - printk(KERN_DEBUG - "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); - printk(KERN_DEBUG - "recv: shootdown messages received\n"); - printk(KERN_DEBUG - "rtime: time spent processing messages\n"); - printk(KERN_DEBUG - "all: shootdown all-tlb messages\n"); - printk(KERN_DEBUG - "one: shootdown one-tlb messages\n"); - printk(KERN_DEBUG - "mult: interrupts that found multiple messages\n"); - printk(KERN_DEBUG - "none: interrupts that found no messages\n"); - printk(KERN_DEBUG - "retry: number of retry messages processed\n"); - printk(KERN_DEBUG - "canc: number messages canceled by retries\n"); - printk(KERN_DEBUG - "nocan: number retries that found nothing to cancel\n"); - printk(KERN_DEBUG - "reset: number of ipi-style reset requests processed\n"); - printk(KERN_DEBUG - "rcan: number messages canceled by reset requests\n"); - printk(KERN_DEBUG - "disable: number times use of the BAU was disabled\n"); - printk(KERN_DEBUG - "enable: number times use of the BAU was re-enabled\n"); + for (i = 0; i < elements; i++) + printk(KERN_DEBUG "%s\n", stat_description[i]); } else if (input_arg == -1) { for_each_present_cpu(cpu) { stat = &per_cpu(ptcstats, cpu); @@ -1228,27 +1257,18 @@ static int local_atoi(const char *name) } /* - * set the tunables - * 0 values reset them to defaults + * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables. + * Zero values reset them to defaults. */ -static ssize_t tunables_write(struct file *file, const char __user *user, - size_t count, loff_t *data) +static int parse_tunables_write(struct bau_control *bcp, char *instr, + int count) { - int cpu; - int cnt = 0; - int val; char *p; char *q; - char instr[64]; - struct bau_control *bcp; - - if (count == 0 || count > sizeof(instr)-1) - return -EINVAL; - if (copy_from_user(instr, user, count)) - return -EFAULT; + int cnt = 0; + int val; + int e = sizeof(tunables) / sizeof(*tunables); - instr[count] = '\0'; - /* count the fields */ p = instr + strspn(instr, WHITESPACE); q = p; for (; *p; p = q + strspn(q, WHITESPACE)) { @@ -1257,8 +1277,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user, if (q == p) break; } - if (cnt != 9) { - printk(KERN_INFO "bau tunable error: should be 9 numbers\n"); + if (cnt != e) { + printk(KERN_INFO "bau tunable error: should be %d values\n", e); return -EINVAL; } @@ -1270,97 +1290,80 @@ static ssize_t tunables_write(struct file *file, const char __user *user, switch (cnt) { case 0: if (val == 0) { - max_bau_concurrent = MAX_BAU_CONCURRENT; - max_bau_concurrent_constant = - MAX_BAU_CONCURRENT; + max_concurr = MAX_BAU_CONCURRENT; + max_concurr_const = MAX_BAU_CONCURRENT; continue; } - bcp = &per_cpu(bau_control, smp_processor_id()); if (val < 1 || val > bcp->cpus_in_uvhub) { printk(KERN_DEBUG "Error: BAU max concurrent %d is invalid\n", val); return -EINVAL; } - max_bau_concurrent = val; - max_bau_concurrent_constant = val; - continue; - case 1: - if (val == 0) - plugged_delay = PLUGGED_DELAY; - else - plugged_delay = val; + max_concurr = val; + max_concurr_const = val; continue; - case 2: - if (val == 0) - plugsb4reset = PLUGSB4RESET; - else - plugsb4reset = val; - continue; - case 3: - if (val == 0) - timeoutsb4reset = TIMEOUTSB4RESET; - else - timeoutsb4reset = val; - continue; - case 4: - if (val == 0) - ipi_reset_limit = IPI_RESET_LIMIT; - else - ipi_reset_limit = val; - continue; - case 5: - if (val == 0) - complete_threshold = COMPLETE_THRESHOLD; - else - complete_threshold = val; - continue; - case 6: - if (val == 0) - congested_response_us = CONGESTED_RESPONSE_US; - else - congested_response_us = val; - continue; - case 7: - if (val == 0) - congested_reps = CONGESTED_REPS; - else - congested_reps = val; - continue; - case 8: + default: if (val == 0) - congested_period = CONGESTED_PERIOD; + *tunables[cnt].tunp = tunables[cnt].deflt; else - congested_period = val; + *tunables[cnt].tunp = val; continue; } if (q == p) break; } + return 0; +} + +/* + * Handle a write to debugfs. (/sys/kernel/debug/sgi_uv/bau_tunables) + */ +static ssize_t tunables_write(struct file *file, const char __user *user, + size_t count, loff_t *data) +{ + int cpu; + int ret; + char instr[100]; + struct bau_control *bcp; + + if (count == 0 || count > sizeof(instr)-1) + return -EINVAL; + if (copy_from_user(instr, user, count)) + return -EFAULT; + + instr[count] = '\0'; + + bcp = &per_cpu(bau_control, smp_processor_id()); + + ret = parse_tunables_write(bcp, instr, count); + if (ret) + return ret; + for_each_present_cpu(cpu) { bcp = &per_cpu(bau_control, cpu); - bcp->max_bau_concurrent = max_bau_concurrent; - bcp->max_bau_concurrent_constant = max_bau_concurrent; - bcp->plugged_delay = plugged_delay; - bcp->plugsb4reset = plugsb4reset; - bcp->timeoutsb4reset = timeoutsb4reset; - bcp->ipi_reset_limit = ipi_reset_limit; - bcp->complete_threshold = complete_threshold; - bcp->congested_response_us = congested_response_us; - bcp->congested_reps = congested_reps; - bcp->congested_period = congested_period; + bcp->max_concurr = max_concurr; + bcp->max_concurr_const = max_concurr; + bcp->plugged_delay = plugged_delay; + bcp->plugsb4reset = plugsb4reset; + bcp->timeoutsb4reset = timeoutsb4reset; + bcp->ipi_reset_limit = ipi_reset_limit; + bcp->complete_threshold = complete_threshold; + bcp->cong_response_us = congested_respns_us; + bcp->cong_reps = congested_reps; + bcp->cong_period = congested_period; } return count; } static const struct seq_operations uv_ptc_seq_ops = { - .start = uv_ptc_seq_start, - .next = uv_ptc_seq_next, - .stop = uv_ptc_seq_stop, - .show = uv_ptc_seq_show + .start = ptc_seq_start, + .next = ptc_seq_next, + .stop = ptc_seq_stop, + .show = ptc_seq_show }; -static int uv_ptc_proc_open(struct inode *inode, struct file *file) +static int ptc_proc_open(struct inode *inode, struct file *file) { return seq_open(file, &uv_ptc_seq_ops); } @@ -1371,9 +1374,9 @@ static int tunables_open(struct inode *inode, struct file *file) } static const struct file_operations proc_uv_ptc_operations = { - .open = uv_ptc_proc_open, + .open = ptc_proc_open, .read = seq_read, - .write = uv_ptc_proc_write, + .write = ptc_proc_write, .llseek = seq_lseek, .release = seq_release, }; @@ -1407,7 +1410,7 @@ static int __init uv_ptc_init(void) return -EINVAL; } tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, - tunables_dir, NULL, &tunables_fops); + tunables_dir, NULL, &tunables_fops); if (!tunables_file) { printk(KERN_ERR "unable to create debugfs file %s\n", UV_BAU_TUNABLES_FILE); @@ -1419,24 +1422,24 @@ static int __init uv_ptc_init(void) /* * Initialize the sending side's sending buffers. */ -static void -uv_activation_descriptor_init(int node, int pnode, int base_pnode) +static void activation_descriptor_init(int node, int pnode, int base_pnode) { int i; int cpu; unsigned long pa; unsigned long m; unsigned long n; + size_t dsize; struct bau_desc *bau_desc; struct bau_desc *bd2; struct bau_control *bcp; /* - * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) - * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE) + * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC) + * per cpu; and one per cpu on the uvhub (ADP_SZ) */ - bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE - * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); + dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC; + bau_desc = kmalloc_node(dsize, GFP_KERNEL, node); BUG_ON(!bau_desc); pa = uv_gpa(bau_desc); /* need the real nasid*/ @@ -1444,27 +1447,25 @@ uv_activation_descriptor_init(int node, int pnode, int base_pnode) m = pa & uv_mmask; /* the 14-bit pnode */ - uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, - (n << UV_DESC_BASE_PNODE_SHIFT | m)); + write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); /* - * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each + * Initializing all 8 (ITEMS_PER_DESC) descriptors for each * cpu even though we only use the first one; one descriptor can * describe a broadcast to 256 uv hubs. */ - for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); - i++, bd2++) { + for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) { memset(bd2, 0, sizeof(struct bau_desc)); - bd2->header.sw_ack_flag = 1; + bd2->header.swack_flag = 1; /* * The base_dest_nasid set in the message header is the nasid * of the first uvhub in the partition. The bit map will * indicate destination pnode numbers relative to that base. * They may not be consecutive if nasid striding is being used. */ - bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); - bd2->header.dest_subnodeid = UV_LB_SUBNODEID; - bd2->header.command = UV_NET_ENDPOINT_INTD; - bd2->header.int_both = 1; + bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); + bd2->header.dest_subnodeid = UV_LB_SUBNODEID; + bd2->header.command = UV_NET_ENDPOINT_INTD; + bd2->header.int_both = 1; /* * all others need to be set to zero: * fairness chaining multilevel count replied_to @@ -1484,57 +1485,55 @@ uv_activation_descriptor_init(int node, int pnode, int base_pnode) * - node is first node (kernel memory notion) on the uvhub * - pnode is the uvhub's physical identifier */ -static void -uv_payload_queue_init(int node, int pnode) +static void pq_init(int node, int pnode) { - int pn; int cpu; + size_t plsize; char *cp; - unsigned long pa; - struct bau_payload_queue_entry *pqp; - struct bau_payload_queue_entry *pqp_malloc; + void *vp; + unsigned long pn; + unsigned long first; + unsigned long pn_first; + unsigned long last; + struct bau_pq_entry *pqp; struct bau_control *bcp; - pqp = kmalloc_node((DEST_Q_SIZE + 1) - * sizeof(struct bau_payload_queue_entry), - GFP_KERNEL, node); + plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry); + vp = kmalloc_node(plsize, GFP_KERNEL, node); + pqp = (struct bau_pq_entry *)vp; BUG_ON(!pqp); - pqp_malloc = pqp; cp = (char *)pqp + 31; - pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); + pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5); for_each_present_cpu(cpu) { if (pnode != uv_cpu_to_pnode(cpu)) continue; /* for every cpu on this pnode: */ bcp = &per_cpu(bau_control, cpu); - bcp->va_queue_first = pqp; - bcp->bau_msg_head = pqp; - bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); + bcp->queue_first = pqp; + bcp->bau_msg_head = pqp; + bcp->queue_last = pqp + (DEST_Q_SIZE - 1); } /* * need the pnode of where the memory was really allocated */ - pa = uv_gpa(pqp); - pn = pa >> uv_nshift; - uv_write_global_mmr64(pnode, - UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, - ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | - uv_physnodeaddr(pqp)); - uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, - uv_physnodeaddr(pqp)); - uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, - (unsigned long) - uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1))); + pn = uv_gpa(pqp) >> uv_nshift; + first = uv_physnodeaddr(pqp); + pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first; + last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)); + write_mmr_payload_first(pnode, pn_first); + write_mmr_payload_tail(pnode, first); + write_mmr_payload_last(pnode, last); + /* in effect, all msg_type's are set to MSG_NOOP */ - memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); + memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); } /* * Initialization of each UV hub's structures */ -static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) +static void __init init_uvhub(int uvhub, int vector, int base_pnode) { int node; int pnode; @@ -1542,24 +1541,24 @@ static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) node = uvhub_to_first_node(uvhub); pnode = uv_blade_to_pnode(uvhub); - uv_activation_descriptor_init(node, pnode, base_pnode); - uv_payload_queue_init(node, pnode); + + activation_descriptor_init(node, pnode, base_pnode); + + pq_init(node, pnode); /* * The below initialization can't be in firmware because the * messaging IRQ will be determined by the OS. */ apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; - uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, - ((apicid << 32) | vector)); + write_mmr_data_config(pnode, ((apicid << 32) | vector)); } /* * We will set BAU_MISC_CONTROL with a timeout period. * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. - * So the destination timeout period has be be calculated from them. + * So the destination timeout period has to be calculated from them. */ -static int -calculate_destination_timeout(void) +static int calculate_destination_timeout(void) { unsigned long mmr_image; int mult1; @@ -1570,8 +1569,7 @@ calculate_destination_timeout(void) unsigned long ts_ns; if (is_uv1_hub()) { - mult1 = UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD & - BAU_MISC_CONTROL_MULT_MASK; + mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); @@ -1583,7 +1581,7 @@ calculate_destination_timeout(void) /* 4 bits 0/1 for 10/80us, 3 bits of multiplier */ mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT; - if (mmr_image & ((unsigned long)1 << UV2_ACK_UNITS_SHFT)) + if (mmr_image & (1L << UV2_ACK_UNITS_SHFT)) mult1 = 80; else mult1 = 10; @@ -1593,62 +1591,69 @@ calculate_destination_timeout(void) return ret; } +static void __init init_per_cpu_tunables(void) +{ + int cpu; + struct bau_control *bcp; + + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + bcp->baudisabled = 0; + bcp->statp = &per_cpu(ptcstats, cpu); + /* time interval to catch a hardware stay-busy bug */ + bcp->timeout_interval = usec_2_cycles(2*timeout_us); + bcp->max_concurr = max_concurr; + bcp->max_concurr_const = max_concurr; + bcp->plugged_delay = plugged_delay; + bcp->plugsb4reset = plugsb4reset; + bcp->timeoutsb4reset = timeoutsb4reset; + bcp->ipi_reset_limit = ipi_reset_limit; + bcp->complete_threshold = complete_threshold; + bcp->cong_response_us = congested_respns_us; + bcp->cong_reps = congested_reps; + bcp->cong_period = congested_period; + } +} + /* - * initialize the bau_control structure for each cpu + * Scan all cpus to collect blade and socket summaries. */ -static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) +static int __init get_cpu_topology(int base_pnode, + struct uvhub_desc *uvhub_descs, + unsigned char *uvhub_mask) { - int i; int cpu; - int tcpu; int pnode; int uvhub; - int have_hmaster; - short socket = 0; - unsigned short socket_mask; - unsigned char *uvhub_mask; + int socket; struct bau_control *bcp; struct uvhub_desc *bdp; struct socket_desc *sdp; - struct bau_control *hmaster = NULL; - struct bau_control *smaster = NULL; - struct socket_desc { - short num_cpus; - short cpu_number[MAX_CPUS_PER_SOCKET]; - }; - struct uvhub_desc { - unsigned short socket_mask; - short num_cpus; - short uvhub; - short pnode; - struct socket_desc socket[2]; - }; - struct uvhub_desc *uvhub_descs; - timeout_us = calculate_destination_timeout(); - - uvhub_descs = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); - memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); - uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); for_each_present_cpu(cpu) { bcp = &per_cpu(bau_control, cpu); + memset(bcp, 0, sizeof(struct bau_control)); + pnode = uv_cpu_hub_info(cpu)->pnode; - if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) { + if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) { printk(KERN_EMERG "cpu %d pnode %d-%d beyond %d; BAU disabled\n", - cpu, pnode, base_part_pnode, - UV_DISTRIBUTION_SIZE); + cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE); return 1; } + bcp->osnode = cpu_to_node(cpu); - bcp->partition_base_pnode = uv_partition_base_pnode; + bcp->partition_base_pnode = base_pnode; + uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); bdp = &uvhub_descs[uvhub]; + bdp->num_cpus++; bdp->uvhub = uvhub; bdp->pnode = pnode; + /* kludge: 'assuming' one node per socket, and assuming that disabling a socket just leaves a gap in node numbers */ socket = bcp->osnode & 1; @@ -1657,84 +1662,129 @@ static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) sdp->cpu_number[sdp->num_cpus] = cpu; sdp->num_cpus++; if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { - printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus); + printk(KERN_EMERG "%d cpus per socket invalid\n", + sdp->num_cpus); return 1; } } + return 0; +} + +/* + * Each socket is to get a local array of pnodes/hubs. + */ +static void make_per_cpu_thp(struct bau_control *smaster) +{ + int cpu; + size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus(); + + smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode); + memset(smaster->thp, 0, hpsz); + for_each_present_cpu(cpu) { + smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode; + smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; + } +} + +/* + * Initialize all the per_cpu information for the cpu's on a given socket, + * given what has been gathered into the socket_desc struct. + * And reports the chosen hub and socket masters back to the caller. + */ +static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, + struct bau_control **smasterp, + struct bau_control **hmasterp) +{ + int i; + int cpu; + struct bau_control *bcp; + + for (i = 0; i < sdp->num_cpus; i++) { + cpu = sdp->cpu_number[i]; + bcp = &per_cpu(bau_control, cpu); + bcp->cpu = cpu; + if (i == 0) { + *smasterp = bcp; + if (!(*hmasterp)) + *hmasterp = bcp; + } + bcp->cpus_in_uvhub = bdp->num_cpus; + bcp->cpus_in_socket = sdp->num_cpus; + bcp->socket_master = *smasterp; + bcp->uvhub = bdp->uvhub; + bcp->uvhub_master = *hmasterp; + bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; + if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { + printk(KERN_EMERG "%d cpus per uvhub invalid\n", + bcp->uvhub_cpu); + return 1; + } + } + return 0; +} + +/* + * Summarize the blade and socket topology into the per_cpu structures. + */ +static int __init summarize_uvhub_sockets(int nuvhubs, + struct uvhub_desc *uvhub_descs, + unsigned char *uvhub_mask) +{ + int socket; + int uvhub; + unsigned short socket_mask; + for (uvhub = 0; uvhub < nuvhubs; uvhub++) { + struct uvhub_desc *bdp; + struct bau_control *smaster = NULL; + struct bau_control *hmaster = NULL; + if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) continue; - have_hmaster = 0; + bdp = &uvhub_descs[uvhub]; socket_mask = bdp->socket_mask; socket = 0; while (socket_mask) { - if (!(socket_mask & 1)) - goto nextsocket; - sdp = &bdp->socket[socket]; - for (i = 0; i < sdp->num_cpus; i++) { - cpu = sdp->cpu_number[i]; - bcp = &per_cpu(bau_control, cpu); - bcp->cpu = cpu; - if (i == 0) { - smaster = bcp; - if (!have_hmaster) { - have_hmaster++; - hmaster = bcp; - } - } - bcp->cpus_in_uvhub = bdp->num_cpus; - bcp->cpus_in_socket = sdp->num_cpus; - bcp->socket_master = smaster; - bcp->uvhub = bdp->uvhub; - bcp->uvhub_master = hmaster; - bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> - blade_processor_id; - if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { - printk(KERN_EMERG - "%d cpus per uvhub invalid\n", - bcp->uvhub_cpu); + struct socket_desc *sdp; + if ((socket_mask & 1)) { + sdp = &bdp->socket[socket]; + if (scan_sock(sdp, bdp, &smaster, &hmaster)) return 1; - } } -nextsocket: socket++; socket_mask = (socket_mask >> 1); - /* each socket gets a local array of pnodes/hubs */ - bcp = smaster; - bcp->target_hub_and_pnode = kmalloc_node( - sizeof(struct hub_and_pnode) * - num_possible_cpus(), GFP_KERNEL, bcp->osnode); - memset(bcp->target_hub_and_pnode, 0, - sizeof(struct hub_and_pnode) * - num_possible_cpus()); - for_each_present_cpu(tcpu) { - bcp->target_hub_and_pnode[tcpu].pnode = - uv_cpu_hub_info(tcpu)->pnode; - bcp->target_hub_and_pnode[tcpu].uvhub = - uv_cpu_hub_info(tcpu)->numa_blade_id; - } + make_per_cpu_thp(smaster); } } + return 0; +} + +/* + * initialize the bau_control structure for each cpu + */ +static int __init init_per_cpu(int nuvhubs, int base_part_pnode) +{ + unsigned char *uvhub_mask; + void *vp; + struct uvhub_desc *uvhub_descs; + + timeout_us = calculate_destination_timeout(); + + vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); + uvhub_descs = (struct uvhub_desc *)vp; + memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); + uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); + + if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask)) + return 1; + + if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask)) + return 1; + kfree(uvhub_descs); kfree(uvhub_mask); - for_each_present_cpu(cpu) { - bcp = &per_cpu(bau_control, cpu); - bcp->baudisabled = 0; - bcp->statp = &per_cpu(ptcstats, cpu); - /* time interval to catch a hardware stay-busy bug */ - bcp->timeout_interval = microsec_2_cycles(2*timeout_us); - bcp->max_bau_concurrent = max_bau_concurrent; - bcp->max_bau_concurrent_constant = max_bau_concurrent; - bcp->plugged_delay = plugged_delay; - bcp->plugsb4reset = plugsb4reset; - bcp->timeoutsb4reset = timeoutsb4reset; - bcp->ipi_reset_limit = ipi_reset_limit; - bcp->complete_threshold = complete_threshold; - bcp->congested_response_us = congested_response_us; - bcp->congested_reps = congested_reps; - bcp->congested_period = congested_period; - } + init_per_cpu_tunables(); return 0; } @@ -1747,8 +1797,9 @@ static int __init uv_bau_init(void) int pnode; int nuvhubs; int cur_cpu; + int cpus; int vector; - unsigned long mmr; + cpumask_var_t *mask; if (!is_uv_system()) return 0; @@ -1756,24 +1807,25 @@ static int __init uv_bau_init(void) if (nobau) return 0; - for_each_possible_cpu(cur_cpu) - zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), - GFP_KERNEL, cpu_to_node(cur_cpu)); + for_each_possible_cpu(cur_cpu) { + mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); + } uv_nshift = uv_hub_info->m_val; uv_mmask = (1UL << uv_hub_info->m_val) - 1; nuvhubs = uv_num_possible_blades(); spin_lock_init(&disable_lock); - congested_cycles = microsec_2_cycles(congested_response_us); + congested_cycles = usec_2_cycles(congested_respns_us); - uv_partition_base_pnode = 0x7fffffff; + uv_base_pnode = 0x7fffffff; for (uvhub = 0; uvhub < nuvhubs; uvhub++) { - if (uv_blade_nr_possible_cpus(uvhub) && - (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) - uv_partition_base_pnode = uv_blade_to_pnode(uvhub); + cpus = uv_blade_nr_possible_cpus(uvhub); + if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode)) + uv_base_pnode = uv_blade_to_pnode(uvhub); } - if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) { + if (init_per_cpu(nuvhubs, uv_base_pnode)) { nobau = 1; return 0; } @@ -1781,21 +1833,21 @@ static int __init uv_bau_init(void) vector = UV_BAU_MESSAGE; for_each_possible_blade(uvhub) if (uv_blade_nr_possible_cpus(uvhub)) - uv_init_uvhub(uvhub, vector, uv_partition_base_pnode); + init_uvhub(uvhub, vector, uv_base_pnode); - uv_enable_timeouts(); + enable_timeouts(); alloc_intr_gate(vector, uv_bau_message_intr1); for_each_possible_blade(uvhub) { if (uv_blade_nr_possible_cpus(uvhub)) { + unsigned long val; + unsigned long mmr; pnode = uv_blade_to_pnode(uvhub); /* INIT the bau */ - uv_write_global_mmr64(pnode, - UVH_LB_BAU_SB_ACTIVATION_CONTROL, - ((unsigned long)1 << 63)); + val = 1L << 63; + write_gmmr_activation(pnode, val); mmr = 1; /* should be 1 to broadcast to both sockets */ - uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, - mmr); + write_mmr_data_broadcast(pnode, mmr); } } -- cgit v1.2.3-70-g09d2 From aecb7b64dd9e2512c7a4c7e61dd781415d3dac5a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 24 May 2011 14:04:09 +0530 Subject: dmaengine/dw_dmac: Update maintainer-ship Nobody is currently maintaining dw_dmac. We are using dw_dmac for SPEAr13xx and are currently maintaining it. After discussing with Vinod, sending this patch to update maintainer-ship of dw_dmac. Signed-off-by: Viresh Kumar Acked-by: Havard Skinnemoen Signed-off-by: Vinod Koul --- MAINTAINERS | 7 +++++++ drivers/dma/dw_dmac.c | 2 ++ drivers/dma/dw_dmac_regs.h | 1 + include/linux/dw_dmac.h | 1 + 4 files changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6b4b9cdec37..8f4413014b2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5401,6 +5401,13 @@ L: linux-serial@vger.kernel.org S: Maintained F: drivers/tty/serial +SYNOPSYS DESIGNWARE DMAC DRIVER +M: Viresh Kumar +S: Maintained +F: include/linux/dw_dmac.h +F: drivers/dma/dw_dmac_regs.h +F: drivers/dma/dw_dmac.c + TIMEKEEPING, NTP M: John Stultz M: Thomas Gleixner diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index eec675bf4f9..efd836dfb65 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -3,6 +3,7 @@ * AVR32 systems.) * * Copyright (C) 2007-2008 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1575,3 +1576,4 @@ module_exit(dw_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver"); MODULE_AUTHOR("Haavard Skinnemoen "); +MODULE_AUTHOR("Viresh Kumar "); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index c968597c32a..c3419518d70 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -2,6 +2,7 @@ * Driver for the Synopsys DesignWare AHB DMA Controller * * Copyright (C) 2005-2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 6998d9376ef..4bfe0a2f7d5 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -3,6 +3,7 @@ * AVR32 systems.) * * Copyright (C) 2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as -- cgit v1.2.3-70-g09d2 From af6a25f0e1ec0265c267e6ee4513925eaba6d0ed Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Tue, 24 May 2011 14:49:59 +0100 Subject: x86: Reorder mm_context_t to remove x86_64 alignment padding and thus shrink mm_struct Reorder mm_context_t to remove alignment padding on 64 bit builds shrinking its size from 64 to 56 bytes. This allows mm_struct to shrink from 840 to 832 bytes, so using one fewer cache lines, and getting more objects per slab when using slub. slabinfo mm_struct reports before :- Sizes (bytes) Slabs ----------------------------------- Object : 840 Total : 7 SlabObj: 896 Full : 1 SlabSiz: 16384 Partial: 4 Loss : 56 CpuSlab: 2 Align : 64 Objects: 18 after :- Sizes (bytes) Slabs ---------------------------------- Object : 832 Total : 7 SlabObj: 832 Full : 1 SlabSiz: 16384 Partial: 4 Loss : 0 CpuSlab: 2 Align : 64 Objects: 19 Signed-off-by: Richard Kennedy Cc: wilsons@start.ca Cc: Linus Torvalds Cc: Andrew Morton Cc: Pekka Enberg Link: http://lkml.kernel.org/r/1306244999.1999.5.camel@castor.rsk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index aeff3e89b22..5f55e696276 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -11,14 +11,14 @@ typedef struct { void *ldt; int size; - struct mutex lock; - void *vdso; #ifdef CONFIG_X86_64 /* True if mm supports a task running in 32 bit compatibility mode. */ unsigned short ia32_compat; #endif + struct mutex lock; + void *vdso; } mm_context_t; #ifdef CONFIG_SMP -- cgit v1.2.3-70-g09d2 From 8b27f2ff7a24f0735c96055e676872f05398d99b Mon Sep 17 00:00:00 2001 From: Nikhil P Rao Date: Wed, 25 May 2011 10:18:41 -0700 Subject: x86: Remove unnecessary check in detect_ht() This patch removes a check that causes incorrect scheduler domain setup (SMP instead of SMT) and bootlog warning messages when cpuid extensions for topology enumeration are not supported and the number of processors reported to the OS is smaller than smp_num_siblings. Acked-by: Suresh Siddha Signed-off-by: Nikhil P Rao Link: http://lkml.kernel.org/r/1306343921.19325.1.camel@fedora13 Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c8b41623377..53f02f5d8cc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -477,13 +477,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (smp_num_siblings <= 1) goto out; - if (smp_num_siblings > nr_cpu_ids) { - pr_warning("CPU: Unsupported number of siblings %d", - smp_num_siblings); - smp_num_siblings = 1; - return; - } - index_msb = get_count_order(smp_num_siblings); c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); -- cgit v1.2.3-70-g09d2 From 46b2903c05b248ed78304113ecfba368b4c55def Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 25 May 2011 14:49:20 -0700 Subject: dmaengine: Add API documentation for slave dma usage Signed-off-by: Vinod Koul Signed-off-by: Dan Williams --- Documentation/dmaengine.txt | 97 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt index 0c1c2f63c0a..5a0cb1ef616 100644 --- a/Documentation/dmaengine.txt +++ b/Documentation/dmaengine.txt @@ -1 +1,96 @@ -See Documentation/crypto/async-tx-api.txt + DMA Engine API Guide + ==================== + + Vinod Koul + +NOTE: For DMA Engine usage in async_tx please see: + Documentation/crypto/async-tx-api.txt + + +Below is a guide to device driver writers on how to use the Slave-DMA API of the +DMA Engine. This is applicable only for slave DMA usage only. + +The slave DMA usage consists of following steps +1. Allocate a DMA slave channel +2. Set slave and controller specific parameters +3. Get a descriptor for transaction +4. Submit the transaction and wait for callback notification + +1. Allocate a DMA slave channel +Channel allocation is slightly different in the slave DMA context, client +drivers typically need a channel from a particular DMA controller only and even +in some cases a specific channel is desired. To request a channel +dma_request_channel() API is used. + +Interface: +struct dma_chan *dma_request_channel(dma_cap_mask_t mask, + dma_filter_fn filter_fn, + void *filter_param); +where dma_filter_fn is defined as: +typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + +When the optional 'filter_fn' parameter is set to NULL dma_request_channel +simply returns the first channel that satisfies the capability mask. Otherwise, +when the mask parameter is insufficient for specifying the necessary channel, +the filter_fn routine can be used to disposition the available channels in the +system. The filter_fn routine is called once for each free channel in the +system. Upon seeing a suitable channel filter_fn returns DMA_ACK which flags +that channel to be the return value from dma_request_channel. A channel +allocated via this interface is exclusive to the caller, until +dma_release_channel() is called. + +2. Set slave and controller specific parameters +Next step is always to pass some specific information to the DMA driver. Most of +the generic information which a slave DMA can use is in struct dma_slave_config. +It allows the clients to specify DMA direction, DMA addresses, bus widths, DMA +burst lengths etc. If some DMA controllers have more parameters to be sent then +they should try to embed struct dma_slave_config in their controller specific +structure. That gives flexibility to client to pass more parameters, if +required. + +Interface: +int dmaengine_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) + +3. Get a descriptor for transaction +For slave usage the various modes of slave transfers supported by the +DMA-engine are: +slave_sg - DMA a list of scatter gather buffers from/to a peripheral +dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the + operation is explicitly stopped. +The non NULL return of this transfer API represents a "descriptor" for the given +transaction. + +Interface: +struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_sg)( + struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags); +struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_data_direction direction); + +4. Submit the transaction and wait for callback notification +To schedule the transaction to be scheduled by dma device, the "descriptor" +returned in above (3) needs to be submitted. +To tell the dma driver that a transaction is ready to be serviced, the +descriptor->submit() callback needs to be invoked. This chains the descriptor to +the pending queue. +The transactions in the pending queue can be activated by calling the +issue_pending API. If channel is idle then the first transaction in queue is +started and subsequent ones queued up. +On completion of the DMA operation the next in queue is submitted and a tasklet +triggered. The tasklet would then call the client driver completion callback +routine for notification, if set. +Interface: +void dma_async_issue_pending(struct dma_chan *chan); + +============================================================================== + +Additional usage notes for dma driver writers +1/ Although DMA engine specifies that completion callback routines cannot submit +any new operations, but typically for slave DMA subsequent transaction may not +be available for submit prior to callback routine being called. This requirement +is not a requirement for DMA-slave devices. But they should take care to drop +the spin-lock they might be holding before calling the callback routine -- cgit v1.2.3-70-g09d2 From 0714a57c6865f31ed3366e5abdfae7da580d41d2 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Tue, 24 May 2011 11:53:29 -0700 Subject: xhci: Clear stopped_td when Stop Endpoint command completes. When an URB is cancelled, the xHCI driver issues a Stop Endpoint command so that it can manipulate the ring and remove the transfer. The xHC hardware then places a transfer event with the completion code "Stopped" or "Stopped Invalid" to let the driver know what TD it was in the middle of processing. This TD and TRB is stored in ep->stopped_td and ep->stopped_trb. These pointers are also used in handling stalled endpoints. By design, the Stop Endpoint command can race with URB completion. By the time the Stop Endpoint command is handled, the URBs to be cancelled may have been given back to the driver. Unfortunately, the stopped_td and stopped_trb pointers were not getting cleared in this case. The USB core unconditionally tries to reset the toggle bits on any endpoints when a new alternate interface setting is installed. When the xHCI driver saw that ep->stopped_td was still set from the Stop Endpoint command, xhci_reset_endpoint assumed the endpoint was actually stalled, and attempted to clean up the endpoint rings. This would manifest itself in a failed Reset Endpoint command and failed Set TR dequeue Pointer command after a successful Configure Endpoint command. It may have also been causing driver oops when the stopped_td was accessed. This patch should be backported to stable kernels since 2.6.31. Before 2.6.33, stopped_td was found in the xhci_endpoint_ring, not the xhci_virt_ep. Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-ring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 237a765f8d1..ba551239d28 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -692,6 +692,8 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci, if (list_empty(&ep->cancelled_td_list)) { xhci_stop_watchdog_timer_in_irq(xhci, ep); + ep->stopped_td = NULL; + ep->stopped_trb = NULL; ring_doorbell_for_active_rings(xhci, slot_id, ep_index); return; } -- cgit v1.2.3-70-g09d2 From fe6c6c13d8dcb32398eef143e2e8efc3fb4019bf Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Mon, 23 May 2011 16:41:17 -0700 Subject: xhci: Don't submit commands when the host is dead. When the xHCI host controller dies, the USB core may attempt to reset the devices to their default configuration before disconnecting them. This causes calls into the xHCI bandwidth allocation functions. Don't allow those functions to submit commands or work on xHCI structures if the host controller is marked as dying. Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 8f2a56ece44..58183d2a808 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1314,8 +1314,10 @@ int xhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, if (ret <= 0) return ret; xhci = hcd_to_xhci(hcd); - xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev); + if (xhci->xhc_state & XHCI_STATE_DYING) + return -ENODEV; + xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev); drop_flag = xhci_get_endpoint_flag(&ep->desc); if (drop_flag == SLOT_FLAG || drop_flag == EP0_FLAG) { xhci_dbg(xhci, "xHCI %s - can't drop slot or ep 0 %#x\n", @@ -1401,6 +1403,8 @@ int xhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, return ret; } xhci = hcd_to_xhci(hcd); + if (xhci->xhc_state & XHCI_STATE_DYING) + return -ENODEV; added_ctxs = xhci_get_endpoint_flag(&ep->desc); last_ctx = xhci_last_valid_endpoint(added_ctxs); @@ -1676,6 +1680,8 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) if (ret <= 0) return ret; xhci = hcd_to_xhci(hcd); + if (xhci->xhc_state & XHCI_STATE_DYING) + return -ENODEV; xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev); virt_dev = xhci->devs[udev->slot_id]; -- cgit v1.2.3-70-g09d2 From 380032c3c855ded74c43252a0adb8e8d7afc9f45 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Tue, 5 Apr 2011 13:33:56 -0700 Subject: xhci: STFU: Remove function tracing. Remove unnecessary debugging from the xHCI driver. We don't need to know what function we're calling or returning from. Now I know how to use markup-oops.pl to de-mystify stack dumps of crashes. Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-ring.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index ba551239d28..4794905d38d 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2182,7 +2182,6 @@ static int xhci_handle_event(struct xhci_hcd *xhci) int update_ptrs = 1; int ret; - xhci_dbg(xhci, "In %s\n", __func__); if (!xhci->event_ring || !xhci->event_ring->dequeue) { xhci->error_bitmask |= 1 << 1; return 0; @@ -2195,7 +2194,6 @@ static int xhci_handle_event(struct xhci_hcd *xhci) xhci->error_bitmask |= 1 << 2; return 0; } - xhci_dbg(xhci, "%s - OS owns TRB\n", __func__); /* * Barrier between reading the TRB_CYCLE (valid) flag above and any @@ -2205,20 +2203,14 @@ static int xhci_handle_event(struct xhci_hcd *xhci) /* FIXME: Handle more event types. */ switch ((le32_to_cpu(event->event_cmd.flags) & TRB_TYPE_BITMASK)) { case TRB_TYPE(TRB_COMPLETION): - xhci_dbg(xhci, "%s - calling handle_cmd_completion\n", __func__); handle_cmd_completion(xhci, &event->event_cmd); - xhci_dbg(xhci, "%s - returned from handle_cmd_completion\n", __func__); break; case TRB_TYPE(TRB_PORT_STATUS): - xhci_dbg(xhci, "%s - calling handle_port_status\n", __func__); handle_port_status(xhci, event); - xhci_dbg(xhci, "%s - returned from handle_port_status\n", __func__); update_ptrs = 0; break; case TRB_TYPE(TRB_TRANSFER): - xhci_dbg(xhci, "%s - calling handle_tx_event\n", __func__); ret = handle_tx_event(xhci, &event->trans_event); - xhci_dbg(xhci, "%s - returned from handle_tx_event\n", __func__); if (ret < 0) xhci->error_bitmask |= 1 << 9; else -- cgit v1.2.3-70-g09d2 From 5153b7b39105d8beb38e1c3f26ab4b877960d8e1 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Tue, 5 Apr 2011 13:33:56 -0700 Subject: xhci: STFU: Don't print event ring dequeue pointer. Stop printing out the event ring dequeue pointer and status register in the operational register set. The host will report an OK status 99% of the time the interrupt handler is called, and usually when it's really hosed, a host controller won't even call the interrupt handler. So the line is really useless. Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-ring.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 4794905d38d..f1ae1723403 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -167,9 +167,7 @@ static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer next = ring->dequeue; } addr = (unsigned long long) xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue); - if (ring == xhci->event_ring) - xhci_dbg(xhci, "Event ring deq = 0x%llx (DMA)\n", addr); - else if (ring == xhci->cmd_ring) + if (ring == xhci->cmd_ring) xhci_dbg(xhci, "Command ring deq = 0x%llx (DMA)\n", addr); else xhci_dbg(xhci, "Ring deq = 0x%llx (DMA)\n", addr); @@ -2267,16 +2265,6 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) spin_unlock(&xhci->lock); return IRQ_NONE; } - xhci_dbg(xhci, "op reg status = %08x\n", status); - xhci_dbg(xhci, "Event ring dequeue ptr:\n"); - xhci_dbg(xhci, "@%llx %08x %08x %08x %08x\n", - (unsigned long long) - xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, trb), - lower_32_bits(le64_to_cpu(trb->link.segment_ptr)), - upper_32_bits(le64_to_cpu(trb->link.segment_ptr)), - (unsigned int) le32_to_cpu(trb->link.intr_target), - (unsigned int) le32_to_cpu(trb->link.control)); - if (status & STS_FATAL) { xhci_warn(xhci, "WARNING: Host System Error\n"); xhci_halt(xhci); -- cgit v1.2.3-70-g09d2 From f444ff27e9b8c953eef49da65c649fdcd202165a Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Tue, 5 Apr 2011 15:53:47 -0700 Subject: xhci: STFU: Be quieter during URB submission and completion. Unsurprisingly, URBs get submitted and completed a lot in the xHCI driver. If we have to print 10 lines of debug for every URB submitted or completed, then that can cause the whole system to stay in the interrupt handler too long, and can cause Missed Service completion codes for isochronous transfers. Cut down the debugging in the URB submission and completion paths: - Don't squawk about successful transfers, only unsuccessful ones. - Only print the number of bytes transferred if this was a short transfer. - Don't print the endpoint index for successful transfers (will add more debug to failed transfers to show endpoint index there later). - Stop printing MMIO writes. This debugging shows up when the endpoint doorbell is rung a to start a transfer (basically for every URB). - Don't print out the ring enqueue and dequeue pointers - Stop printing when we're pointing to a link TRB. Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-ring.c | 46 +++++++++++++++----------------------------- drivers/usb/host/xhci.h | 6 ------ 2 files changed, 15 insertions(+), 37 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index f1ae1723403..d1498c03c4c 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -167,10 +167,6 @@ static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer next = ring->dequeue; } addr = (unsigned long long) xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue); - if (ring == xhci->cmd_ring) - xhci_dbg(xhci, "Command ring deq = 0x%llx (DMA)\n", addr); - else - xhci_dbg(xhci, "Ring deq = 0x%llx (DMA)\n", addr); } /* @@ -246,12 +242,6 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring, next = ring->enqueue; } addr = (unsigned long long) xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue); - if (ring == xhci->event_ring) - xhci_dbg(xhci, "Event ring enq = 0x%llx (DMA)\n", addr); - else if (ring == xhci->cmd_ring) - xhci_dbg(xhci, "Command ring enq = 0x%llx (DMA)\n", addr); - else - xhci_dbg(xhci, "Ring enq = 0x%llx (DMA)\n", addr); } /* @@ -634,13 +624,11 @@ static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci, } } usb_hcd_unlink_urb_from_ep(hcd, urb); - xhci_dbg(xhci, "Giveback %s URB %p\n", adjective, urb); spin_unlock(&xhci->lock); usb_hcd_giveback_urb(hcd, urb, status); xhci_urb_free_priv(xhci, urb_priv); spin_lock(&xhci->lock); - xhci_dbg(xhci, "%s URB given back\n", adjective); } } @@ -1630,7 +1618,6 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, "without IOC set??\n"); *status = -ESHUTDOWN; } else { - xhci_dbg(xhci, "Successful control transfer!\n"); *status = 0; } break; @@ -1727,7 +1714,6 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, switch (trb_comp_code) { case COMP_SUCCESS: frame->status = 0; - xhci_dbg(xhci, "Successful isoc transfer!\n"); break; case COMP_SHORT_TX: frame->status = td->urb->transfer_flags & URB_SHORT_NOT_OK ? @@ -1837,12 +1823,6 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, else *status = 0; } else { - if (usb_endpoint_xfer_bulk(&td->urb->ep->desc)) - xhci_dbg(xhci, "Successful bulk " - "transfer!\n"); - else - xhci_dbg(xhci, "Successful interrupt " - "transfer!\n"); *status = 0; } break; @@ -1856,11 +1836,12 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, /* Others already handled above */ break; } - xhci_dbg(xhci, "ep %#x - asked for %d bytes, " - "%d bytes untransferred\n", - td->urb->ep->desc.bEndpointAddress, - td->urb->transfer_buffer_length, - TRB_LEN(le32_to_cpu(event->transfer_len))); + if (trb_comp_code == COMP_SHORT_TX) + xhci_dbg(xhci, "ep %#x - asked for %d bytes, " + "%d bytes untransferred\n", + td->urb->ep->desc.bEndpointAddress, + td->urb->transfer_buffer_length, + TRB_LEN(le32_to_cpu(event->transfer_len))); /* Fast path - was this the last TRB in the TD for this URB? */ if (event_trb == td->last_trb) { if (TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) { @@ -1954,7 +1935,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, /* Endpoint ID is 1 based, our index is zero based */ ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1; - xhci_dbg(xhci, "%s - ep index = %d\n", __func__, ep_index); ep = &xdev->eps[ep_index]; ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer)); ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index); @@ -2149,9 +2129,15 @@ cleanup: xhci_urb_free_priv(xhci, urb_priv); usb_hcd_unlink_urb_from_ep(bus_to_hcd(urb->dev->bus), urb); - xhci_dbg(xhci, "Giveback URB %p, len = %d, " - "status = %d\n", - urb, urb->actual_length, status); + if ((urb->actual_length != urb->transfer_buffer_length && + (urb->transfer_flags & + URB_SHORT_NOT_OK)) || + status != 0) + xhci_dbg(xhci, "Giveback URB %p, len = %d, " + "expected = %x, status = %d\n", + urb, urb->actual_length, + urb->transfer_buffer_length, + status); spin_unlock(&xhci->lock); usb_hcd_giveback_urb(bus_to_hcd(urb->dev->bus), urb, status); spin_lock(&xhci->lock); @@ -2379,7 +2365,6 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, u32 ep_state, unsigned int num_trbs, gfp_t mem_flags) { /* Make sure the endpoint has been added to xHC schedule */ - xhci_dbg(xhci, "Endpoint state = 0x%x\n", ep_state); switch (ep_state) { case EP_STATE_DISABLED: /* @@ -2416,7 +2401,6 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, struct xhci_ring *ring = ep_ring; union xhci_trb *next; - xhci_dbg(xhci, "prepare_ring: pointing to link trb\n"); next = ring->enqueue; while (last_trb(xhci, ring, ring->enq_seg, next)) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index e12db7cfb9b..0772a8cfea2 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1338,9 +1338,6 @@ static inline unsigned int xhci_readl(const struct xhci_hcd *xhci, static inline void xhci_writel(struct xhci_hcd *xhci, const unsigned int val, __le32 __iomem *regs) { - xhci_dbg(xhci, - "`MEM_WRITE_DWORD(3'b000, 32'h%p, 32'h%0x, 4'hf);\n", - regs, val); writel(val, regs); } @@ -1368,9 +1365,6 @@ static inline void xhci_write_64(struct xhci_hcd *xhci, u32 val_lo = lower_32_bits(val); u32 val_hi = upper_32_bits(val); - xhci_dbg(xhci, - "`MEM_WRITE_DWORD(3'b000, 64'h%p, 64'h%0lx, 4'hf);\n", - regs, (long unsigned int) val); writel(val_lo, ptr); writel(val_hi, ptr + 1); } -- cgit v1.2.3-70-g09d2 From f29c50419c8d1998edd759f1990c4243a248f469 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 May 2011 14:35:33 -0400 Subject: maccess,probe_kernel: Make write/read src const void * The functions probe_kernel_write() and probe_kernel_read() do not modify the src pointer. Allow const pointers to be passed in without the need of a typecast. Acked-by: Mike Frysinger Acked-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/1305824936.1465.4.camel@gandalf.stny.rr.com --- arch/blackfin/mm/maccess.c | 4 ++-- arch/s390/mm/maccess.c | 4 ++-- include/linux/uaccess.h | 8 ++++---- mm/maccess.c | 8 ++++---- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/blackfin/mm/maccess.c b/arch/blackfin/mm/maccess.c index b71cebc1f8a..e2532114c5f 100644 --- a/arch/blackfin/mm/maccess.c +++ b/arch/blackfin/mm/maccess.c @@ -16,7 +16,7 @@ static int validate_memory_access_address(unsigned long addr, int size) return bfin_mem_access_type(addr, size); } -long probe_kernel_read(void *dst, void *src, size_t size) +long probe_kernel_read(void *dst, const void *src, size_t size) { unsigned long lsrc = (unsigned long)src; int mem_type; @@ -55,7 +55,7 @@ long probe_kernel_read(void *dst, void *src, size_t size) return -EFAULT; } -long probe_kernel_write(void *dst, void *src, size_t size) +long probe_kernel_write(void *dst, const void *src, size_t size) { unsigned long ldst = (unsigned long)dst; int mem_type; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 71a4b0d34be..51e5cd9b906 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -19,7 +19,7 @@ * using the stura instruction. * Returns the number of bytes copied or -EFAULT. */ -static long probe_kernel_write_odd(void *dst, void *src, size_t size) +static long probe_kernel_write_odd(void *dst, const void *src, size_t size) { unsigned long count, aligned; int offset, mask; @@ -45,7 +45,7 @@ static long probe_kernel_write_odd(void *dst, void *src, size_t size) return rc ? rc : count; } -long probe_kernel_write(void *dst, void *src, size_t size) +long probe_kernel_write(void *dst, const void *src, size_t size) { long copied = 0; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index d512d98dfb7..5ca0951e185 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -93,8 +93,8 @@ static inline unsigned long __copy_from_user_nocache(void *to, * Safely read from address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long probe_kernel_read(void *dst, void *src, size_t size); -extern long __probe_kernel_read(void *dst, void *src, size_t size); +extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long __probe_kernel_read(void *dst, const void *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location @@ -105,7 +105,7 @@ extern long __probe_kernel_read(void *dst, void *src, size_t size); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long notrace probe_kernel_write(void *dst, void *src, size_t size); -extern long notrace __probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); +extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); #endif /* __LINUX_UACCESS_H__ */ diff --git a/mm/maccess.c b/mm/maccess.c index e2b6f5634e0..4cee182ab5f 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -15,10 +15,10 @@ * happens, handle that and return -EFAULT. */ -long __weak probe_kernel_read(void *dst, void *src, size_t size) +long __weak probe_kernel_read(void *dst, const void *src, size_t size) __attribute__((alias("__probe_kernel_read"))); -long __probe_kernel_read(void *dst, void *src, size_t size) +long __probe_kernel_read(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); @@ -43,10 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long __weak probe_kernel_write(void *dst, void *src, size_t size) +long __weak probe_kernel_write(void *dst, const void *src, size_t size) __attribute__((alias("__probe_kernel_write"))); -long __probe_kernel_write(void *dst, void *src, size_t size) +long __probe_kernel_write(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); -- cgit v1.2.3-70-g09d2 From 0d098a7d1e39553e8a3f638b923551edec4868a7 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Thu, 12 May 2011 23:33:40 +0600 Subject: x86/ftrace: Fix compiler warning in ftrace.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to commit dc326fca2b64 (x86, cpu: Clean up and unify the NOP selection infrastructure), we get the following warning: arch/x86/kernel/ftrace.c: In function ‘ftrace_make_nop’: arch/x86/kernel/ftrace.c:308:6: warning: assignment discards qualifiers from pointer target type arch/x86/kernel/ftrace.c: In function ‘ftrace_make_call’: arch/x86/kernel/ftrace.c:318:6: warning: assignment discards qualifiers from pointer target type ftrace_nop_replace() now returns const unsigned char *, so change its associated function/variable to its compatible type to keep compiler clam. Signed-off-by: Rakib Mullick Link: http://lkml.kernel.org/r/1305221620.7986.4.camel@localhost.localdomain [ updated for change of const void *src in probe_kernel_write() ] Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0ba15a6cc57..c9a281f272f 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -123,7 +123,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) static atomic_t nmi_running = ATOMIC_INIT(0); static int mod_code_status; /* holds return value of text write */ static void *mod_code_ip; /* holds the IP to write to */ -static void *mod_code_newcode; /* holds the text to write to the IP */ +static const void *mod_code_newcode; /* holds the text to write to the IP */ static unsigned nmi_wait_count; static atomic_t nmi_update_count = ATOMIC_INIT(0); @@ -225,7 +225,7 @@ within(unsigned long addr, unsigned long start, unsigned long end) } static int -do_ftrace_mod_code(unsigned long ip, void *new_code) +do_ftrace_mod_code(unsigned long ip, const void *new_code) { /* * On x86_64, kernel text mappings are mapped read-only with @@ -266,8 +266,8 @@ static const unsigned char *ftrace_nop_replace(void) } static int -ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code) +ftrace_modify_code(unsigned long ip, unsigned const char *old_code, + unsigned const char *new_code) { unsigned char replaced[MCOUNT_INSN_SIZE]; @@ -301,7 +301,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned char *new, *old; + unsigned const char *new, *old; unsigned long ip = rec->ip; old = ftrace_call_replace(ip, addr); @@ -312,7 +312,7 @@ int ftrace_make_nop(struct module *mod, int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned char *new, *old; + unsigned const char *new, *old; unsigned long ip = rec->ip; old = ftrace_nop_replace(); -- cgit v1.2.3-70-g09d2 From 50d6828e898590fc5d038810334695380baa1c78 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 May 2011 14:41:17 -0400 Subject: scripts/tags.sh: Fix ctags for DEFINE_EVENT() The regex to handle DEFINE_EVENT() should not be the same as the TRACE_EVENT() as the first parameter in DEFINE_EVENT is the template name, not the event name. We need the second parameter as that is what the trace_... will use. Tested-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- scripts/tags.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index bd6185d529c..33b53cab5db 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -132,7 +132,7 @@ exuberant() --regex-asm='/^ENTRY\(([^)]*)\).*/\1/' \ --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \ --regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/' \ - --regex-c++='/^DEFINE_EVENT\(([^,)]*).*/trace_\1/' + --regex-c++='/^DEFINE_EVENT\([^,)]*, *([^,)]*).*/trace_\1/' all_kconfigs | xargs $1 -a \ --langdef=kconfig --language-force=kconfig \ -- cgit v1.2.3-70-g09d2 From 4d7a2fa876d1a615649761dc465708d0a062249a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 May 2011 14:43:57 -0400 Subject: scripts/tags.sh: Add magic for trace-events for etags too Seems that Peter Zijlstra treats us emacs users as second class citizens and the commit: commit 15664125f7cadcb6d725cb2d9b90f9715397848d Author: Peter Zijlstra scripts/tags.sh: Add magic for trace-events only updated ctags (for vim) and did not do the work to let us lowly emacs users benefit from such a change. Cc: Peter Zijlstra Signed-off-by: Steven Rostedt --- scripts/tags.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index 33b53cab5db..75c5d24f199 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -152,7 +152,9 @@ emacs() { all_sources | xargs $1 -a \ --regex='/^ENTRY(\([^)]*\)).*/\1/' \ - --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' + --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' \ + --regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/' \ + --regex='/^DEFINE_EVENT([^,)]*, *\([^,)]*\).*/trace_\1/' all_kconfigs | xargs $1 -a \ --regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/' -- cgit v1.2.3-70-g09d2 From 9905ce8ad7b79dddd23c7b4753d0b2cdb65bde3c Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 11 May 2011 22:53:51 +0530 Subject: ftrace/recordmcount: Avoid STT_FUNC symbols as base on ARM While find_secsym_ndx often finds the unamed local STT_SECTION, if a section has only one function in it, the ARM toolchain generates the STT_FUNC symbol before the STT_SECTION, and recordmcount finds this instead. This is problematic on ARM because in ARM ELFs, "if a [STT_FUNC] symbol addresses a Thumb instruction, its value is the address of the instruction with bit zero set (in a relocatable object, the section offset with bit zero set)". This leads to incorrect mcount addresses being recorded. Fix this by not using STT_FUNC symbols as the base on ARM. Signed-off-by: Rabin Vincent Link: http://lkml.kernel.org/r/1305134631-31617-1-git-send-email-rabin@rab.in Signed-off-by: Steven Rostedt --- scripts/recordmcount.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 4be60364a40..f40a6af6bf4 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -43,6 +43,7 @@ #undef ELF_R_INFO #undef Elf_r_info #undef ELF_ST_BIND +#undef ELF_ST_TYPE #undef fn_ELF_R_SYM #undef fn_ELF_R_INFO #undef uint_t @@ -76,6 +77,7 @@ # define ELF_R_INFO ELF64_R_INFO # define Elf_r_info Elf64_r_info # define ELF_ST_BIND ELF64_ST_BIND +# define ELF_ST_TYPE ELF64_ST_TYPE # define fn_ELF_R_SYM fn_ELF64_R_SYM # define fn_ELF_R_INFO fn_ELF64_R_INFO # define uint_t uint64_t @@ -108,6 +110,7 @@ # define ELF_R_INFO ELF32_R_INFO # define Elf_r_info Elf32_r_info # define ELF_ST_BIND ELF32_ST_BIND +# define ELF_ST_TYPE ELF32_ST_TYPE # define fn_ELF_R_SYM fn_ELF32_R_SYM # define fn_ELF_R_INFO fn_ELF32_R_INFO # define uint_t uint32_t @@ -427,6 +430,11 @@ static unsigned find_secsym_ndx(unsigned const txtndx, if (txtndx == w2(symp->st_shndx) /* avoid STB_WEAK */ && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) { + /* function symbols on ARM have quirks, avoid them */ + if (w2(ehdr->e_machine) == EM_ARM + && ELF_ST_TYPE(symp->st_info) == STT_FUNC) + continue; + *recvalp = _w(symp->st_value); return symp - sym0; } -- cgit v1.2.3-70-g09d2 From 7cbc5b8d4a775a43875a09e29c49a2a8195b5b2d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 10 May 2011 12:43:46 +0200 Subject: jump_label: Check entries limit in __jump_label_update When iterating the jump_label entries array (core or modules), the __jump_label_update function peeks over the last entry. The reason is that the end of the for loop depends on the key value of the processed entry. Thus when going through the last array entry, we will touch the memory behind the array limit. This bug probably will never be triggered, since most likely the memory behind the jump_label entries will be accesable and the entry->key will be different than the expected value. Signed-off-by: Jiri Olsa Acked-by: Jason Baron Link: http://lkml.kernel.org/r/20110510104346.GC1899@jolsa.brq.redhat.com Signed-off-by: Steven Rostedt --- kernel/jump_label.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 74d1c099fbd..fa27e750dbc 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -105,9 +105,12 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start, } static void __jump_label_update(struct jump_label_key *key, - struct jump_entry *entry, int enable) + struct jump_entry *entry, + struct jump_entry *stop, int enable) { - for (; entry->key == (jump_label_t)(unsigned long)key; entry++) { + for (; (entry < stop) && + (entry->key == (jump_label_t)(unsigned long)key); + entry++) { /* * entry->code set to 0 invalidates module init text sections * kernel_text_address() verifies we are not in core kernel @@ -181,7 +184,11 @@ static void __jump_label_mod_update(struct jump_label_key *key, int enable) struct jump_label_mod *mod = key->next; while (mod) { - __jump_label_update(key, mod->entries, enable); + struct module *m = mod->mod; + + __jump_label_update(key, mod->entries, + m->jump_entries + m->num_jump_entries, + enable); mod = mod->next; } } @@ -245,7 +252,8 @@ static int jump_label_add_module(struct module *mod) key->next = jlm; if (jump_label_enabled(key)) - __jump_label_update(key, iter, JUMP_LABEL_ENABLE); + __jump_label_update(key, iter, iter_stop, + JUMP_LABEL_ENABLE); } return 0; @@ -371,7 +379,7 @@ static void jump_label_update(struct jump_label_key *key, int enable) /* if there are no users, entry can be NULL */ if (entry) - __jump_label_update(key, entry, enable); + __jump_label_update(key, entry, __stop___jump_table, enable); #ifdef CONFIG_MODULES __jump_label_mod_update(key, enable); -- cgit v1.2.3-70-g09d2 From 916f676f8dc016103f983c7ec54c18ecdbb6e349 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 25 May 2011 09:53:13 -0400 Subject: x86, efi: Retain boot service code until after switching to virtual mode UEFI stands for "Unified Extensible Firmware Interface", where "Firmware" is an ancient African word meaning "Why do something right when you can do it so wrong that children will weep and brave adults will cower before you", and "UEI" is Celtic for "We missed DOS so we burned it into your ROMs". The UEFI specification provides for runtime services (ie, another way for the operating system to be forced to depend on the firmware) and we rely on these for certain trivial tasks such as setting up the bootloader. But some hardware fails to work if we attempt to use these runtime services from physical mode, and so we have to switch into virtual mode. So far so dreadful. The specification makes it clear that the operating system is free to do whatever it wants with boot services code after ExitBootServices() has been called. SetVirtualAddressMap() can't be called until ExitBootServices() has been. So, obviously, a whole bunch of EFI implementations call into boot services code when we do that. Since we've been charmingly naive and trusted that the specification may be somehow relevant to the real world, we've already stuffed a picture of a penguin or something in that address space. And just to make things more entertaining, we've also marked it non-executable. This patch allocates the boot services regions during EFI init and makes sure that they're executable. Then, after SetVirtualAddressMap(), it discards them and everyone lives happily ever after. Except for the ones who have to work on EFI, who live sad lives haunted by the knowledge that someone's eventually going to write yet another firmware specification. [ hpa: adding this to urgent with a stable tag since it fixes currently-broken hardware. However, I do not know what the dependencies are and so I do not know which -stable versions this may be a candidate for. ] Signed-off-by: Matthew Garrett Link: http://lkml.kernel.org/r/1306331593-28715-1-git-send-email-mjg@redhat.com Signed-off-by: H. Peter Anvin Cc: Tony Luck Cc: --- arch/x86/kernel/setup.c | 7 +++++++ arch/x86/platform/efi/efi.c | 45 +++++++++++++++++++++++++++++++++++++++++- arch/x86/platform/efi/efi_64.c | 5 +++-- include/linux/efi.h | 1 + 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 605e5ae19c7..b9ca498f560 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -910,6 +910,13 @@ void __init setup_arch(char **cmdline_p) memblock.current_limit = get_max_mapped(); memblock_x86_fill(); + /* + * The EFI specification says that boot service code won't be called + * after ExitBootServices(). This is, in fact, a lie. + */ + if (efi_enabled) + efi_reserve_boot_services(); + /* preallocate 4k for mptable mpc */ early_reserve_e820_mpc_new(); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index b30aa26a8df..0d3a4fa3456 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -304,6 +304,40 @@ static void __init print_efi_memmap(void) } #endif /* EFI_DEBUG */ +void __init efi_reserve_boot_services(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + memblock_x86_reserve_range(start, start + size, "EFI Boot"); + } +} + +static void __init efi_free_boot_services(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + free_bootmem_late(start, size); + } +} + void __init efi_init(void) { efi_config_table_t *config_tables; @@ -536,7 +570,9 @@ void __init efi_enter_virtual_mode(void) for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME)) + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) continue; size = md->num_pages << EFI_PAGE_SHIFT; @@ -592,6 +628,13 @@ void __init efi_enter_virtual_mode(void) panic("EFI call to SetVirtualAddressMap() failed!"); } + /* + * Thankfully, it does seem that no runtime services other than + * SetVirtualAddressMap() will touch boot services code, so we can + * get rid of it all at this point + */ + efi_free_boot_services(); + /* * Now that EFI is in virtual mode, update the function * pointers in the runtime service table to the new virtual addresses. diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 2649426a790..ac3aa54e265 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -49,10 +49,11 @@ static void __init early_code_mapping_set_exec(int executable) if (!(__supported_pte_mask & _PAGE_NX)) return; - /* Make EFI runtime service code area executable */ + /* Make EFI service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (md->type == EFI_RUNTIME_SERVICES_CODE) + if (md->type == EFI_RUNTIME_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_CODE) efi_set_executable(md, executable); } } diff --git a/include/linux/efi.h b/include/linux/efi.h index 33fa1203024..e376270cd26 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -299,6 +299,7 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern unsigned long efi_get_time(void); extern int efi_set_rtc_mmss(unsigned long nowtime); +extern void efi_reserve_boot_services(void); extern struct efi_memory_map memmap; /** -- cgit v1.2.3-70-g09d2 From a1cd6173596c6f7d1f0b41ac7d33ecf03c581edc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 May 2011 15:24:25 -0400 Subject: ftrace: Have ftrace_startup() return failure code The register_ftrace_function() returns an error code on failure except if the call to ftrace_startup() fails. Add a error return to ftrace_startup() if it fails to start, allowing register_ftrace_funtion() to return a proper error value. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d017c2c82c4..bebbc959ee8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1638,12 +1638,12 @@ static void ftrace_startup_enable(int command) ftrace_run_update_code(command); } -static void ftrace_startup(struct ftrace_ops *ops, int command) +static int ftrace_startup(struct ftrace_ops *ops, int command) { bool hash_enable = true; if (unlikely(ftrace_disabled)) - return; + return -ENODEV; ftrace_start_up++; command |= FTRACE_ENABLE_CALLS; @@ -1662,6 +1662,8 @@ static void ftrace_startup(struct ftrace_ops *ops, int command) ftrace_hash_rec_enable(ops, 1); ftrace_startup_enable(command); + + return 0; } static void ftrace_shutdown(struct ftrace_ops *ops, int command) @@ -2501,7 +2503,7 @@ static void __enable_ftrace_function_probe(void) ret = __register_ftrace_function(&trace_probe_ops); if (!ret) - ftrace_startup(&trace_probe_ops, 0); + ret = ftrace_startup(&trace_probe_ops, 0); ftrace_probe_registered = 1; } @@ -3466,7 +3468,7 @@ device_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } /* Keep as macros so we do not need to define the commands */ -# define ftrace_startup(ops, command) do { } while (0) +# define ftrace_startup(ops, command) ({0;}) # define ftrace_shutdown(ops, command) do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) @@ -3799,7 +3801,7 @@ int register_ftrace_function(struct ftrace_ops *ops) ret = __register_ftrace_function(ops); if (!ret) - ftrace_startup(ops, 0); + ret = ftrace_startup(ops, 0); out_unlock: @@ -4045,7 +4047,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, ftrace_graph_return = retfunc; ftrace_graph_entry = entryfunc; - ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); + ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); out: mutex_unlock(&ftrace_lock); -- cgit v1.2.3-70-g09d2 From 17bb615ad4f8d2d2c0f02794d27d7f83e0009ef4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 May 2011 15:27:46 -0400 Subject: tracing: Have event with function tracer check error return The self tests for event tracer does not check if the function tracing was successfully activated. It needs to before it continues the tests, otherwise the wrong errors may be reported. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2fe11034135..686ec399f2a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1657,7 +1657,12 @@ static struct ftrace_ops trace_ops __initdata = static __init void event_trace_self_test_with_function(void) { - register_ftrace_function(&trace_ops); + int ret; + ret = register_ftrace_function(&trace_ops); + if (WARN_ON(ret < 0)) { + pr_info("Failed to enable function tracer for event tests\n"); + return; + } pr_info("Running tests again, along with the function tracer\n"); event_trace_self_tests(); unregister_ftrace_function(&trace_ops); -- cgit v1.2.3-70-g09d2 From 3b6cfdb1714a33ae4d2ca9fbc818a42cf7adee69 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 May 2011 15:33:49 -0400 Subject: ftrace: Set ops->flag to enabled even on static function tracing When dynamic ftrace is not configured, the ops->flags still needs to have its FTRACE_OPS_FL_ENABLED bit set in ftrace_startup(). Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bebbc959ee8..25949b33057 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3468,7 +3468,11 @@ device_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } /* Keep as macros so we do not need to define the commands */ -# define ftrace_startup(ops, command) ({0;}) +# define ftrace_startup(ops, command) \ + ({ \ + (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ + 0; \ + }) # define ftrace_shutdown(ops, command) do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) -- cgit v1.2.3-70-g09d2 From 2fc1b6f0d0a719e1e2a30bf076a3a799feaf6af2 Mon Sep 17 00:00:00 2001 From: liubo Date: Tue, 19 Apr 2011 09:35:28 +0800 Subject: tracing: Add __print_symbolic_u64 to avoid warnings on 32bit machine Filesystem, like Btrfs, has some "ULL" macros, and when these macros are passed to tracepoints'__print_symbolic(), there will be 64->32 truncate WARNINGS during compiling on 32bit box. Signed-off-by: Liu Bo Link: http://lkml.kernel.org/r/4DACE6E0.7000507@cn.fujitsu.com Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 12 ++++++++++++ include/trace/ftrace.h | 13 +++++++++++++ kernel/trace/trace_output.c | 27 +++++++++++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index b5a550a39a7..59d3ef100eb 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -16,6 +16,11 @@ struct trace_print_flags { const char *name; }; +struct trace_print_flags_u64 { + unsigned long long mask; + const char *name; +}; + const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); @@ -23,6 +28,13 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, const struct trace_print_flags *symbol_array); +#if BITS_PER_LONG == 32 +const char *ftrace_print_symbols_seq_u64(struct trace_seq *p, + unsigned long long val, + const struct trace_print_flags_u64 + *symbol_array); +#endif + const char *ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 3e68366d485..533c49f4804 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -205,6 +205,19 @@ ftrace_print_symbols_seq(p, value, symbols); \ }) +#undef __print_symbolic_u64 +#if BITS_PER_LONG == 32 +#define __print_symbolic_u64(value, symbol_array...) \ + ({ \ + static const struct trace_print_flags_u64 symbols[] = \ + { symbol_array, { -1, NULL } }; \ + ftrace_print_symbols_seq_u64(p, value, symbols); \ + }) +#else +#define __print_symbolic_u64(value, symbol_array...) \ + __print_symbolic(value, symbol_array) +#endif + #undef __print_hex #define __print_hex(buf, buf_len) ftrace_print_hex_seq(p, buf, buf_len) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index cf535ccedc8..e37de492a9e 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -353,6 +353,33 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, } EXPORT_SYMBOL(ftrace_print_symbols_seq); +#if BITS_PER_LONG == 32 +const char * +ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, + const struct trace_print_flags_u64 *symbol_array) +{ + int i; + const char *ret = p->buffer + p->len; + + for (i = 0; symbol_array[i].name; i++) { + + if (val != symbol_array[i].mask) + continue; + + trace_seq_puts(p, symbol_array[i].name); + break; + } + + if (!p->len) + trace_seq_printf(p, "0x%llx", val); + + trace_seq_putc(p, 0); + + return ret; +} +EXPORT_SYMBOL(ftrace_print_symbols_seq_u64); +#endif + const char * ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) { -- cgit v1.2.3-70-g09d2 From 7f34b746f79c1e1f8fd6d09799d133263ae7a504 Mon Sep 17 00:00:00 2001 From: liubo Date: Tue, 19 Apr 2011 09:35:31 +0800 Subject: tracing: Update btrfs's tracepoints to use u64 interface To avoid 64->32 truncating WARNING, update btrfs's tracepoints. Signed-off-by: Liu Bo Link: http://lkml.kernel.org/r/4DACE6E3.8080200@cn.fujitsu.com Signed-off-by: Steven Rostedt --- include/trace/events/btrfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index f445cff66ab..4114129f079 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -28,7 +28,7 @@ struct extent_buffer; { BTRFS_SHARED_DATA_REF_KEY, "SHARED_DATA_REF" }) #define __show_root_type(obj) \ - __print_symbolic(obj, \ + __print_symbolic_u64(obj, \ { BTRFS_ROOT_TREE_OBJECTID, "ROOT_TREE" }, \ { BTRFS_EXTENT_TREE_OBJECTID, "EXTENT_TREE" }, \ { BTRFS_CHUNK_TREE_OBJECTID, "CHUNK_TREE" }, \ @@ -125,7 +125,7 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, ); #define __show_map_type(type) \ - __print_symbolic(type, \ + __print_symbolic_u64(type, \ { EXTENT_MAP_LAST_BYTE, "LAST_BYTE" }, \ { EXTENT_MAP_HOLE, "HOLE" }, \ { EXTENT_MAP_INLINE, "INLINE" }, \ -- cgit v1.2.3-70-g09d2 From b1cff0ad1062621ae63cb6c5dc4165191fe2e9f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 May 2011 14:27:43 -0400 Subject: ftrace: Add internal recursive checks Witold reported a reboot caused by the selftests of the dynamic function tracer. He sent me a config and I used ktest to do a config_bisect on it (as my config did not cause the crash). It pointed out that the problem config was CONFIG_PROVE_RCU. What happened was that if multiple callbacks are attached to the function tracer, we iterate a list of callbacks. Because the list is managed by synchronize_sched() and preempt_disable, the access to the pointers uses rcu_dereference_raw(). When PROVE_RCU is enabled, the rcu_dereference_raw() calls some debugging functions, which happen to be traced. The tracing of the debug function would then call rcu_dereference_raw() which would then call the debug function and then... well you get the idea. I first wrote two different patches to solve this bug. 1) add a __rcu_dereference_raw() that would not do any checks. 2) add notrace to the offending debug functions. Both of these patches worked. Talking with Paul McKenney on IRC, he suggested to add recursion detection instead. This seemed to be a better solution, so I decided to implement it. As the task_struct already has a trace_recursion to detect recursion in the ring buffer, and that has a very small number it allows, I decided to use that same variable to add flags that can detect the recursion inside the infrastructure of the function tracer. I plan to change it so that the task struct bit can be checked in mcount, but as that requires changes to all archs, I will hold that off to the next merge window. Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Frederic Weisbecker Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/1306348063.1465.116.camel@gandalf.stny.rr.com Reported-by: Witold Baryluk Signed-off-by: Steven Rostedt --- include/linux/sched.h | 2 +- kernel/trace/ftrace.c | 13 ++++++++++++- kernel/trace/ring_buffer.c | 10 +++++----- kernel/trace/trace.h | 15 +++++++++++++++ 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index d8b2d0bec0d..7b78d9cad47 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1513,7 +1513,7 @@ struct task_struct { #ifdef CONFIG_TRACING /* state flags for use by tracers */ unsigned long trace; - /* bitmask of trace recursion */ + /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 25949b33057..1ee417fcbfa 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -109,12 +109,18 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); static void ftrace_global_list_func(unsigned long ip, unsigned long parent_ip) { - struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/ + struct ftrace_ops *op; + + if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) + return; + trace_recursion_set(TRACE_GLOBAL_BIT); + op = rcu_dereference_raw(ftrace_global_list); /*see above*/ while (op != &ftrace_list_end) { op->func(ip, parent_ip); op = rcu_dereference_raw(op->next); /*see above*/ }; + trace_recursion_clear(TRACE_GLOBAL_BIT); } static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) @@ -3490,6 +3496,10 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) { struct ftrace_ops *op; + if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) + return; + + trace_recursion_set(TRACE_INTERNAL_BIT); /* * Some of the ops may be dynamically allocated, * they must be freed after a synchronize_sched(). @@ -3502,6 +3512,7 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) op = rcu_dereference_raw(op->next); }; preempt_enable_notrace(); + trace_recursion_clear(TRACE_INTERNAL_BIT); } static void clear_ftrace_swapper(void) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0ef7b4b2a1f..b0c7aa40794 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2216,7 +2216,7 @@ static noinline void trace_recursive_fail(void) printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" "HC[%lu]:SC[%lu]:NMI[%lu]\n", - current->trace_recursion, + trace_recursion_buffer(), hardirq_count() >> HARDIRQ_SHIFT, softirq_count() >> SOFTIRQ_SHIFT, in_nmi()); @@ -2226,9 +2226,9 @@ static noinline void trace_recursive_fail(void) static inline int trace_recursive_lock(void) { - current->trace_recursion++; + trace_recursion_inc(); - if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) + if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) return 0; trace_recursive_fail(); @@ -2238,9 +2238,9 @@ static inline int trace_recursive_lock(void) static inline void trace_recursive_unlock(void) { - WARN_ON_ONCE(!current->trace_recursion); + WARN_ON_ONCE(!trace_recursion_buffer()); - current->trace_recursion--; + trace_recursion_dec(); } #else diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6b69c4bd306..229f8591f61 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -784,4 +784,19 @@ extern const char *__stop___trace_bprintk_fmt[]; FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" +/* Only current can touch trace_recursion */ +#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) +#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) + +/* Ring buffer has the 10 LSB bits to count */ +#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) + +/* for function tracing recursion */ +#define TRACE_INTERNAL_BIT (1<<11) +#define TRACE_GLOBAL_BIT (1<<12) + +#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) +#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) +#define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) + #endif /* _LINUX_KERNEL_TRACE_H */ -- cgit v1.2.3-70-g09d2 From 4bff4a7ee4b9e487a1bc1d31082e77b6a381418a Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 11 May 2011 16:51:20 +0000 Subject: ARM: arch-shmobile: support SDHI card detection on mackerel, using a GPIO On sh7372 the card-detection pin of SDHI0 can also produce interrupts, when configured as GPIO. Use this feature to power down SDHI0, when no card is plugged in. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-mackerel.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index d4fe74067d6..776f20560e7 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -913,6 +914,17 @@ static int slot_cn7_get_cd(struct platform_device *pdev) } /* SDHI0 */ +static irqreturn_t mackerel_sdhi0_gpio_cd(int irq, void *arg) +{ + struct device *dev = arg; + struct sh_mobile_sdhi_info *info = dev->platform_data; + struct tmio_mmc_data *pdata = info->pdata; + + tmio_mmc_cd_wakeup(pdata); + + return IRQ_HANDLED; +} + static struct sh_mobile_sdhi_info sdhi0_info = { .dma_slave_tx = SHDMA_SLAVE_SDHI0_TX, .dma_slave_rx = SHDMA_SLAVE_SDHI0_RX, @@ -1296,6 +1308,7 @@ static void __init mackerel_init(void) { u32 srcr4; struct clk *clk; + int ret; sh7372_pinmux_init(); @@ -1401,6 +1414,13 @@ static void __init mackerel_init(void) gpio_request(GPIO_FN_SDHID0_1, NULL); gpio_request(GPIO_FN_SDHID0_0, NULL); + ret = request_irq(evt2irq(0x3340), mackerel_sdhi0_gpio_cd, + IRQF_TRIGGER_FALLING, "sdhi0 cd", &sdhi0_device.dev); + if (!ret) + sdhi0_info.tmio_flags |= TMIO_MMC_HAS_COLD_CD; + else + pr_err("Cannot get IRQ #%d: %d\n", evt2irq(0x3340), ret); + #if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) /* enable SDHI1 */ gpio_request(GPIO_FN_SDHICMD1, NULL); -- cgit v1.2.3-70-g09d2 From ea7659fb2b876337aee719d9d5ddb05531dfb334 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Thu, 26 May 2011 10:21:05 +0200 Subject: perf: Remove duplicate headers Signed-off-by: Jesper Juhl Cc: Tom Zanussi Cc: Arnaldo Carvalho de Melo Cc: trivial@kernel.org Cc: Peter Zijlstra Cc: Paul Mackerras Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1105261011290.17400@swampdragon.chaosbits.net Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 2 -- tools/perf/builtin-script.c | 1 - 2 files changed, 3 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index e18eb7ed30a..7b139e1e7e8 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -7,8 +7,6 @@ */ #include "builtin.h" -#include "util/util.h" - #include "util/util.h" #include "util/color.h" #include diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 974f6d3f4e5..22747de7234 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -10,7 +10,6 @@ #include "util/symbol.h" #include "util/thread.h" #include "util/trace-event.h" -#include "util/parse-options.h" #include "util/util.h" #include "util/evlist.h" #include "util/evsel.h" -- cgit v1.2.3-70-g09d2 From b80ef10e84d85a06bcd0b3a24a752ec32d0e0e40 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 26 May 2011 17:12:12 +0900 Subject: x86: Move do_page_fault()'s error path under unlikely() Ingo suggested SIGKILL check should be moved into slowpath function. This will reduce the page fault fastpath impact of this recent commit: 37b23e0525d3: x86,mm: make pagefault killable Suggested-by: Ingo Molnar Signed-off-by: KOSAKI Motohiro Cc: kamezawa.hiroyu@jp.fujitsu.com Cc: minchan.kim@gmail.com Cc: willy@linux.intel.com Cc: Andrew Morton Cc: Linus Torvalds Link: http://lkml.kernel.org/r/4DDE0B5C.9050907@jp.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f7a2a054a3c..2dbf6bf4c7e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -823,16 +823,30 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, force_sig_info_fault(SIGBUS, code, address, tsk, fault); } -static noinline void +static noinline int mm_fault_error(struct pt_regs *regs, unsigned long error_code, unsigned long address, unsigned int fault) { + /* + * Pagefault was interrupted by SIGKILL. We have no reason to + * continue pagefault. + */ + if (fatal_signal_pending(current)) { + if (!(fault & VM_FAULT_RETRY)) + up_read(¤t->mm->mmap_sem); + if (!(error_code & PF_USER)) + no_context(regs, error_code, address); + return 1; + } + if (!(fault & VM_FAULT_ERROR)) + return 0; + if (fault & VM_FAULT_OOM) { /* Kernel mode? Handle exceptions or die: */ if (!(error_code & PF_USER)) { up_read(¤t->mm->mmap_sem); no_context(regs, error_code, address); - return; + return 1; } out_of_memory(regs, error_code, address); @@ -843,6 +857,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, else BUG(); } + return 1; } static int spurious_fault_check(unsigned long error_code, pte_t *pte) @@ -1133,19 +1148,9 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, flags); - if (unlikely(fault & VM_FAULT_ERROR)) { - mm_fault_error(regs, error_code, address, fault); - return; - } - - /* - * Pagefault was interrupted by SIGKILL. We have no reason to - * continue pagefault. - */ - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { - if (!(error_code & PF_USER)) - no_context(regs, error_code, address); - return; + if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { + if (mm_fault_error(regs, error_code, address, fault)) + return; } /* -- cgit v1.2.3-70-g09d2 From ec80fde746e3ccf93895d25ae1a7071c9af52585 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 May 2011 09:53:51 -0300 Subject: perf symbols: Handle /proc/sys/kernel/kptr_restrict Perf uses /proc/modules to figure out where kernel modules are loaded. With the advent of kptr_restrict, non root users get zeroes for all module start addresses. So check if kptr_restrict is non zero and don't generate the syntethic PERF_RECORD_MMAP events for them. Warn the user about it in perf record and in perf report. In perf report the reference relocation symbol being zero means that kptr_restrict was set, thus /proc/kallsyms has only zeroed addresses, so don't use it to fixup symbol addresses when using a valid kallsyms (in the buildid cache) or vmlinux (in the vmlinux path) build-id located automatically or specified by the user. Provide an explanation about it in 'perf report' if kernel samples were taken, checking if a suitable vmlinux or kallsyms was found/specified. Restricted /proc/kallsyms don't go to the buildid cache anymore. Example: [acme@emilia ~]$ perf record -F 100000 sleep 1 WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted, check /proc/sys/kernel/kptr_restrict. Samples in kernel functions may not be resolved if a suitable vmlinux file is not found in the buildid cache or in the vmlinux path. Samples in kernel modules won't be resolved at all. If some relocation was applied (e.g. kexec) symbols may be misresolved even with a suitable vmlinux or kallsyms file. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.005 MB perf.data (~231 samples) ] [acme@emilia ~]$ [acme@emilia ~]$ perf report --stdio Kernel address maps (/proc/{kallsyms,modules}) were restricted, check /proc/sys/kernel/kptr_restrict before running 'perf record'. If some relocation was applied (e.g. kexec) symbols may be misresolved. Samples in kernel modules can't be resolved as well. # Events: 13 cycles # # Overhead Command Shared Object Symbol # ........ ....... ................. ..................... # 20.24% sleep [kernel.kallsyms] [k] page_fault 20.04% sleep [kernel.kallsyms] [k] filemap_fault 19.78% sleep [kernel.kallsyms] [k] __lru_cache_add 19.69% sleep ld-2.12.so [.] memcpy 14.71% sleep [kernel.kallsyms] [k] dput 4.70% sleep [kernel.kallsyms] [k] flush_signal_handlers 0.73% sleep [kernel.kallsyms] [k] perf_event_comm 0.11% sleep [kernel.kallsyms] [k] native_write_msr_safe # # (For a higher level overview, try: perf report --sort comm,dso) # [acme@emilia ~]$ This is because it found a suitable vmlinux (build-id checked) in /lib/modules/2.6.39-rc7+/build/vmlinux (use -v in perf report to see the long file name). If we remove that file from the vmlinux path: [root@emilia ~]# mv /lib/modules/2.6.39-rc7+/build/vmlinux \ /lib/modules/2.6.39-rc7+/build/vmlinux.OFF [acme@emilia ~]$ perf report --stdio [kernel.kallsyms] with build id 57298cdbe0131f6871667ec0eaab4804dcf6f562 not found, continuing without symbols Kernel address maps (/proc/{kallsyms,modules}) were restricted, check /proc/sys/kernel/kptr_restrict before running 'perf record'. As no suitable kallsyms nor vmlinux was found, kernel samples can't be resolved. Samples in kernel modules can't be resolved as well. # Events: 13 cycles # # Overhead Command Shared Object Symbol # ........ ....... ................. ...... # 80.31% sleep [kernel.kallsyms] [k] 0xffffffff8103425a 19.69% sleep ld-2.12.so [.] memcpy # # (For a higher level overview, try: perf report --sort comm,dso) # [acme@emilia ~]$ Reported-by: Stephane Eranian Suggested-by: David Miller Cc: Dave Jones Cc: David Miller Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Kees Cook Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Link: http://lkml.kernel.org/n/tip-mt512joaxxbhhp1odop04yit@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 13 ++++++++++++ tools/perf/builtin-report.c | 26 ++++++++++++++++++++++++ tools/perf/util/event.c | 15 +++++++++++--- tools/perf/util/header.c | 8 ++++++-- tools/perf/util/symbol.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/symbol.h | 3 ++- 6 files changed, 107 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0974f957b8f..2ca107f3efd 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -823,6 +823,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) symbol__init(); + if (symbol_conf.kptr_restrict) + pr_warning("WARNING: Kernel address maps " + "(/proc/{kallsyms,modules}) are restricted, " + "check /proc/sys/kernel/kptr_restrict.\n\n" + "Samples in kernel functions may not be resolved " + "if a suitable vmlinux file is not found in the " + "buildid cache or in the vmlinux path.\n\n" + "Samples in kernel modules won't be resolved " + "at all.\n\n" + "If some relocation was applied (e.g. kexec) " + "symbols may be misresolved even with a suitable " + "vmlinux or kallsyms file.\n\n"); + if (no_buildid_cache || no_buildid) disable_buildid_cache(); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 498c6f70a74..99156c35bc6 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -116,6 +116,9 @@ static int process_sample_event(union perf_event *event, if (al.filtered || (hide_unresolved && al.sym == NULL)) return 0; + if (al.map != NULL) + al.map->dso->hit = 1; + if (perf_session__add_hist_entry(session, &al, sample, evsel)) { pr_debug("problem incrementing symbol period, skipping event\n"); return -1; @@ -249,6 +252,8 @@ static int __cmd_report(void) u64 nr_samples; struct perf_session *session; struct perf_evsel *pos; + struct map *kernel_map; + struct kmap *kernel_kmap; const char *help = "For a higher level overview, try: perf report --sort comm,dso"; signal(SIGINT, sig_handler); @@ -268,6 +273,27 @@ static int __cmd_report(void) if (ret) goto out_delete; + kernel_map = session->host_machine.vmlinux_maps[MAP__FUNCTION]; + kernel_kmap = map__kmap(kernel_map); + if (kernel_map == NULL || + (kernel_map->dso->hit && + (kernel_kmap->ref_reloc_sym == NULL || + kernel_kmap->ref_reloc_sym->addr == 0))) { + const struct dso *kdso = kernel_map->dso; + + ui__warning("Kernel address maps " + "(/proc/{kallsyms,modules}) were restricted, " + "check /proc/sys/kernel/kptr_restrict before " + "running 'perf record'.\n\n%s\n\n" + "Samples in kernel modules can't be resolved " + "as well.\n\n", + RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION]) ? + "As no suitable kallsyms nor vmlinux was found, " + "kernel samples can't be resolved." : + "If some relocation was applied (e.g. kexec) " + "symbols may be misresolved."); + } + if (dump_trace) { perf_session__fprintf_nr_events(session, stdout); goto out_delete; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6635fcd11ca..0fe9adf7637 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -553,9 +553,18 @@ static int perf_event__process_kernel_mmap(union perf_event *event, goto out_problem; perf_event__set_kernel_mmap_len(event, machine->vmlinux_maps); - perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, - symbol_name, - event->mmap.pgoff); + + /* + * Avoid using a zero address (kptr_restrict) for the ref reloc + * symbol. Effectively having zero here means that at record + * time /proc/sys/kernel/kptr_restrict was non zero. + */ + if (event->mmap.pgoff != 0) { + perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, + symbol_name, + event->mmap.pgoff); + } + if (machine__is_default_guest(machine)) { /* * preload dso of guest kernel and modules diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 0717bebc764..afb0849fe53 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -193,9 +193,13 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, *linkname = malloc(size), *targetname; int len, err = -1; - if (is_kallsyms) + if (is_kallsyms) { + if (symbol_conf.kptr_restrict) { + pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); + return 0; + } realname = (char *)name; - else + } else realname = realpath(name, NULL); if (realname == NULL || filename == NULL || linkname == NULL) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 516876dfbe5..eec196329fd 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -676,9 +676,30 @@ discard_symbol: rb_erase(&pos->rb_node, root); return count + moved; } +static bool symbol__restricted_filename(const char *filename, + const char *restricted_filename) +{ + bool restricted = false; + + if (symbol_conf.kptr_restrict) { + char *r = realpath(filename, NULL); + + if (r != NULL) { + restricted = strcmp(r, restricted_filename) == 0; + free(r); + return restricted; + } + } + + return restricted; +} + int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, symbol_filter_t filter) { + if (symbol__restricted_filename(filename, "/proc/kallsyms")) + return -1; + if (dso__load_all_kallsyms(dso, filename, map) < 0) return -1; @@ -1790,6 +1811,9 @@ static int machine__create_modules(struct machine *machine) modules = path; } + if (symbol__restricted_filename(path, "/proc/modules")) + return -1; + file = fopen(modules, "r"); if (file == NULL) return -1; @@ -2239,6 +2263,9 @@ static u64 machine__get_kernel_start_addr(struct machine *machine) } } + if (symbol__restricted_filename(filename, "/proc/kallsyms")) + return 0; + if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0) return 0; @@ -2410,6 +2437,25 @@ static int setup_list(struct strlist **list, const char *list_str, return 0; } +static bool symbol__read_kptr_restrict(void) +{ + bool value = false; + + if (geteuid() != 0) { + FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); + if (fp != NULL) { + char line[8]; + + if (fgets(line, sizeof(line), fp) != NULL) + value = atoi(line) != 0; + + fclose(fp); + } + } + + return value; +} + int symbol__init(void) { const char *symfs; @@ -2456,6 +2502,8 @@ int symbol__init(void) if (symfs != symbol_conf.symfs) free((void *)symfs); + symbol_conf.kptr_restrict = symbol__read_kptr_restrict(); + symbol_conf.initialized = true; return 0; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 242de0101a8..325ee36a9d2 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -75,7 +75,8 @@ struct symbol_conf { use_callchain, exclude_other, show_cpu_utilization, - initialized; + initialized, + kptr_restrict; const char *vmlinux_name, *kallsyms_name, *source_prefix, -- cgit v1.2.3-70-g09d2 From 75911c9bd1134f8c0b682aa1e8a8dbefec3ca07a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 May 2011 10:13:38 -0300 Subject: perf tools: Fix build on older systems Where /usr/include/linux/const.h is not present, e.g. RHEL5. Reported-by: Srikar Dronamraju Cc: Srikar Dronamraju Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Link: http://lkml.kernel.org/n/tip-ypcw2mu0w7dl1rrc6ncz3pee@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 2 ++ tools/perf/util/include/linux/const.h | 1 + 2 files changed, 3 insertions(+) create mode 100644 tools/perf/util/include/linux/const.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 1455413ec7a..032ba6398a5 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -215,11 +215,13 @@ LIB_FILE=$(OUTPUT)libperf.a LIB_H += ../../include/linux/perf_event.h LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h +LIB_H += ../../include/linux/const.h LIB_H += ../../include/linux/hash.h LIB_H += ../../include/linux/stringify.h LIB_H += util/include/linux/bitmap.h LIB_H += util/include/linux/bitops.h LIB_H += util/include/linux/compiler.h +LIB_H += util/include/linux/const.h LIB_H += util/include/linux/ctype.h LIB_H += util/include/linux/kernel.h LIB_H += util/include/linux/list.h diff --git a/tools/perf/util/include/linux/const.h b/tools/perf/util/include/linux/const.h new file mode 100644 index 00000000000..1b476c9ae64 --- /dev/null +++ b/tools/perf/util/include/linux/const.h @@ -0,0 +1 @@ +#include "../../../../include/linux/const.h" -- cgit v1.2.3-70-g09d2 From ba9f207c9f82115aba4ce04b22e0081af0ae300f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 20 May 2011 02:09:54 +0200 Subject: rcu: Fix unpaired rcu_irq_enter() from locking selftests HARDIRQ_ENTER() maps to irq_enter() which calls rcu_irq_enter(). But HARDIRQ_EXIT() maps to __irq_exit() which doesn't call rcu_irq_exit(). So for every locking selftest that simulates hardirq disabled, we create an imbalance in the rcu extended quiescent state internal state. As a result, after the first missing rcu_irq_exit(), subsequent irqs won't exit dyntick-idle mode after leaving the interrupt handler. This means that RCU won't see the affected CPU as being in an extended quiescent state, resulting in long grace-period delays (as in grace periods extending for hours). To fix this, just use __irq_enter() to simulate the hardirq context. This is sufficient for the locking selftests as we don't need to exit any extended quiescent state or perform any check that irqs normally do when they wake up from idle. As a side effect, this patch makes it possible to restore "rcu: Decrease memory-barrier usage based on semi-formal proof", which eventually helped finding this bug. Reported-and-tested-by: Yinghai Lu Signed-off-by: Frederic Weisbecker Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Stable Signed-off-by: Paul E. McKenney --- lib/locking-selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 619313ed6c4..507a22fab73 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -144,7 +144,7 @@ static void init_shared_classes(void) #define HARDIRQ_ENTER() \ local_irq_disable(); \ - irq_enter(); \ + __irq_enter(); \ WARN_ON(!in_irq()); #define HARDIRQ_EXIT() \ -- cgit v1.2.3-70-g09d2 From 0bbcc529fcea9c7de5e2e7243f9913b8f7302a8c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 May 2011 02:24:04 -0700 Subject: rcu: Add memory barriers Add the memory barriers added by e59fb3120b. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e486f7c3ffb..3731141d8ad 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -907,6 +907,12 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) unsigned long gp_duration; WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); + + /* + * Ensure that all grace-period and pre-grace-period activity + * is seen before the assignment to rsp->completed. + */ + smp_mb(); /* See above block comment. */ gp_duration = jiffies - rsp->gp_start; if (gp_duration > rsp->gp_max) rsp->gp_max = gp_duration; -- cgit v1.2.3-70-g09d2 From 1135633bddcf7a819a1490c18d04965c490bcc1e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 May 2011 02:44:06 -0700 Subject: rcu: Remove old memory barriers from rcu_process_callbacks() Second step of partitioning of commit e59fb3120b. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 3731141d8ad..011bf6f261a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1460,25 +1460,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) */ static void rcu_process_callbacks(void) { - /* - * Memory references from any prior RCU read-side critical sections - * executed by the interrupted code must be seen before any RCU - * grace-period manipulations below. - */ - smp_mb(); /* See above block comment. */ - __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); rcu_preempt_process_callbacks(); - /* - * Memory references from any later RCU read-side critical sections - * executed by the interrupted code must be seen after any RCU - * grace-period manipulations above. - */ - smp_mb(); /* See above block comment. */ - /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ rcu_needs_cpu_flush(); } -- cgit v1.2.3-70-g09d2 From b5904090c754327ed6c2ecaefed4f7d473df393f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 May 2011 02:52:04 -0700 Subject: rcu: Don't do reschedule unless in irq Condition the set_need_resched() in rcu_irq_exit() on in_irq(). This should be a no-op, because rcu_irq_exit() should only be called from irq. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 011bf6f261a..195b3a3313e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -421,8 +421,9 @@ void rcu_irq_exit(void) WARN_ON_ONCE(rdtp->dynticks & 0x1); /* If the interrupt queued a callback, get out of dyntick mode. */ - if (__this_cpu_read(rcu_sched_data.nxtlist) || - __this_cpu_read(rcu_bh_data.nxtlist)) + if (in_irq() && + (__this_cpu_read(rcu_sched_data.nxtlist) || + __this_cpu_read(rcu_bh_data.nxtlist))) set_need_resched(); } -- cgit v1.2.3-70-g09d2 From 4305ce7894dd38b0633bfc8978437320119223bd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 May 2011 14:27:31 -0700 Subject: rcu: Make rcu_enter_nohz() pay attention to nesting The old version of rcu_enter_nohz() forced RCU into nohz mode even if the nesting count was non-zero. This change causes rcu_enter_nohz() to hold off for non-zero nesting counts. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 195b3a3313e..99c6038ad04 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -324,8 +324,8 @@ void rcu_enter_nohz(void) smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); - rdtp->dynticks++; - rdtp->dynticks_nesting--; + if (--rdtp->dynticks_nesting == 0) + rdtp->dynticks++; WARN_ON_ONCE(rdtp->dynticks & 0x1); local_irq_restore(flags); } -- cgit v1.2.3-70-g09d2 From 23b5c8fa01b723c70a20d6e4ef4ff54c7656d6e1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 7 Sep 2010 10:38:22 -0700 Subject: rcu: Decrease memory-barrier usage based on semi-formal proof (Note: this was reverted, and is now being re-applied in pieces, with this being the fifth and final piece. See below for the reason that it is now felt to be safe to re-apply this.) Commit d09b62d fixed grace-period synchronization, but left some smp_mb() invocations in rcu_process_callbacks() that are no longer needed, but sheer paranoia prevented them from being removed. This commit removes them and provides a proof of correctness in their absence. It also adds a memory barrier to rcu_report_qs_rsp() immediately before the update to rsp->completed in order to handle the theoretical possibility that the compiler or CPU might move massive quantities of code into a lock-based critical section. This also proves that the sheer paranoia was not entirely unjustified, at least from a theoretical point of view. In addition, the old dyntick-idle synchronization depended on the fact that grace periods were many milliseconds in duration, so that it could be assumed that no dyntick-idle CPU could reorder a memory reference across an entire grace period. Unfortunately for this design, the addition of expedited grace periods breaks this assumption, which has the unfortunate side-effect of requiring atomic operations in the functions that track dyntick-idle state for RCU. (There is some hope that the algorithms used in user-level RCU might be applied here, but some work is required to handle the NMIs that user-space applications can happily ignore. For the short term, better safe than sorry.) This proof assumes that neither compiler nor CPU will allow a lock acquisition and release to be reordered, as doing so can result in deadlock. The proof is as follows: 1. A given CPU declares a quiescent state under the protection of its leaf rcu_node's lock. 2. If there is more than one level of rcu_node hierarchy, the last CPU to declare a quiescent state will also acquire the ->lock of the next rcu_node up in the hierarchy, but only after releasing the lower level's lock. The acquisition of this lock clearly cannot occur prior to the acquisition of the leaf node's lock. 3. Step 2 repeats until we reach the root rcu_node structure. Please note again that only one lock is held at a time through this process. The acquisition of the root rcu_node's ->lock must occur after the release of that of the leaf rcu_node. 4. At this point, we set the ->completed field in the rcu_state structure in rcu_report_qs_rsp(). However, if the rcu_node hierarchy contains only one rcu_node, then in theory the code preceding the quiescent state could leak into the critical section. We therefore precede the update of ->completed with a memory barrier. All CPUs will therefore agree that any updates preceding any report of a quiescent state will have happened before the update of ->completed. 5. Regardless of whether a new grace period is needed, rcu_start_gp() will propagate the new value of ->completed to all of the leaf rcu_node structures, under the protection of each rcu_node's ->lock. If a new grace period is needed immediately, this propagation will occur in the same critical section that ->completed was set in, but courtesy of the memory barrier in #4 above, is still seen to follow any pre-quiescent-state activity. 6. When a given CPU invokes __rcu_process_gp_end(), it becomes aware of the end of the old grace period and therefore makes any RCU callbacks that were waiting on that grace period eligible for invocation. If this CPU is the same one that detected the end of the grace period, and if there is but a single rcu_node in the hierarchy, we will still be in the single critical section. In this case, the memory barrier in step #4 guarantees that all callbacks will be seen to execute after each CPU's quiescent state. On the other hand, if this is a different CPU, it will acquire the leaf rcu_node's ->lock, and will again be serialized after each CPU's quiescent state for the old grace period. On the strength of this proof, this commit therefore removes the memory barriers from rcu_process_callbacks() and adds one to rcu_report_qs_rsp(). The effect is to reduce the number of memory barriers by one and to reduce the frequency of execution from about once per scheduling tick per CPU to once per grace period. This was reverted do to hangs found during testing by Yinghai Lu and Ingo Molnar. Frederic Weisbecker supplied Yinghai with tracing that located the underlying problem, and Frederic also provided the fix. The underlying problem was that the HARDIRQ_ENTER() macro from lib/locking-selftest.c invoked irq_enter(), which in turn invokes rcu_irq_enter(), but HARDIRQ_EXIT() invoked __irq_exit(), which does not invoke rcu_irq_exit(). This situation resulted in calls to rcu_irq_enter() that were not balanced by the required calls to rcu_irq_exit(). Therefore, after these locking selftests completed, RCU's dyntick-idle nesting count was a large number (for example, 72), which caused RCU to to conclude that the affected CPU was not in dyntick-idle mode when in fact it was. RCU would therefore incorrectly wait for this dyntick-idle CPU, resulting in hangs. In contrast, with Frederic's patch, which replaces the irq_enter() in HARDIRQ_ENTER() with an __irq_enter(), these tests don't ever call either rcu_irq_enter() or rcu_irq_exit(), which works because the CPU running the test is already marked as not being in dyntick-idle mode. This means that the rcu_irq_enter() and rcu_irq_exit() calls and RCU then has no problem working out which CPUs are in dyntick-idle mode and which are not. The reason that the imbalance was not noticed before the barrier patch was applied is that the old implementation of rcu_enter_nohz() ignored the nesting depth. This could still result in delays, but much shorter ones. Whenever there was a delay, RCU would IPI the CPU with the unbalanced nesting level, which would eventually result in rcu_enter_nohz() being called, which in turn would force RCU to see that the CPU was in dyntick-idle mode. The reason that very few people noticed the problem is that the mismatched irq_enter() vs. __irq_exit() occured only when the kernel was built with CONFIG_DEBUG_LOCKING_API_SELFTESTS. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/RCU/trace.txt | 17 ++----- kernel/rcutree.c | 111 ++++++++++++++++++++------------------------ kernel/rcutree.h | 9 ++-- kernel/rcutree_plugin.h | 7 ++- kernel/rcutree_trace.c | 12 ++--- 5 files changed, 67 insertions(+), 89 deletions(-) diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index c078ad48f7a..8173cec473a 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -99,18 +99,11 @@ o "qp" indicates that RCU still expects a quiescent state from o "dt" is the current value of the dyntick counter that is incremented when entering or leaving dynticks idle state, either by the - scheduler or by irq. The number after the "/" is the interrupt - nesting depth when in dyntick-idle state, or one greater than - the interrupt-nesting depth otherwise. - - This field is displayed only for CONFIG_NO_HZ kernels. - -o "dn" is the current value of the dyntick counter that is incremented - when entering or leaving dynticks idle state via NMI. If both - the "dt" and "dn" values are even, then this CPU is in dynticks - idle mode and may be ignored by RCU. If either of these two - counters is odd, then RCU must be alert to the possibility of - an RCU read-side critical section running on this CPU. + scheduler or by irq. This number is even if the CPU is in + dyntick idle mode and odd otherwise. The number after the first + "/" is the interrupt nesting depth when in dyntick-idle state, + or one greater than the interrupt-nesting depth otherwise. + The number after the second "/" is the NMI nesting depth. This field is displayed only for CONFIG_NO_HZ kernels. diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 99c6038ad04..5616b17e4a2 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); #ifdef CONFIG_NO_HZ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = 1, - .dynticks = 1, + .dynticks = ATOMIC_INIT(1), }; #endif /* #ifdef CONFIG_NO_HZ */ @@ -321,13 +321,25 @@ void rcu_enter_nohz(void) unsigned long flags; struct rcu_dynticks *rdtp; - smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); - if (--rdtp->dynticks_nesting == 0) - rdtp->dynticks++; - WARN_ON_ONCE(rdtp->dynticks & 0x1); + if (--rdtp->dynticks_nesting) { + local_irq_restore(flags); + return; + } + /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ + smp_mb__before_atomic_inc(); /* See above. */ + atomic_inc(&rdtp->dynticks); + smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ + WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); local_irq_restore(flags); + + /* If the interrupt queued a callback, get out of dyntick mode. */ + if (in_irq() && + (__get_cpu_var(rcu_sched_data).nxtlist || + __get_cpu_var(rcu_bh_data).nxtlist || + rcu_preempt_needs_cpu(smp_processor_id()))) + set_need_resched(); } /* @@ -343,11 +355,16 @@ void rcu_exit_nohz(void) local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); - rdtp->dynticks++; - rdtp->dynticks_nesting++; - WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); + if (rdtp->dynticks_nesting++) { + local_irq_restore(flags); + return; + } + smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ + atomic_inc(&rdtp->dynticks); + /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ + smp_mb__after_atomic_inc(); /* See above. */ + WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); local_irq_restore(flags); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ } /** @@ -361,11 +378,15 @@ void rcu_nmi_enter(void) { struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - if (rdtp->dynticks & 0x1) + if (rdtp->dynticks_nmi_nesting == 0 && + (atomic_read(&rdtp->dynticks) & 0x1)) return; - rdtp->dynticks_nmi++; - WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1)); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ + rdtp->dynticks_nmi_nesting++; + smp_mb__before_atomic_inc(); /* Force delay from prior write. */ + atomic_inc(&rdtp->dynticks); + /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ + smp_mb__after_atomic_inc(); /* See above. */ + WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); } /** @@ -379,11 +400,14 @@ void rcu_nmi_exit(void) { struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - if (rdtp->dynticks & 0x1) + if (rdtp->dynticks_nmi_nesting == 0 || + --rdtp->dynticks_nmi_nesting != 0) return; - smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - rdtp->dynticks_nmi++; - WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1); + /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ + smp_mb__before_atomic_inc(); /* See above. */ + atomic_inc(&rdtp->dynticks); + smp_mb__after_atomic_inc(); /* Force delay to next write. */ + WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); } /** @@ -394,13 +418,7 @@ void rcu_nmi_exit(void) */ void rcu_irq_enter(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - - if (rdtp->dynticks_nesting++) - return; - rdtp->dynticks++; - WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ + rcu_exit_nohz(); } /** @@ -412,19 +430,7 @@ void rcu_irq_enter(void) */ void rcu_irq_exit(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - - if (--rdtp->dynticks_nesting) - return; - smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - rdtp->dynticks++; - WARN_ON_ONCE(rdtp->dynticks & 0x1); - - /* If the interrupt queued a callback, get out of dyntick mode. */ - if (in_irq() && - (__this_cpu_read(rcu_sched_data.nxtlist) || - __this_cpu_read(rcu_bh_data.nxtlist))) - set_need_resched(); + rcu_enter_nohz(); } #ifdef CONFIG_SMP @@ -436,19 +442,8 @@ void rcu_irq_exit(void) */ static int dyntick_save_progress_counter(struct rcu_data *rdp) { - int ret; - int snap; - int snap_nmi; - - snap = rdp->dynticks->dynticks; - snap_nmi = rdp->dynticks->dynticks_nmi; - smp_mb(); /* Order sampling of snap with end of grace period. */ - rdp->dynticks_snap = snap; - rdp->dynticks_nmi_snap = snap_nmi; - ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0); - if (ret) - rdp->dynticks_fqs++; - return ret; + rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); + return 0; } /* @@ -459,16 +454,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) */ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { - long curr; - long curr_nmi; - long snap; - long snap_nmi; + unsigned long curr; + unsigned long snap; - curr = rdp->dynticks->dynticks; - snap = rdp->dynticks_snap; - curr_nmi = rdp->dynticks->dynticks_nmi; - snap_nmi = rdp->dynticks_nmi_snap; - smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); + snap = (unsigned long)rdp->dynticks_snap; /* * If the CPU passed through or entered a dynticks idle phase with @@ -478,8 +468,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * read-side critical section that started before the beginning * of the current RCU grace period. */ - if ((curr != snap || (curr & 0x1) == 0) && - (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) { + if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { rdp->dynticks_fqs++; return 1; } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 257664815d5..93d4a1c2e88 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -84,11 +84,9 @@ * Dynticks per-CPU state. */ struct rcu_dynticks { - int dynticks_nesting; /* Track nesting level, sort of. */ - int dynticks; /* Even value for dynticks-idle, else odd. */ - int dynticks_nmi; /* Even value for either dynticks-idle or */ - /* not in nmi handler, else odd. So this */ - /* remains even for nmi from irq handler. */ + int dynticks_nesting; /* Track irq/process nesting level. */ + int dynticks_nmi_nesting; /* Track NMI nesting level. */ + atomic_t dynticks; /* Even value for dynticks-idle, else odd. */ }; /* RCU's kthread states for tracing. */ @@ -284,7 +282,6 @@ struct rcu_data { /* 3) dynticks interface. */ struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ int dynticks_snap; /* Per-GP tracking for dynticks. */ - int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ #endif /* #ifdef CONFIG_NO_HZ */ /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3f6559a5f5c..ed339702481 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1520,7 +1520,6 @@ int rcu_needs_cpu(int cpu) { int c = 0; int snap; - int snap_nmi; int thatcpu; /* Check for being in the holdoff period. */ @@ -1531,10 +1530,10 @@ int rcu_needs_cpu(int cpu) for_each_online_cpu(thatcpu) { if (thatcpu == cpu) continue; - snap = per_cpu(rcu_dynticks, thatcpu).dynticks; - snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi; + snap = atomic_add_return(0, &per_cpu(rcu_dynticks, + thatcpu).dynticks); smp_mb(); /* Order sampling of snap with end of grace period. */ - if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) { + if ((snap & 0x1) != 0) { per_cpu(rcu_dyntick_drain, cpu) = 0; per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; return rcu_needs_cpu_quick_check(cpu); diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index aa0fd72b4bc..9678cc3650f 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -69,10 +69,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->passed_quiesc, rdp->passed_quiesc_completed, rdp->qs_pending); #ifdef CONFIG_NO_HZ - seq_printf(m, " dt=%d/%d dn=%d df=%lu", - rdp->dynticks->dynticks, + seq_printf(m, " dt=%d/%d/%d df=%lu", + atomic_read(&rdp->dynticks->dynticks), rdp->dynticks->dynticks_nesting, - rdp->dynticks->dynticks_nmi, + rdp->dynticks->dynticks_nmi_nesting, rdp->dynticks_fqs); #endif /* #ifdef CONFIG_NO_HZ */ seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); @@ -141,9 +141,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) rdp->qs_pending); #ifdef CONFIG_NO_HZ seq_printf(m, ",%d,%d,%d,%lu", - rdp->dynticks->dynticks, + atomic_read(&rdp->dynticks->dynticks), rdp->dynticks->dynticks_nesting, - rdp->dynticks->dynticks_nmi, + rdp->dynticks->dynticks_nmi_nesting, rdp->dynticks_fqs); #endif /* #ifdef CONFIG_NO_HZ */ seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); @@ -167,7 +167,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) { seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); #ifdef CONFIG_NO_HZ - seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); + seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); #endif /* #ifdef CONFIG_NO_HZ */ seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); #ifdef CONFIG_TREE_PREEMPT_RCU -- cgit v1.2.3-70-g09d2 From e9cdd343a5e42c43bcda01e609fa23089e026470 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 26 May 2011 11:19:52 -0400 Subject: x86, amd: Do not enable ARAT feature on AMD processors below family 0x12 Commit b87cf80af3ba4b4c008b4face3c68d604e1715c6 added support for ARAT (Always Running APIC timer) on AMD processors that are not affected by erratum 400. This erratum is present on certain processor families and prevents APIC timer from waking up the CPU when it is in a deep C state, including C1E state. Determining whether a processor is affected by this erratum may have some corner cases and handling these cases is somewhat complicated. In the interest of simplicity we won't claim ARAT support on processor families below 0x12 and will go back to broadcasting timer when going idle. Signed-off-by: Boris Ostrovsky Link: http://lkml.kernel.org/r/1306423192-19774-1-git-send-email-ostr@amd64.org Tested-by: Boris Petkov Cc: Hans Rosenfeld Cc: Andreas Herrmann Cc: Chuck Ebbert Cc: stable@kernel.org # 32.x, 38.x, 39.x Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8f5cabb3c5b..b13ed393dfc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -612,8 +612,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } #endif - /* As a rule processors have APIC timer running in deep C states */ - if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400)) + /* + * Family 0x12 and above processors have APIC timer + * running in deep C states. + */ + if (c->x86 > 0x11) set_cpu_cap(c, X86_FEATURE_ARAT); /* -- cgit v1.2.3-70-g09d2 From 21bc7af6e5e684b44725b20f679e701e38ceef15 Mon Sep 17 00:00:00 2001 From: Yogesh Ashok Powar Date: Wed, 18 May 2011 12:02:03 -0700 Subject: mwifiex: correct event header length While decoding received event packet from firmware, 4 bytes of interface header are already removed unconditionally. So for handling event only 4 more bytes needs to be pulled. This is achieved by changing event header length to 4. Almost all the events, except BA stream related and AMSDU aggregation control events, do not have the payload in their event skb. Such events handling depends only on the event ID. This event ID is the first four bytes of the event skb, which is copied to a separate variable before pulling the skb header. Hence event handling worked only for those events that didn't have payload in event skb. This patch fixes the broken event path of the events with payload in their event skb without harming existing working event path for the events without payload. Signed-off-by: Yogesh Ashok Powar Signed-off-by: Kiran Divekar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/sdio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mwifiex/sdio.h b/drivers/net/wireless/mwifiex/sdio.h index a0e9bc5253e..4e97e90aa39 100644 --- a/drivers/net/wireless/mwifiex/sdio.h +++ b/drivers/net/wireless/mwifiex/sdio.h @@ -167,8 +167,8 @@ /* Rx unit register */ #define CARD_RX_UNIT_REG 0x63 -/* Event header Len*/ -#define MWIFIEX_EVENT_HEADER_LEN 8 +/* Event header len w/o 4 bytes of interface header */ +#define MWIFIEX_EVENT_HEADER_LEN 4 /* Max retry number of CMD53 write */ #define MAX_WRITE_IOMEM_RETRY 2 -- cgit v1.2.3-70-g09d2 From 208c72f4fe44fe09577e7975ba0e7fa0278f3d03 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 19 May 2011 00:43:38 +0300 Subject: nl80211: fix check for valid SSID size in scan operations In both trigger_scan and sched_scan operations, we were checking for the SSID length before assigning the value correctly. Since the memory was just kzalloc'ed, the check was always failing and SSID with over 32 characters were allowed to go through. This was causing a buffer overflow when copying the actual SSID to the proper place. This bug has been there since 2.6.29-rc4. Cc: stable@kernel.org Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ec83f413a7e..88a565f130a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3406,12 +3406,12 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) i = 0; if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { + request->ssids[i].ssid_len = nla_len(attr); if (request->ssids[i].ssid_len > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; goto out_free; } memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); - request->ssids[i].ssid_len = nla_len(attr); i++; } } @@ -3572,6 +3572,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { + request->ssids[i].ssid_len = nla_len(attr); if (request->ssids[i].ssid_len > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; @@ -3579,7 +3580,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, } memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); - request->ssids[i].ssid_len = nla_len(attr); i++; } } -- cgit v1.2.3-70-g09d2 From a9e12869758430424804dd4332e0d2afdfdf00b0 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 19 May 2011 10:17:04 -0500 Subject: rtlwifi: Fix kernel panic resulting from RX buffer allocation failure To handle amsdu_8k capability, the PCI routine of this driver must allocate receive buffers of order 2. Under heavy load, this causes fragmentation of memory. The present code releases the current buffer before checking to see if a new one is availble. Recovery from allocation failures is not possible, which results in kernel panics. The fix is to reorder the code to check that a new buffer can be allocated before the old one is released. If not possible, the received frame is dropped and the old one is reused. Without this change, it is impossible to transfer a 2 GB file without a kernel panic. Signed-off-by: Larry Finger Cc: Stable [2.6.{37,38,39}] Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/pci.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index a4095284543..c2b83a57c58 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -669,11 +669,6 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) &rx_status, (u8 *) pdesc, skb); - pci_unmap_single(rtlpci->pdev, - *((dma_addr_t *) skb->cb), - rtlpci->rxbuffersize, - PCI_DMA_FROMDEVICE); - skb_put(skb, rtlpriv->cfg->ops->get_desc((u8 *) pdesc, false, HW_DESC_RXPKT_LEN)); @@ -690,6 +685,21 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) hdr = rtl_get_hdr(skb); fc = rtl_get_fc(skb); + /* try for new buffer - if allocation fails, drop + * frame and reuse old buffer + */ + new_skb = dev_alloc_skb(rtlpci->rxbuffersize); + if (unlikely(!new_skb)) { + RT_TRACE(rtlpriv, (COMP_INTR | COMP_RECV), + DBG_DMESG, + ("can't alloc skb for rx\n")); + goto done; + } + pci_unmap_single(rtlpci->pdev, + *((dma_addr_t *) skb->cb), + rtlpci->rxbuffersize, + PCI_DMA_FROMDEVICE); + if (!stats.crc || !stats.hwerror) { memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status)); @@ -758,15 +768,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) rtl_lps_leave(hw); } - new_skb = dev_alloc_skb(rtlpci->rxbuffersize); - if (unlikely(!new_skb)) { - RT_TRACE(rtlpriv, (COMP_INTR | COMP_RECV), - DBG_DMESG, - ("can't alloc skb for rx\n")); - goto done; - } skb = new_skb; - /*skb->dev = dev; */ rtlpci->rx_ring[rx_queue_idx].rx_buf[rtlpci-> rx_ring -- cgit v1.2.3-70-g09d2 From 0019a2c9277bf6d083032a5a9857249e75407a8c Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 19 May 2011 11:48:45 -0500 Subject: rtlwifi: Use order 2 RX buffer allocation only if necessary Although a previous fix handles the kernel panics that result from failure to allocate a new RX buffer, memory fragmentation can be reduced if the amsdu_8k capability is disabled as new buffers need only be of O(0), not O(2). Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index c2b83a57c58..89100e7c553 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -1115,6 +1115,13 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw) rtlpci->rx_ring[rx_queue_idx].idx = 0; + /* If amsdu_8k is disabled, set buffersize to 4096. This + * change will reduce memory fragmentation. + */ + if (rtlpci->rxbuffersize > 4096 && + rtlpriv->rtlhal.disable_amsdu_8k) + rtlpci->rxbuffersize = 4096; + for (i = 0; i < rtlpci->rxringcount; i++) { struct sk_buff *skb = dev_alloc_skb(rtlpci->rxbuffersize); -- cgit v1.2.3-70-g09d2 From fb23d86382a088d50020fd05024d40af5b00f885 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 20 May 2011 01:04:46 +0200 Subject: b43: N-PHY: initialize last var in calibration function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Larry Finger Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- drivers/net/wireless/b43/phy_n.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index 9ed65157bef..05960ddde24 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -3093,7 +3093,7 @@ static int b43_nphy_cal_tx_iq_lo(struct b43_wldev *dev, int freq; bool avoid = false; u8 length; - u16 tmp, core, type, count, max, numb, last, cmd; + u16 tmp, core, type, count, max, numb, last = 0, cmd; const u16 *table; bool phy6or5x; -- cgit v1.2.3-70-g09d2 From a4d86d953b8593791cb29cf2acffd48f9ee6c4f9 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 20 May 2011 17:52:10 +0530 Subject: ath9k: Reset chip on baseband hang Resetting hardware helps to recover from baseband hang/panic for AR9003 based chips. Cc: stable@kernel.org Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index bd18b1d01be..409ef5885a1 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -670,7 +670,8 @@ void ath9k_tasklet(unsigned long data) u32 status = sc->intrstatus; u32 rxmask; - if (status & ATH9K_INT_FATAL) { + if ((status & ATH9K_INT_FATAL) || + (status & ATH9K_INT_BB_WATCHDOG)) { ath_reset(sc, true); return; } @@ -737,6 +738,7 @@ irqreturn_t ath_isr(int irq, void *dev) { #define SCHED_INTR ( \ ATH9K_INT_FATAL | \ + ATH9K_INT_BB_WATCHDOG | \ ATH9K_INT_RXORN | \ ATH9K_INT_RXEOL | \ ATH9K_INT_RX | \ -- cgit v1.2.3-70-g09d2 From 51ac8cbb2176dc159ee910d7074c6796079c3068 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 20 May 2011 17:52:13 +0530 Subject: ath9k_hw: disable phy restart on baseband panic caused by RXSM While receiving unsupported rate frame rx state machine gets into a state 0xb and if phy_restart happens in that state, BB would go hang. If RXSM is in 0xb state after first bb panic, ensure to disable the phy_restart. Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9003_phy.c | 22 ++++++++++++++++++++++ drivers/net/wireless/ath/ath9k/hw.c | 5 ++++- drivers/net/wireless/ath/ath9k/hw.h | 2 ++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index eee23ecd118..892c48b1543 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -1381,3 +1381,25 @@ void ar9003_hw_bb_watchdog_dbg_info(struct ath_hw *ah) "==== BB update: done ====\n\n"); } EXPORT_SYMBOL(ar9003_hw_bb_watchdog_dbg_info); + +void ar9003_hw_disable_phy_restart(struct ath_hw *ah) +{ + u32 val; + + /* While receiving unsupported rate frame rx state machine + * gets into a state 0xb and if phy_restart happens in that + * state, BB would go hang. If RXSM is in 0xb state after + * first bb panic, ensure to disable the phy_restart. + */ + if (!((MS(ah->bb_watchdog_last_status, + AR_PHY_WATCHDOG_RX_OFDM_SM) == 0xb) || + ah->bb_hang_rx_ofdm)) + return; + + ah->bb_hang_rx_ofdm = true; + val = REG_READ(ah, AR_PHY_RESTART); + val &= ~AR_PHY_RESTART_ENA; + + REG_WRITE(ah, AR_PHY_RESTART, val); +} +EXPORT_SYMBOL(ar9003_hw_disable_phy_restart); diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 72543ce8f61..1be7c8bbef8 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -1555,9 +1555,12 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, if (ah->btcoex_hw.enabled) ath9k_hw_btcoex_enable(ah); - if (AR_SREV_9300_20_OR_LATER(ah)) + if (AR_SREV_9300_20_OR_LATER(ah)) { ar9003_hw_bb_watchdog_config(ah); + ar9003_hw_disable_phy_restart(ah); + } + ath9k_hw_apply_gpio_override(ah); return 0; diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 57435ce6279..4b157c53d1a 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -842,6 +842,7 @@ struct ath_hw { u32 bb_watchdog_last_status; u32 bb_watchdog_timeout_ms; /* in ms, 0 to disable */ + u8 bb_hang_rx_ofdm; /* true if bb hang due to rx_ofdm */ unsigned int paprd_target_power; unsigned int paprd_training_power; @@ -990,6 +991,7 @@ void ar9002_hw_enable_wep_aggregation(struct ath_hw *ah); void ar9003_hw_bb_watchdog_config(struct ath_hw *ah); void ar9003_hw_bb_watchdog_read(struct ath_hw *ah); void ar9003_hw_bb_watchdog_dbg_info(struct ath_hw *ah); +void ar9003_hw_disable_phy_restart(struct ath_hw *ah); void ar9003_paprd_enable(struct ath_hw *ah, bool val); void ar9003_paprd_populate_single_table(struct ath_hw *ah, struct ath9k_hw_cal_data *caldata, -- cgit v1.2.3-70-g09d2 From 41e2b05b9598d6bdf91fc20280bfc538d853f769 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 20 May 2011 17:52:14 +0530 Subject: ath9k: set 40 Mhz rate only if hw is configured in ht40 Whenever there is a channel width change from 40 Mhz to 20 Mhz, the hardware is reconfigured to ht20. Meantime before doing the rate control updation, the packets are being transmitted are selected rate with IEEE80211_TX_RC_40_MHZ_WIDTH. While transmitting ht40 rate packets in ht20 mode is causing baseband panic with AR9003 based chips. ==== BB update: BB status=0x02001109 ==== ath: ** BB state: wd=1 det=1 rdar=0 rOFDM=1 rCCK=1 tOFDM=0 tCCK=0 agc=2 src=0 ** ath: ** BB WD cntl: cntl1=0xffff0085 cntl2=0x00000004 ** ath: ** BB mode: BB_gen_controls=0x000033c0 ** ath: ** BB busy times: rx_clear=99%, rx_frame=0%, tx_frame=0% ** ath: ==== BB update: done ==== Cc: stable@kernel.org Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/rc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index 133502b9c83..c0d0773baa2 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -689,7 +689,8 @@ static void ath_rc_rate_set_series(const struct ath_rate_table *rate_table, if (WLAN_RC_PHY_HT(rate_table->info[rix].phy)) { rate->flags |= IEEE80211_TX_RC_MCS; - if (WLAN_RC_PHY_40(rate_table->info[rix].phy)) + if (WLAN_RC_PHY_40(rate_table->info[rix].phy) && + conf_is_ht40(&txrc->hw->conf)) rate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (WLAN_RC_PHY_SGI(rate_table->info[rix].phy)) rate->flags |= IEEE80211_TX_RC_SHORT_GI; -- cgit v1.2.3-70-g09d2 From 1d38c16ce4156f63b45abbd09dd28ca2ef5172b4 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 20 May 2011 17:52:15 +0530 Subject: mac80211: stop queues before rate control updation Stop tx queues before updating rate control to ensure proper rate selection. Otherwise packets can be transmitted in 40 Mhz whereas hw is configured in HT20. Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4f6b2675e41..746595597b6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -232,6 +232,9 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, WARN_ON(!ieee80211_set_channel_type(local, sdata, channel_type)); } + ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); + /* channel_type change automatically detected */ ieee80211_hw_config(local, 0); @@ -245,6 +248,9 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); } + ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); + ht_opmode = le16_to_cpu(hti->operation_mode); /* if bss configuration changed store the new one */ -- cgit v1.2.3-70-g09d2 From bc9af76b1e87e4f925f97368dae6c22266922e8b Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 25 May 2011 16:56:34 +0530 Subject: dmaengine: add TODO items for future work on dma drivers Signed-off-by: Vinod Koul Signed-off-by: Dan Williams --- drivers/dma/TODO | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 drivers/dma/TODO diff --git a/drivers/dma/TODO b/drivers/dma/TODO new file mode 100644 index 00000000000..a4af8589330 --- /dev/null +++ b/drivers/dma/TODO @@ -0,0 +1,14 @@ +TODO for slave dma + +1. Move remaining drivers to use new slave interface +2. Remove old slave pointer machansim +3. Make issue_pending to start the transaction in below drivers + - mpc512x_dma + - imx-dma + - imx-sdma + - mxs-dma.c + - dw_dmac + - intel_mid_dma + - ste_dma40 +4. Check other subsystems for dma drivers and merge/move to dmaengine +5. Remove dma_slave_config's dma direction. -- cgit v1.2.3-70-g09d2 From 7fe4fc881d1340f17396f52c836e597dadadea42 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Wed, 25 May 2011 10:33:17 -0700 Subject: Intel xhci: Add PCI id for Panther Point xHCI host. This adds the PCI ID for the xHCI (USB 3.0) host controller in the Intel Panther Point chipset. It will be used by both the EHCI and xHCI driver in the following patches. Signed-off-by: Sarah Sharp --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 24787b75128..a311008af5e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2483,6 +2483,7 @@ #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI 0x1e31 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN 0x1e40 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX 0x1e5f #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN 0x2310 -- cgit v1.2.3-70-g09d2 From 5dbd05d46fb7d849570c8fa09d48591aa7ce1766 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 25 May 2011 23:36:30 +0530 Subject: maintainers: add dma engine tree details Signed-off-by: Vinod Koul Signed-off-by: Dan Williams --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8f4413014b2..44358930608 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2169,6 +2169,8 @@ M: Dan Williams S: Supported F: drivers/dma/ F: include/linux/dma* +T: git git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx.git +T: git git://git.infradead.org/users/vkoul/slave-dma.git (slave-dma) DME1737 HARDWARE MONITOR DRIVER M: Juerg Haefliger -- cgit v1.2.3-70-g09d2 From 19d78a61be6dd707dcec298c486303d4ba2c840a Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Fri, 6 May 2011 10:33:44 -0500 Subject: x86: poll waiting for I/OAT DMA channel status For certain system configurations a 5 usec udelay before checking I/OAT DMA channel status is sometimes not sufficient, resulting in a false failure status and unnecessary freeing of channel resources. Conversely, for many configurations 5 usec is longer than necessary. Loop for up to 20 usec waiting for successful status before failing. Signed-off-by: Dimitri Sivanich Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v2.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index effd140fc04..54d1a3c24e9 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -507,6 +507,7 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) struct ioat_ring_ent **ring; u64 status; int order; + int i = 0; /* have we already been set up? */ if (ioat->ring) @@ -547,8 +548,11 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) ioat2_start_null_desc(ioat); /* check that we got off the ground */ - udelay(5); - status = ioat_chansts(chan); + do { + udelay(1); + status = ioat_chansts(chan); + } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); + if (is_ioat_active(status) || is_ioat_idle(status)) { set_bit(IOAT_RUN, &chan->state); return 1 << ioat->alloc_order; -- cgit v1.2.3-70-g09d2 From 80b4037033c2dae31e73810d506ce93b3783be05 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 11 May 2011 20:39:05 +0800 Subject: spi/tle620x: add missing device_remove_file() This patch includes below fixes: 1. Add missing device_remove_file for dev_attr_status_show in tle62x0_remove. 2. Fix tle62x0_probe error handling: Currently, if the error happens when ptr > 0, gpio_attrs[0] is not properly remove. Signed-off-by: Axel Lin Acked-by: Ben Dooks Signed-off-by: Grant Likely --- drivers/spi/tle62x0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/tle62x0.c b/drivers/spi/tle62x0.c index a3938958147..32a40876532 100644 --- a/drivers/spi/tle62x0.c +++ b/drivers/spi/tle62x0.c @@ -283,7 +283,7 @@ static int __devinit tle62x0_probe(struct spi_device *spi) return 0; err_gpios: - for (; ptr > 0; ptr--) + while (--ptr >= 0) device_remove_file(&spi->dev, gpio_attrs[ptr]); device_remove_file(&spi->dev, &dev_attr_status_show); @@ -301,6 +301,7 @@ static int __devexit tle62x0_remove(struct spi_device *spi) for (ptr = 0; ptr < st->nr_gpio; ptr++) device_remove_file(&spi->dev, gpio_attrs[ptr]); + device_remove_file(&spi->dev, &dev_attr_status_show); kfree(st); return 0; } -- cgit v1.2.3-70-g09d2 From 9c3e737561b2473ea1fc5b83a0c1d51e1ff3d294 Mon Sep 17 00:00:00 2001 From: Cliff Cai Date: Mon, 28 Mar 2011 04:57:11 -0400 Subject: spi/spi_bfin_sport: new driver for a SPI bus via the Blackfin SPORT peripheral The Blackfin SPORT peripheral is a pretty flexible device. With enough coaching, we can make it generate SPI compatible waveforms. This is desirable as the SPORT can run at much higher clock frequencies than the dedicated on-chip SPI peripheral, and it can do full duplex DMA. It also opens up the possibility of multiple SPI buses in case someone wants to dedicate a whole bus to a specific part that does not play well with others. Signed-off-by: Cliff Cai Signed-off-by: Bryan Wu Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Grant Likely --- drivers/spi/Kconfig | 9 + drivers/spi/Makefile | 1 + drivers/spi/spi_bfin_sport.c | 952 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 962 insertions(+) create mode 100644 drivers/spi/spi_bfin_sport.c diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index fbd96b29530..de35c3ad8a6 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -80,6 +80,15 @@ config SPI_BFIN help This is the SPI controller master driver for Blackfin 5xx processor. +config SPI_BFIN_SPORT + tristate "SPI bus via Blackfin SPORT" + depends on BLACKFIN + help + Enable support for a SPI bus via the Blackfin SPORT peripheral. + + This driver can also be built as a module. If so, the module + will be called spi_bfin_sport. + config SPI_AU1550 tristate "Au1550/Au12x0 SPI Controller" depends on (SOC_AU1550 || SOC_AU1200) && EXPERIMENTAL diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index fd2fc5f6505..0f8c69b6b19 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_SPI_ALTERA) += spi_altera.o obj-$(CONFIG_SPI_ATMEL) += atmel_spi.o obj-$(CONFIG_SPI_ATH79) += ath79_spi.o obj-$(CONFIG_SPI_BFIN) += spi_bfin5xx.o +obj-$(CONFIG_SPI_BFIN_SPORT) += spi_bfin_sport.o obj-$(CONFIG_SPI_BITBANG) += spi_bitbang.o obj-$(CONFIG_SPI_AU1550) += au1550_spi.o obj-$(CONFIG_SPI_BUTTERFLY) += spi_butterfly.o diff --git a/drivers/spi/spi_bfin_sport.c b/drivers/spi/spi_bfin_sport.c new file mode 100644 index 00000000000..e557ff617b1 --- /dev/null +++ b/drivers/spi/spi_bfin_sport.c @@ -0,0 +1,952 @@ +/* + * SPI bus via the Blackfin SPORT peripheral + * + * Enter bugs at http://blackfin.uclinux.org/ + * + * Copyright 2009-2011 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define DRV_NAME "bfin-sport-spi" +#define DRV_DESC "SPI bus via the Blackfin SPORT" + +MODULE_AUTHOR("Cliff Cai"); +MODULE_DESCRIPTION(DRV_DESC); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:bfin-sport-spi"); + +enum bfin_sport_spi_state { + START_STATE, + RUNNING_STATE, + DONE_STATE, + ERROR_STATE, +}; + +struct bfin_sport_spi_master_data; + +struct bfin_sport_transfer_ops { + void (*write) (struct bfin_sport_spi_master_data *); + void (*read) (struct bfin_sport_spi_master_data *); + void (*duplex) (struct bfin_sport_spi_master_data *); +}; + +struct bfin_sport_spi_master_data { + /* Driver model hookup */ + struct device *dev; + + /* SPI framework hookup */ + struct spi_master *master; + + /* Regs base of SPI controller */ + struct sport_register __iomem *regs; + int err_irq; + + /* Pin request list */ + u16 *pin_req; + + /* Driver message queue */ + struct workqueue_struct *workqueue; + struct work_struct pump_messages; + spinlock_t lock; + struct list_head queue; + int busy; + bool run; + + /* Message Transfer pump */ + struct tasklet_struct pump_transfers; + + /* Current message transfer state info */ + enum bfin_sport_spi_state state; + struct spi_message *cur_msg; + struct spi_transfer *cur_transfer; + struct bfin_sport_spi_slave_data *cur_chip; + union { + void *tx; + u8 *tx8; + u16 *tx16; + }; + void *tx_end; + union { + void *rx; + u8 *rx8; + u16 *rx16; + }; + void *rx_end; + + int cs_change; + struct bfin_sport_transfer_ops *ops; +}; + +struct bfin_sport_spi_slave_data { + u16 ctl_reg; + u16 baud; + u16 cs_chg_udelay; /* Some devices require > 255usec delay */ + u32 cs_gpio; + u16 idle_tx_val; + struct bfin_sport_transfer_ops *ops; +}; + +static void +bfin_sport_spi_enable(struct bfin_sport_spi_master_data *drv_data) +{ + bfin_write_or(&drv_data->regs->tcr1, TSPEN); + bfin_write_or(&drv_data->regs->rcr1, TSPEN); + SSYNC(); +} + +static void +bfin_sport_spi_disable(struct bfin_sport_spi_master_data *drv_data) +{ + bfin_write_and(&drv_data->regs->tcr1, ~TSPEN); + bfin_write_and(&drv_data->regs->rcr1, ~TSPEN); + SSYNC(); +} + +/* Caculate the SPI_BAUD register value based on input HZ */ +static u16 +bfin_sport_hz_to_spi_baud(u32 speed_hz) +{ + u_long clk, sclk = get_sclk(); + int div = (sclk / (2 * speed_hz)) - 1; + + if (div < 0) + div = 0; + + clk = sclk / (2 * (div + 1)); + + if (clk > speed_hz) + div++; + + return div; +} + +/* Chip select operation functions for cs_change flag */ +static void +bfin_sport_spi_cs_active(struct bfin_sport_spi_slave_data *chip) +{ + gpio_direction_output(chip->cs_gpio, 0); +} + +static void +bfin_sport_spi_cs_deactive(struct bfin_sport_spi_slave_data *chip) +{ + gpio_direction_output(chip->cs_gpio, 1); + /* Move delay here for consistency */ + if (chip->cs_chg_udelay) + udelay(chip->cs_chg_udelay); +} + +static void +bfin_sport_spi_stat_poll_complete(struct bfin_sport_spi_master_data *drv_data) +{ + unsigned long timeout = jiffies + HZ; + while (!(bfin_read(&drv_data->regs->stat) & RXNE)) { + if (!time_before(jiffies, timeout)) + break; + } +} + +static void +bfin_sport_spi_u8_writer(struct bfin_sport_spi_master_data *drv_data) +{ + u16 dummy; + + while (drv_data->tx < drv_data->tx_end) { + bfin_write(&drv_data->regs->tx16, *drv_data->tx8++); + bfin_sport_spi_stat_poll_complete(drv_data); + dummy = bfin_read(&drv_data->regs->rx16); + } +} + +static void +bfin_sport_spi_u8_reader(struct bfin_sport_spi_master_data *drv_data) +{ + u16 tx_val = drv_data->cur_chip->idle_tx_val; + + while (drv_data->rx < drv_data->rx_end) { + bfin_write(&drv_data->regs->tx16, tx_val); + bfin_sport_spi_stat_poll_complete(drv_data); + *drv_data->rx8++ = bfin_read(&drv_data->regs->rx16); + } +} + +static void +bfin_sport_spi_u8_duplex(struct bfin_sport_spi_master_data *drv_data) +{ + while (drv_data->rx < drv_data->rx_end) { + bfin_write(&drv_data->regs->tx16, *drv_data->tx8++); + bfin_sport_spi_stat_poll_complete(drv_data); + *drv_data->rx8++ = bfin_read(&drv_data->regs->rx16); + } +} + +static struct bfin_sport_transfer_ops bfin_sport_transfer_ops_u8 = { + .write = bfin_sport_spi_u8_writer, + .read = bfin_sport_spi_u8_reader, + .duplex = bfin_sport_spi_u8_duplex, +}; + +static void +bfin_sport_spi_u16_writer(struct bfin_sport_spi_master_data *drv_data) +{ + u16 dummy; + + while (drv_data->tx < drv_data->tx_end) { + bfin_write(&drv_data->regs->tx16, *drv_data->tx16++); + bfin_sport_spi_stat_poll_complete(drv_data); + dummy = bfin_read(&drv_data->regs->rx16); + } +} + +static void +bfin_sport_spi_u16_reader(struct bfin_sport_spi_master_data *drv_data) +{ + u16 tx_val = drv_data->cur_chip->idle_tx_val; + + while (drv_data->rx < drv_data->rx_end) { + bfin_write(&drv_data->regs->tx16, tx_val); + bfin_sport_spi_stat_poll_complete(drv_data); + *drv_data->rx16++ = bfin_read(&drv_data->regs->rx16); + } +} + +static void +bfin_sport_spi_u16_duplex(struct bfin_sport_spi_master_data *drv_data) +{ + while (drv_data->rx < drv_data->rx_end) { + bfin_write(&drv_data->regs->tx16, *drv_data->tx16++); + bfin_sport_spi_stat_poll_complete(drv_data); + *drv_data->rx16++ = bfin_read(&drv_data->regs->rx16); + } +} + +static struct bfin_sport_transfer_ops bfin_sport_transfer_ops_u16 = { + .write = bfin_sport_spi_u16_writer, + .read = bfin_sport_spi_u16_reader, + .duplex = bfin_sport_spi_u16_duplex, +}; + +/* stop controller and re-config current chip */ +static void +bfin_sport_spi_restore_state(struct bfin_sport_spi_master_data *drv_data) +{ + struct bfin_sport_spi_slave_data *chip = drv_data->cur_chip; + unsigned int bits = (drv_data->ops == &bfin_sport_transfer_ops_u8 ? 7 : 15); + + bfin_sport_spi_disable(drv_data); + dev_dbg(drv_data->dev, "restoring spi ctl state\n"); + + bfin_write(&drv_data->regs->tcr1, chip->ctl_reg); + bfin_write(&drv_data->regs->tcr2, bits); + bfin_write(&drv_data->regs->tclkdiv, chip->baud); + bfin_write(&drv_data->regs->tfsdiv, bits); + SSYNC(); + + bfin_write(&drv_data->regs->rcr1, chip->ctl_reg & ~(ITCLK | ITFS)); + bfin_write(&drv_data->regs->rcr2, bits); + SSYNC(); + + bfin_sport_spi_cs_active(chip); +} + +/* test if there is more transfer to be done */ +static enum bfin_sport_spi_state +bfin_sport_spi_next_transfer(struct bfin_sport_spi_master_data *drv_data) +{ + struct spi_message *msg = drv_data->cur_msg; + struct spi_transfer *trans = drv_data->cur_transfer; + + /* Move to next transfer */ + if (trans->transfer_list.next != &msg->transfers) { + drv_data->cur_transfer = + list_entry(trans->transfer_list.next, + struct spi_transfer, transfer_list); + return RUNNING_STATE; + } + + return DONE_STATE; +} + +/* + * caller already set message->status; + * dma and pio irqs are blocked give finished message back + */ +static void +bfin_sport_spi_giveback(struct bfin_sport_spi_master_data *drv_data) +{ + struct bfin_sport_spi_slave_data *chip = drv_data->cur_chip; + unsigned long flags; + struct spi_message *msg; + + spin_lock_irqsave(&drv_data->lock, flags); + msg = drv_data->cur_msg; + drv_data->state = START_STATE; + drv_data->cur_msg = NULL; + drv_data->cur_transfer = NULL; + drv_data->cur_chip = NULL; + queue_work(drv_data->workqueue, &drv_data->pump_messages); + spin_unlock_irqrestore(&drv_data->lock, flags); + + if (!drv_data->cs_change) + bfin_sport_spi_cs_deactive(chip); + + if (msg->complete) + msg->complete(msg->context); +} + +static irqreturn_t +sport_err_handler(int irq, void *dev_id) +{ + struct bfin_sport_spi_master_data *drv_data = dev_id; + u16 status; + + dev_dbg(drv_data->dev, "%s enter\n", __func__); + status = bfin_read(&drv_data->regs->stat) & (TOVF | TUVF | ROVF | RUVF); + + if (status) { + bfin_write(&drv_data->regs->stat, status); + SSYNC(); + + bfin_sport_spi_disable(drv_data); + dev_err(drv_data->dev, "status error:%s%s%s%s\n", + status & TOVF ? " TOVF" : "", + status & TUVF ? " TUVF" : "", + status & ROVF ? " ROVF" : "", + status & RUVF ? " RUVF" : ""); + } + + return IRQ_HANDLED; +} + +static void +bfin_sport_spi_pump_transfers(unsigned long data) +{ + struct bfin_sport_spi_master_data *drv_data = (void *)data; + struct spi_message *message = NULL; + struct spi_transfer *transfer = NULL; + struct spi_transfer *previous = NULL; + struct bfin_sport_spi_slave_data *chip = NULL; + unsigned int bits_per_word; + u32 tranf_success = 1; + u32 transfer_speed; + u8 full_duplex = 0; + + /* Get current state information */ + message = drv_data->cur_msg; + transfer = drv_data->cur_transfer; + chip = drv_data->cur_chip; + + if (transfer->speed_hz) + transfer_speed = bfin_sport_hz_to_spi_baud(transfer->speed_hz); + else + transfer_speed = chip->baud; + bfin_write(&drv_data->regs->tclkdiv, transfer_speed); + SSYNC(); + + /* + * if msg is error or done, report it back using complete() callback + */ + + /* Handle for abort */ + if (drv_data->state == ERROR_STATE) { + dev_dbg(drv_data->dev, "transfer: we've hit an error\n"); + message->status = -EIO; + bfin_sport_spi_giveback(drv_data); + return; + } + + /* Handle end of message */ + if (drv_data->state == DONE_STATE) { + dev_dbg(drv_data->dev, "transfer: all done!\n"); + message->status = 0; + bfin_sport_spi_giveback(drv_data); + return; + } + + /* Delay if requested at end of transfer */ + if (drv_data->state == RUNNING_STATE) { + dev_dbg(drv_data->dev, "transfer: still running ...\n"); + previous = list_entry(transfer->transfer_list.prev, + struct spi_transfer, transfer_list); + if (previous->delay_usecs) + udelay(previous->delay_usecs); + } + + if (transfer->len == 0) { + /* Move to next transfer of this msg */ + drv_data->state = bfin_sport_spi_next_transfer(drv_data); + /* Schedule next transfer tasklet */ + tasklet_schedule(&drv_data->pump_transfers); + } + + if (transfer->tx_buf != NULL) { + drv_data->tx = (void *)transfer->tx_buf; + drv_data->tx_end = drv_data->tx + transfer->len; + dev_dbg(drv_data->dev, "tx_buf is %p, tx_end is %p\n", + transfer->tx_buf, drv_data->tx_end); + } else + drv_data->tx = NULL; + + if (transfer->rx_buf != NULL) { + full_duplex = transfer->tx_buf != NULL; + drv_data->rx = transfer->rx_buf; + drv_data->rx_end = drv_data->rx + transfer->len; + dev_dbg(drv_data->dev, "rx_buf is %p, rx_end is %p\n", + transfer->rx_buf, drv_data->rx_end); + } else + drv_data->rx = NULL; + + drv_data->cs_change = transfer->cs_change; + + /* Bits per word setup */ + bits_per_word = transfer->bits_per_word ? : message->spi->bits_per_word; + if (bits_per_word == 8) + drv_data->ops = &bfin_sport_transfer_ops_u8; + else + drv_data->ops = &bfin_sport_transfer_ops_u16; + + drv_data->state = RUNNING_STATE; + + if (drv_data->cs_change) + bfin_sport_spi_cs_active(chip); + + dev_dbg(drv_data->dev, + "now pumping a transfer: width is %d, len is %d\n", + bits_per_word, transfer->len); + + /* PIO mode write then read */ + dev_dbg(drv_data->dev, "doing IO transfer\n"); + + bfin_sport_spi_enable(drv_data); + if (full_duplex) { + /* full duplex mode */ + BUG_ON((drv_data->tx_end - drv_data->tx) != + (drv_data->rx_end - drv_data->rx)); + drv_data->ops->duplex(drv_data); + + if (drv_data->tx != drv_data->tx_end) + tranf_success = 0; + } else if (drv_data->tx != NULL) { + /* write only half duplex */ + + drv_data->ops->write(drv_data); + + if (drv_data->tx != drv_data->tx_end) + tranf_success = 0; + } else if (drv_data->rx != NULL) { + /* read only half duplex */ + + drv_data->ops->read(drv_data); + if (drv_data->rx != drv_data->rx_end) + tranf_success = 0; + } + bfin_sport_spi_disable(drv_data); + + if (!tranf_success) { + dev_dbg(drv_data->dev, "IO write error!\n"); + drv_data->state = ERROR_STATE; + } else { + /* Update total byte transfered */ + message->actual_length += transfer->len; + /* Move to next transfer of this msg */ + drv_data->state = bfin_sport_spi_next_transfer(drv_data); + if (drv_data->cs_change) + bfin_sport_spi_cs_deactive(chip); + } + + /* Schedule next transfer tasklet */ + tasklet_schedule(&drv_data->pump_transfers); +} + +/* pop a msg from queue and kick off real transfer */ +static void +bfin_sport_spi_pump_messages(struct work_struct *work) +{ + struct bfin_sport_spi_master_data *drv_data; + unsigned long flags; + struct spi_message *next_msg; + + drv_data = container_of(work, struct bfin_sport_spi_master_data, pump_messages); + + /* Lock queue and check for queue work */ + spin_lock_irqsave(&drv_data->lock, flags); + if (list_empty(&drv_data->queue) || !drv_data->run) { + /* pumper kicked off but no work to do */ + drv_data->busy = 0; + spin_unlock_irqrestore(&drv_data->lock, flags); + return; + } + + /* Make sure we are not already running a message */ + if (drv_data->cur_msg) { + spin_unlock_irqrestore(&drv_data->lock, flags); + return; + } + + /* Extract head of queue */ + next_msg = list_entry(drv_data->queue.next, + struct spi_message, queue); + + drv_data->cur_msg = next_msg; + + /* Setup the SSP using the per chip configuration */ + drv_data->cur_chip = spi_get_ctldata(drv_data->cur_msg->spi); + + list_del_init(&drv_data->cur_msg->queue); + + /* Initialize message state */ + drv_data->cur_msg->state = START_STATE; + drv_data->cur_transfer = list_entry(drv_data->cur_msg->transfers.next, + struct spi_transfer, transfer_list); + bfin_sport_spi_restore_state(drv_data); + dev_dbg(drv_data->dev, "got a message to pump, " + "state is set to: baud %d, cs_gpio %i, ctl 0x%x\n", + drv_data->cur_chip->baud, drv_data->cur_chip->cs_gpio, + drv_data->cur_chip->ctl_reg); + + dev_dbg(drv_data->dev, + "the first transfer len is %d\n", + drv_data->cur_transfer->len); + + /* Mark as busy and launch transfers */ + tasklet_schedule(&drv_data->pump_transfers); + + drv_data->busy = 1; + spin_unlock_irqrestore(&drv_data->lock, flags); +} + +/* + * got a msg to transfer, queue it in drv_data->queue. + * And kick off message pumper + */ +static int +bfin_sport_spi_transfer(struct spi_device *spi, struct spi_message *msg) +{ + struct bfin_sport_spi_master_data *drv_data = spi_master_get_devdata(spi->master); + unsigned long flags; + + spin_lock_irqsave(&drv_data->lock, flags); + + if (!drv_data->run) { + spin_unlock_irqrestore(&drv_data->lock, flags); + return -ESHUTDOWN; + } + + msg->actual_length = 0; + msg->status = -EINPROGRESS; + msg->state = START_STATE; + + dev_dbg(&spi->dev, "adding an msg in transfer()\n"); + list_add_tail(&msg->queue, &drv_data->queue); + + if (drv_data->run && !drv_data->busy) + queue_work(drv_data->workqueue, &drv_data->pump_messages); + + spin_unlock_irqrestore(&drv_data->lock, flags); + + return 0; +} + +/* Called every time common spi devices change state */ +static int +bfin_sport_spi_setup(struct spi_device *spi) +{ + struct bfin_sport_spi_slave_data *chip, *first = NULL; + int ret; + + /* Only alloc (or use chip_info) on first setup */ + chip = spi_get_ctldata(spi); + if (chip == NULL) { + struct bfin5xx_spi_chip *chip_info; + + chip = first = kzalloc(sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + /* platform chip_info isn't required */ + chip_info = spi->controller_data; + if (chip_info) { + /* + * DITFS and TDTYPE are only thing we don't set, but + * they probably shouldn't be changed by people. + */ + if (chip_info->ctl_reg || chip_info->enable_dma) { + ret = -EINVAL; + dev_err(&spi->dev, "don't set ctl_reg/enable_dma fields"); + goto error; + } + chip->cs_chg_udelay = chip_info->cs_chg_udelay; + chip->idle_tx_val = chip_info->idle_tx_val; + spi->bits_per_word = chip_info->bits_per_word; + } + } + + if (spi->bits_per_word != 8 && spi->bits_per_word != 16) { + ret = -EINVAL; + goto error; + } + + /* translate common spi framework into our register + * following configure contents are same for tx and rx. + */ + + if (spi->mode & SPI_CPHA) + chip->ctl_reg &= ~TCKFE; + else + chip->ctl_reg |= TCKFE; + + if (spi->mode & SPI_LSB_FIRST) + chip->ctl_reg |= TLSBIT; + else + chip->ctl_reg &= ~TLSBIT; + + /* Sport in master mode */ + chip->ctl_reg |= ITCLK | ITFS | TFSR | LATFS | LTFS; + + chip->baud = bfin_sport_hz_to_spi_baud(spi->max_speed_hz); + + chip->cs_gpio = spi->chip_select; + ret = gpio_request(chip->cs_gpio, spi->modalias); + if (ret) + goto error; + + dev_dbg(&spi->dev, "setup spi chip %s, width is %d\n", + spi->modalias, spi->bits_per_word); + dev_dbg(&spi->dev, "ctl_reg is 0x%x, GPIO is %i\n", + chip->ctl_reg, spi->chip_select); + + spi_set_ctldata(spi, chip); + + bfin_sport_spi_cs_deactive(chip); + + return ret; + + error: + kfree(first); + return ret; +} + +/* + * callback for spi framework. + * clean driver specific data + */ +static void +bfin_sport_spi_cleanup(struct spi_device *spi) +{ + struct bfin_sport_spi_slave_data *chip = spi_get_ctldata(spi); + + if (!chip) + return; + + gpio_free(chip->cs_gpio); + + kfree(chip); +} + +static int +bfin_sport_spi_init_queue(struct bfin_sport_spi_master_data *drv_data) +{ + INIT_LIST_HEAD(&drv_data->queue); + spin_lock_init(&drv_data->lock); + + drv_data->run = false; + drv_data->busy = 0; + + /* init transfer tasklet */ + tasklet_init(&drv_data->pump_transfers, + bfin_sport_spi_pump_transfers, (unsigned long)drv_data); + + /* init messages workqueue */ + INIT_WORK(&drv_data->pump_messages, bfin_sport_spi_pump_messages); + drv_data->workqueue = + create_singlethread_workqueue(dev_name(drv_data->master->dev.parent)); + if (drv_data->workqueue == NULL) + return -EBUSY; + + return 0; +} + +static int +bfin_sport_spi_start_queue(struct bfin_sport_spi_master_data *drv_data) +{ + unsigned long flags; + + spin_lock_irqsave(&drv_data->lock, flags); + + if (drv_data->run || drv_data->busy) { + spin_unlock_irqrestore(&drv_data->lock, flags); + return -EBUSY; + } + + drv_data->run = true; + drv_data->cur_msg = NULL; + drv_data->cur_transfer = NULL; + drv_data->cur_chip = NULL; + spin_unlock_irqrestore(&drv_data->lock, flags); + + queue_work(drv_data->workqueue, &drv_data->pump_messages); + + return 0; +} + +static inline int +bfin_sport_spi_stop_queue(struct bfin_sport_spi_master_data *drv_data) +{ + unsigned long flags; + unsigned limit = 500; + int status = 0; + + spin_lock_irqsave(&drv_data->lock, flags); + + /* + * This is a bit lame, but is optimized for the common execution path. + * A wait_queue on the drv_data->busy could be used, but then the common + * execution path (pump_messages) would be required to call wake_up or + * friends on every SPI message. Do this instead + */ + drv_data->run = false; + while (!list_empty(&drv_data->queue) && drv_data->busy && limit--) { + spin_unlock_irqrestore(&drv_data->lock, flags); + msleep(10); + spin_lock_irqsave(&drv_data->lock, flags); + } + + if (!list_empty(&drv_data->queue) || drv_data->busy) + status = -EBUSY; + + spin_unlock_irqrestore(&drv_data->lock, flags); + + return status; +} + +static inline int +bfin_sport_spi_destroy_queue(struct bfin_sport_spi_master_data *drv_data) +{ + int status; + + status = bfin_sport_spi_stop_queue(drv_data); + if (status) + return status; + + destroy_workqueue(drv_data->workqueue); + + return 0; +} + +static int __devinit +bfin_sport_spi_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct bfin5xx_spi_master *platform_info; + struct spi_master *master; + struct resource *res, *ires; + struct bfin_sport_spi_master_data *drv_data; + int status; + + platform_info = dev->platform_data; + + /* Allocate master with space for drv_data */ + master = spi_alloc_master(dev, sizeof(*master) + 16); + if (!master) { + dev_err(dev, "cannot alloc spi_master\n"); + return -ENOMEM; + } + + drv_data = spi_master_get_devdata(master); + drv_data->master = master; + drv_data->dev = dev; + drv_data->pin_req = platform_info->pin_req; + + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST; + master->bus_num = pdev->id; + master->num_chipselect = platform_info->num_chipselect; + master->cleanup = bfin_sport_spi_cleanup; + master->setup = bfin_sport_spi_setup; + master->transfer = bfin_sport_spi_transfer; + + /* Find and map our resources */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res == NULL) { + dev_err(dev, "cannot get IORESOURCE_MEM\n"); + status = -ENOENT; + goto out_error_get_res; + } + + drv_data->regs = ioremap(res->start, resource_size(res)); + if (drv_data->regs == NULL) { + dev_err(dev, "cannot map registers\n"); + status = -ENXIO; + goto out_error_ioremap; + } + + ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!ires) { + dev_err(dev, "cannot get IORESOURCE_IRQ\n"); + status = -ENODEV; + goto out_error_get_ires; + } + drv_data->err_irq = ires->start; + + /* Initial and start queue */ + status = bfin_sport_spi_init_queue(drv_data); + if (status) { + dev_err(dev, "problem initializing queue\n"); + goto out_error_queue_alloc; + } + + status = bfin_sport_spi_start_queue(drv_data); + if (status) { + dev_err(dev, "problem starting queue\n"); + goto out_error_queue_alloc; + } + + status = request_irq(drv_data->err_irq, sport_err_handler, + 0, "sport_spi_err", drv_data); + if (status) { + dev_err(dev, "unable to request sport err irq\n"); + goto out_error_irq; + } + + status = peripheral_request_list(drv_data->pin_req, DRV_NAME); + if (status) { + dev_err(dev, "requesting peripherals failed\n"); + goto out_error_peripheral; + } + + /* Register with the SPI framework */ + platform_set_drvdata(pdev, drv_data); + status = spi_register_master(master); + if (status) { + dev_err(dev, "problem registering spi master\n"); + goto out_error_master; + } + + dev_info(dev, "%s, regs_base@%p\n", DRV_DESC, drv_data->regs); + return 0; + + out_error_master: + peripheral_free_list(drv_data->pin_req); + out_error_peripheral: + free_irq(drv_data->err_irq, drv_data); + out_error_irq: + out_error_queue_alloc: + bfin_sport_spi_destroy_queue(drv_data); + out_error_get_ires: + iounmap(drv_data->regs); + out_error_ioremap: + out_error_get_res: + spi_master_put(master); + + return status; +} + +/* stop hardware and remove the driver */ +static int __devexit +bfin_sport_spi_remove(struct platform_device *pdev) +{ + struct bfin_sport_spi_master_data *drv_data = platform_get_drvdata(pdev); + int status = 0; + + if (!drv_data) + return 0; + + /* Remove the queue */ + status = bfin_sport_spi_destroy_queue(drv_data); + if (status) + return status; + + /* Disable the SSP at the peripheral and SOC level */ + bfin_sport_spi_disable(drv_data); + + /* Disconnect from the SPI framework */ + spi_unregister_master(drv_data->master); + + peripheral_free_list(drv_data->pin_req); + + /* Prevent double remove */ + platform_set_drvdata(pdev, NULL); + + return 0; +} + +#ifdef CONFIG_PM +static int +bfin_sport_spi_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct bfin_sport_spi_master_data *drv_data = platform_get_drvdata(pdev); + int status; + + status = bfin_sport_spi_stop_queue(drv_data); + if (status) + return status; + + /* stop hardware */ + bfin_sport_spi_disable(drv_data); + + return status; +} + +static int +bfin_sport_spi_resume(struct platform_device *pdev) +{ + struct bfin_sport_spi_master_data *drv_data = platform_get_drvdata(pdev); + int status; + + /* Enable the SPI interface */ + bfin_sport_spi_enable(drv_data); + + /* Start the queue running */ + status = bfin_sport_spi_start_queue(drv_data); + if (status) + dev_err(drv_data->dev, "problem resuming queue\n"); + + return status; +} +#else +# define bfin_sport_spi_suspend NULL +# define bfin_sport_spi_resume NULL +#endif + +static struct platform_driver bfin_sport_spi_driver = { + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + }, + .probe = bfin_sport_spi_probe, + .remove = __devexit_p(bfin_sport_spi_remove), + .suspend = bfin_sport_spi_suspend, + .resume = bfin_sport_spi_resume, +}; + +static int __init bfin_sport_spi_init(void) +{ + return platform_driver_register(&bfin_sport_spi_driver); +} +module_init(bfin_sport_spi_init); + +static void __exit bfin_sport_spi_exit(void) +{ + platform_driver_unregister(&bfin_sport_spi_driver); +} +module_exit(bfin_sport_spi_exit); -- cgit v1.2.3-70-g09d2 From 9a3865b185e77d1a4ca2d8356e37c19b78168961 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 27 May 2011 09:29:32 +0200 Subject: x86, asm: Clean up desc.h a bit I have looked at this file and found it rather ugly - improve readability a bit. No change in functionality. Link: http://lkml.kernel.org/n/tip-incpt6y26yd8586idx65t9ll@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 152 ++++++++++++++++++++++---------------------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 617bd56b307..7b439d9aea2 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -4,30 +4,33 @@ #include #include #include + #include -static inline void fill_ldt(struct desc_struct *desc, - const struct user_desc *info) -{ - desc->limit0 = info->limit & 0x0ffff; - desc->base0 = info->base_addr & 0x0000ffff; - - desc->base1 = (info->base_addr & 0x00ff0000) >> 16; - desc->type = (info->read_exec_only ^ 1) << 1; - desc->type |= info->contents << 2; - desc->s = 1; - desc->dpl = 0x3; - desc->p = info->seg_not_present ^ 1; - desc->limit = (info->limit & 0xf0000) >> 16; - desc->avl = info->useable; - desc->d = info->seg_32bit; - desc->g = info->limit_in_pages; - desc->base2 = (info->base_addr & 0xff000000) >> 24; +static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info) +{ + desc->limit0 = info->limit & 0x0ffff; + + desc->base0 = (info->base_addr & 0x0000ffff); + desc->base1 = (info->base_addr & 0x00ff0000) >> 16; + + desc->type = (info->read_exec_only ^ 1) << 1; + desc->type |= info->contents << 2; + + desc->s = 1; + desc->dpl = 0x3; + desc->p = info->seg_not_present ^ 1; + desc->limit = (info->limit & 0xf0000) >> 16; + desc->avl = info->useable; + desc->d = info->seg_32bit; + desc->g = info->limit_in_pages; + + desc->base2 = (info->base_addr & 0xff000000) >> 24; /* * Don't allow setting of the lm bit. It is useless anyway * because 64bit system calls require __USER_CS: */ - desc->l = 0; + desc->l = 0; } extern struct desc_ptr idt_descr; @@ -36,6 +39,7 @@ extern gate_desc idt_table[]; struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); + DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) @@ -48,16 +52,16 @@ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, unsigned dpl, unsigned ist, unsigned seg) { - gate->offset_low = PTR_LOW(func); - gate->segment = __KERNEL_CS; - gate->ist = ist; - gate->p = 1; - gate->dpl = dpl; - gate->zero0 = 0; - gate->zero1 = 0; - gate->type = type; - gate->offset_middle = PTR_MIDDLE(func); - gate->offset_high = PTR_HIGH(func); + gate->offset_low = PTR_LOW(func); + gate->segment = __KERNEL_CS; + gate->ist = ist; + gate->p = 1; + gate->dpl = dpl; + gate->zero0 = 0; + gate->zero1 = 0; + gate->type = type; + gate->offset_middle = PTR_MIDDLE(func); + gate->offset_high = PTR_HIGH(func); } #else @@ -66,8 +70,7 @@ static inline void pack_gate(gate_desc *gate, unsigned char type, unsigned short seg) { gate->a = (seg << 16) | (base & 0xffff); - gate->b = (base & 0xffff0000) | - (((0x80 | type | (dpl << 5)) & 0xff) << 8); + gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8); } #endif @@ -75,31 +78,29 @@ static inline void pack_gate(gate_desc *gate, unsigned char type, static inline int desc_empty(const void *ptr) { const u32 *desc = ptr; + return !(desc[0] | desc[1]); } #ifdef CONFIG_PARAVIRT #include #else -#define load_TR_desc() native_load_tr_desc() -#define load_gdt(dtr) native_load_gdt(dtr) -#define load_idt(dtr) native_load_idt(dtr) -#define load_tr(tr) asm volatile("ltr %0"::"m" (tr)) -#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt)) - -#define store_gdt(dtr) native_store_gdt(dtr) -#define store_idt(dtr) native_store_idt(dtr) -#define store_tr(tr) (tr = native_store_tr()) - -#define load_TLS(t, cpu) native_load_tls(t, cpu) -#define set_ldt native_set_ldt - -#define write_ldt_entry(dt, entry, desc) \ - native_write_ldt_entry(dt, entry, desc) -#define write_gdt_entry(dt, entry, desc, type) \ - native_write_gdt_entry(dt, entry, desc, type) -#define write_idt_entry(dt, entry, g) \ - native_write_idt_entry(dt, entry, g) +#define load_TR_desc() native_load_tr_desc() +#define load_gdt(dtr) native_load_gdt(dtr) +#define load_idt(dtr) native_load_idt(dtr) +#define load_tr(tr) asm volatile("ltr %0"::"m" (tr)) +#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt)) + +#define store_gdt(dtr) native_store_gdt(dtr) +#define store_idt(dtr) native_store_idt(dtr) +#define store_tr(tr) (tr = native_store_tr()) + +#define load_TLS(t, cpu) native_load_tls(t, cpu) +#define set_ldt native_set_ldt + +#define write_ldt_entry(dt, entry, desc) native_write_ldt_entry(dt, entry, desc) +#define write_gdt_entry(dt, entry, desc, type) native_write_gdt_entry(dt, entry, desc, type) +#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g) static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { @@ -112,33 +113,27 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) #define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) -static inline void native_write_idt_entry(gate_desc *idt, int entry, - const gate_desc *gate) +static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) { memcpy(&idt[entry], gate, sizeof(*gate)); } -static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, - const void *desc) +static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc) { memcpy(&ldt[entry], desc, 8); } -static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry, - const void *desc, int type) +static inline void +native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type) { unsigned int size; + switch (type) { - case DESC_TSS: - size = sizeof(tss_desc); - break; - case DESC_LDT: - size = sizeof(ldt_desc); - break; - default: - size = sizeof(struct desc_struct); - break; + case DESC_TSS: size = sizeof(tss_desc); break; + case DESC_LDT: size = sizeof(ldt_desc); break; + default: size = sizeof(*gdt); break; } + memcpy(&gdt[entry], desc, size); } @@ -154,20 +149,21 @@ static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, } -static inline void set_tssldt_descriptor(void *d, unsigned long addr, - unsigned type, unsigned size) +static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size) { #ifdef CONFIG_X86_64 struct ldttss_desc64 *desc = d; + memset(desc, 0, sizeof(*desc)); - desc->limit0 = size & 0xFFFF; - desc->base0 = PTR_LOW(addr); - desc->base1 = PTR_MIDDLE(addr) & 0xFF; - desc->type = type; - desc->p = 1; - desc->limit1 = (size >> 16) & 0xF; - desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF; - desc->base3 = PTR_HIGH(addr); + + desc->limit0 = size & 0xFFFF; + desc->base0 = PTR_LOW(addr); + desc->base1 = PTR_MIDDLE(addr) & 0xFF; + desc->type = type; + desc->p = 1; + desc->limit1 = (size >> 16) & 0xF; + desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF; + desc->base3 = PTR_HIGH(addr); #else pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0); #endif @@ -237,14 +233,16 @@ static inline void native_store_idt(struct desc_ptr *dtr) static inline unsigned long native_store_tr(void) { unsigned long tr; + asm volatile("str %0":"=r" (tr)); + return tr; } static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) { - unsigned int i; struct desc_struct *gdt = get_cpu_gdt_table(cpu); + unsigned int i; for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; @@ -313,6 +311,7 @@ static inline void _set_gate(int gate, unsigned type, void *addr, unsigned dpl, unsigned ist, unsigned seg) { gate_desc s; + pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); /* * does not need to be atomic because it is only done once at @@ -343,8 +342,9 @@ static inline void alloc_system_vector(int vector) set_bit(vector, used_vectors); if (first_system_vector > vector) first_system_vector = vector; - } else + } else { BUG(); + } } static inline void alloc_intr_gate(unsigned int n, void *addr) -- cgit v1.2.3-70-g09d2 From 287548e46aa752ce9bb87fcff46f8aa794cc5037 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 May 2011 06:50:06 -0400 Subject: split __follow_mount_rcu() into normal and .. cases Signed-off-by: Al Viro --- fs/namei.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 2358b326b22..da9c2657866 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -919,12 +919,11 @@ static inline bool managed_dentry_might_block(struct dentry *dentry) } /* - * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we - * meet a managed dentry and we're not walking to "..". True is returned to - * continue, false to abort. + * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if + * we meet a managed dentry that would need blocking. */ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, - struct inode **inode, bool reverse_transit) + struct inode **inode) { for (;;) { struct vfsmount *mounted; @@ -933,8 +932,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, * that wants to block transit. */ *inode = path->dentry->d_inode; - if (!reverse_transit && - unlikely(managed_dentry_might_block(path->dentry))) + if (unlikely(managed_dentry_might_block(path->dentry))) return false; if (!d_mountpoint(path->dentry)) @@ -949,10 +947,29 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, } if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) - return reverse_transit; + return false; return true; } +static void follow_mount_rcu(struct nameidata *nd, struct path *path, + struct inode **inode) +{ + for (;;) { + struct vfsmount *mounted; + *inode = path->dentry->d_inode; + + if (!d_mountpoint(path->dentry)) + break; + + mounted = __lookup_mnt(path->mnt, path->dentry, 1); + if (!mounted) + break; + path->mnt = mounted; + path->dentry = mounted->mnt_root; + nd->seq = read_seqcount_begin(&path->dentry->d_seq); + } +} + static int follow_dotdot_rcu(struct nameidata *nd) { struct inode *inode = nd->inode; @@ -982,7 +999,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); inode = nd->path.dentry->d_inode; } - __follow_mount_rcu(nd, &nd->path, &inode, true); + follow_mount_rcu(nd, &nd->path, &inode); nd->inode = inode; return 0; @@ -1157,7 +1174,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, } path->mnt = mnt; path->dentry = dentry; - if (likely(__follow_mount_rcu(nd, path, inode, false))) + if (likely(__follow_mount_rcu(nd, path, inode))) return 0; unlazy: if (unlazy_walk(nd, dentry)) -- cgit v1.2.3-70-g09d2 From dea3937619cb67d2ad08e2d29ae923875b1eeee9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 May 2011 06:53:39 -0400 Subject: Trim excessive arguments of follow_mount_rcu() ... and kill a useless local variable in follow_dotdot_rcu(), while we are at it - follow_mount_rcu(nd, path, inode) *always* assigned value to *inode, and always it had been path->dentry->d_inode (aka nd->path.dentry->d_inode, since it always got &nd->path as the second argument). Signed-off-by: Al Viro --- fs/namei.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index da9c2657866..98808142409 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -951,29 +951,21 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, return true; } -static void follow_mount_rcu(struct nameidata *nd, struct path *path, - struct inode **inode) +static void follow_mount_rcu(struct nameidata *nd) { - for (;;) { + while (d_mountpoint(nd->path.dentry)) { struct vfsmount *mounted; - *inode = path->dentry->d_inode; - - if (!d_mountpoint(path->dentry)) - break; - - mounted = __lookup_mnt(path->mnt, path->dentry, 1); + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); if (!mounted) break; - path->mnt = mounted; - path->dentry = mounted->mnt_root; - nd->seq = read_seqcount_begin(&path->dentry->d_seq); + nd->path.mnt = mounted; + nd->path.dentry = mounted->mnt_root; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } } static int follow_dotdot_rcu(struct nameidata *nd) { - struct inode *inode = nd->inode; - set_root_rcu(nd); while (1) { @@ -989,7 +981,6 @@ static int follow_dotdot_rcu(struct nameidata *nd) seq = read_seqcount_begin(&parent->d_seq); if (read_seqcount_retry(&old->d_seq, nd->seq)) goto failed; - inode = parent->d_inode; nd->path.dentry = parent; nd->seq = seq; break; @@ -997,10 +988,9 @@ static int follow_dotdot_rcu(struct nameidata *nd) if (!follow_up_rcu(&nd->path)) break; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - inode = nd->path.dentry->d_inode; } - follow_mount_rcu(nd, &nd->path, &inode); - nd->inode = inode; + follow_mount_rcu(nd); + nd->inode = nd->path.dentry->d_inode; return 0; failed: -- cgit v1.2.3-70-g09d2 From d6e9bd256c88ce5f4b668249e363a74f51393daa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 May 2011 07:03:15 -0400 Subject: Lift the check for automount points into do_lookup() Signed-off-by: Al Viro --- fs/namei.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 98808142409..1ab641f2e78 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -945,9 +945,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, path->dentry = mounted->mnt_root; nd->seq = read_seqcount_begin(&path->dentry->d_seq); } - - if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) - return false; return true; } @@ -1164,8 +1161,11 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, } path->mnt = mnt; path->dentry = dentry; - if (likely(__follow_mount_rcu(nd, path, inode))) - return 0; + if (unlikely(!__follow_mount_rcu(nd, path, inode))) + goto unlazy; + if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) + goto unlazy; + return 0; unlazy: if (unlazy_walk(nd, dentry)) return -ECHILD; -- cgit v1.2.3-70-g09d2 From aa38572954ade525817fe88c54faebf85e5a61c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 May 2011 06:53:02 -0400 Subject: fs: pass exact type of data dirties to ->dirty_inode Tell the filesystem if we just updated timestamp (I_DIRTY_SYNC) or anything else, so that the filesystem can track internally if it needs to push out a transaction for fdatasync or not. This is just the prototype change with no user for it yet. I plan to push large XFS changes for the next merge window, and getting this trivial infrastructure in this window would help a lot to avoid tree interdependencies. Also remove incorrect comments that ->dirty_inode can't block. That has been changed a long time ago, and many implementations rely on it. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 4 ++-- Documentation/filesystems/vfs.txt | 2 +- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 2 +- fs/ext3/inode.c | 2 +- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 2 +- fs/fs-writeback.c | 5 +---- fs/jffs2/fs.c | 2 +- fs/jffs2/os-linux.h | 2 +- fs/jfs/inode.c | 2 +- fs/jfs/jfs_inode.h | 2 +- fs/nilfs2/inode.c | 2 +- fs/nilfs2/nilfs.h | 2 +- fs/reiserfs/super.c | 2 +- fs/ubifs/super.c | 2 +- fs/xfs/linux-2.6/xfs_super.c | 3 ++- include/linux/ext3_fs.h | 2 +- include/linux/fs.h | 2 +- 19 files changed, 21 insertions(+), 23 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 61b31acb917..57d827d6071 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -104,7 +104,7 @@ of the locking scheme for directory operations. prototypes: struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); @@ -126,7 +126,7 @@ locking rules: s_umount alloc_inode: destroy_inode: -dirty_inode: (must not sleep) +dirty_inode: write_inode: drop_inode: !!!inode->i_lock!!! evict_inode: diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 21a7dc467bb..88b9f5519af 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -211,7 +211,7 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, int); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f4b81de3ae..d2177e7ad64 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2522,7 +2522,7 @@ int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); -void btrfs_dirty_inode(struct inode *inode); +void btrfs_dirty_inode(struct inode *inode, int flags); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0ef04..ecff7d7a505 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4396,7 +4396,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. */ -void btrfs_dirty_inode(struct inode *inode) +void btrfs_dirty_inode(struct inode *inode, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 68b2e43d7c3..3451d23c3ba 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3392,7 +3392,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) * so would cause a commit on atime updates, which we don't bother doing. * We handle synchronous inodes at the highest possible level. */ -void ext3_dirty_inode(struct inode *inode) +void ext3_dirty_inode(struct inode *inode, int flags) { handle_t *current_handle = ext3_journal_current_handle(); handle_t *handle; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a74b89c09f9..1921392cd70 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1813,7 +1813,7 @@ extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); extern int ext4_sync_inode(handle_t *, struct inode *); -extern void ext4_dirty_inode(struct inode *); +extern void ext4_dirty_inode(struct inode *, int); extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_can_truncate(struct inode *inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 50d0e9c6458..a5763e3505b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5733,7 +5733,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) * so would cause a commit on atime updates, which we don't bother doing. * We handle synchronous inodes at the highest possible level. */ -void ext4_dirty_inode(struct inode *inode) +void ext4_dirty_inode(struct inode *inode, int flags) { handle_t *handle; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 34591ee804b..0f015a0468d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1007,9 +1007,6 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) * In short, make sure you hash any inodes _before_ you start marking * them dirty. * - * This function *must* be atomic for the I_DIRTY_PAGES case - - * set_page_dirty() is called under spinlock in several places. - * * Note that for blockdevs, inode->dirtied_when represents the dirtying time of * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of * the kernel-internal blockdev inode represents the dirtying time of the @@ -1028,7 +1025,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) */ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { if (sb->s_op->dirty_inode) - sb->s_op->dirty_inode(inode); + sb->s_op->dirty_inode(inode, flags); } /* diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index e896e67767e..46ad619b612 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -357,7 +357,7 @@ error: return ERR_PTR(ret); } -void jffs2_dirty_inode(struct inode *inode) +void jffs2_dirty_inode(struct inode *inode, int flags) { struct iattr iattr; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 00bae7cc2e4..65c6c43ca48 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -172,7 +172,7 @@ int jffs2_setattr (struct dentry *, struct iattr *); int jffs2_do_setattr (struct inode *, struct iattr *); struct inode *jffs2_iget(struct super_block *, unsigned long); void jffs2_evict_inode (struct inode *); -void jffs2_dirty_inode(struct inode *inode); +void jffs2_dirty_inode(struct inode *inode, int flags); struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri); int jffs2_statfs (struct dentry *, struct kstatfs *); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index eddbb373209..109655904bb 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -173,7 +173,7 @@ void jfs_evict_inode(struct inode *inode) dquot_drop(inode); } -void jfs_dirty_inode(struct inode *inode) +void jfs_dirty_inode(struct inode *inode, int flags) { static int noisy = 5; diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 155e91eff07..ec2fb8b945f 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -28,7 +28,7 @@ extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); extern int jfs_write_inode(struct inode *, struct writeback_control *); extern void jfs_evict_inode(struct inode *); -extern void jfs_dirty_inode(struct inode *); +extern void jfs_dirty_inode(struct inode *, int); extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 587f1843283..b954878ad6c 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -917,7 +917,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) * construction. This function can be called both as a single operation * and as a part of indivisible file operations. */ -void nilfs_dirty_inode(struct inode *inode) +void nilfs_dirty_inode(struct inode *inode, int flags) { struct nilfs_transaction_info ti; struct nilfs_mdt_info *mdi = NILFS_MDT(inode); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index a9c6a531f80..f02b9ad43a2 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -269,7 +269,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); extern int nilfs_mark_inode_dirty(struct inode *); -extern void nilfs_dirty_inode(struct inode *); +extern void nilfs_dirty_inode(struct inode *, int flags); int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b216ff6be1c..aa91089162c 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -568,7 +568,7 @@ static void destroy_inodecache(void) } /* we don't mark inodes dirty, we just log them */ -static void reiserfs_dirty_inode(struct inode *inode) +static void reiserfs_dirty_inode(struct inode *inode, int flags) { struct reiserfs_transaction_handle th; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 6db0bdaa9f7..1ab0d22e4c9 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -382,7 +382,7 @@ done: end_writeback(inode); } -static void ubifs_dirty_inode(struct inode *inode) +static void ubifs_dirty_inode(struct inode *inode, int flags) { struct ubifs_inode *ui = ubifs_inode(inode); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 98b9c91fcdf..1e3a7ce804d 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -925,7 +925,8 @@ xfs_fs_inode_init_once( */ STATIC void xfs_fs_dirty_inode( - struct inode *inode) + struct inode *inode, + int flags) { barrier(); XFS_I(inode)->i_update_core = 1; diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 85c1d302c12..5e06acf95d0 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -909,7 +909,7 @@ extern int ext3_setattr (struct dentry *, struct iattr *); extern void ext3_evict_inode (struct inode *); extern int ext3_sync_inode (handle_t *, struct inode *); extern void ext3_discard_reservation (struct inode *); -extern void ext3_dirty_inode(struct inode *); +extern void ext3_dirty_inode(struct inode *, int); extern int ext3_change_inode_journal_flag(struct inode *, int); extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *); extern int ext3_can_truncate(struct inode *inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index 241609346df..573028df050 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1618,7 +1618,7 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); -- cgit v1.2.3-70-g09d2 From 55b23bde19c08f14127a27d461a4e079942c7258 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 27 May 2011 14:50:36 +0200 Subject: xattr: Fix error results for non-existent / invisible attributes Return -ENODATA when trying to read a user.* attribute which cannot exist: user space otherwise does not have a reasonable way to distinguish between non-existent and inaccessible attributes. Likewise, return -ENODATA when an unprivileged process tries to read a trusted.* attribute: to unprivileged processes, those attributes are invisible (listxattr() won't include them). Related to this bug report: https://bugzilla.redhat.com/660613 Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/xattr.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index f1ef94974de..4be2e7666d0 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -46,18 +46,22 @@ xattr_permission(struct inode *inode, const char *name, int mask) return 0; /* - * The trusted.* namespace can only be accessed by a privileged user. + * The trusted.* namespace can only be accessed by privileged users. */ - if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) - return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); + if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { + if (!capable(CAP_SYS_ADMIN)) + return (mask & MAY_WRITE) ? -EPERM : -ENODATA; + return 0; + } - /* In user.* namespace, only regular files and directories can have + /* + * In the user.* namespace, only regular files and directories can have * extended attributes. For sticky directories, only the owner and - * privileged user can write attributes. + * privileged users can write attributes. */ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) - return -EPERM; + return (mask & MAY_WRITE) ? -EPERM : -ENODATA; if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && (mask & MAY_WRITE) && !inode_owner_or_capable(inode)) return -EPERM; -- cgit v1.2.3-70-g09d2 From c642808454ac81d6a6701da6022e93bbe47bb38b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 27 May 2011 14:52:09 +0200 Subject: vfs: Improve the bio_add_page() and bio_add_pc_page() descriptions The descriptions of bio_add_page() and bio_add_pc_page() are slightly inconsistent; improve them. Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/bio.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/bio.c b/fs/bio.c index 840a0d75524..9bfade8a609 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -638,10 +638,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page * @offset: vec entry offset * * Attempt to add a page to the bio_vec maplist. This can fail for a - * number of reasons, such as the bio being full or target block - * device limitations. The target block device must allow bio's - * smaller than PAGE_SIZE, so it is always possible to add a single - * page to an empty bio. This should only be used by REQ_PC bios. + * number of reasons, such as the bio being full or target block device + * limitations. The target block device must allow bio's up to PAGE_SIZE, + * so it is always possible to add a single page to an empty bio. + * + * This should only be used by REQ_PC bios. */ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset) @@ -659,10 +660,9 @@ EXPORT_SYMBOL(bio_add_pc_page); * @offset: vec entry offset * * Attempt to add a page to the bio_vec maplist. This can fail for a - * number of reasons, such as the bio being full or target block - * device limitations. The target block device must allow bio's - * smaller than PAGE_SIZE, so it is always possible to add a single - * page to an empty bio. + * number of reasons, such as the bio being full or target block device + * limitations. The target block device must allow bio's up to PAGE_SIZE, + * so it is always possible to add a single page to an empty bio. */ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) -- cgit v1.2.3-70-g09d2 From 4b4563dc80594c6a2580aa52d9fcf0177a27074e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 May 2011 09:28:01 -0400 Subject: fs: cosmetic inode.c cleanups Move the lock order description after all the includes, remove several fairly outdated and/or incorrect comments, move Andrea's copyright/changelog to the top where it belongs, remove the pointless filename in the top of the file comment, and remove to useless macros. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 54 +++++------------------------------------------------- 1 file changed, 5 insertions(+), 49 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 990d284877a..0f7e88a7803 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1,9 +1,7 @@ /* - * linux/fs/inode.c - * * (C) 1997 Linus Torvalds + * (C) 1999 Andrea Arcangeli (dynamic inode allocation) */ - #include #include #include @@ -27,10 +25,11 @@ #include #include #include +#include /* for inode_has_buffers */ #include "internal.h" /* - * inode locking rules. + * Inode locking rules: * * inode->i_lock protects: * inode->i_state, inode->i_hash, __iget() @@ -60,54 +59,11 @@ * inode_hash_lock */ -/* - * This is needed for the following functions: - * - inode_has_buffers - * - invalidate_bdev - * - * FIXME: remove all knowledge of the buffer layer from this file - */ -#include - -/* - * New inode.c implementation. - * - * This implementation has the basic premise of trying - * to be extremely low-overhead and SMP-safe, yet be - * simple enough to be "obviously correct". - * - * Famous last words. - */ - -/* inode dynamic allocation 1999, Andrea Arcangeli */ - -/* #define INODE_PARANOIA 1 */ -/* #define INODE_DEBUG 1 */ - -/* - * Inode lookup is no longer as critical as it used to be: - * most of the lookups are going to be through the dcache. - */ -#define I_HASHBITS i_hash_shift -#define I_HASHMASK i_hash_mask - static unsigned int i_hash_mask __read_mostly; static unsigned int i_hash_shift __read_mostly; static struct hlist_head *inode_hashtable __read_mostly; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); -/* - * Each inode can be on two separate lists. One is - * the hash list of the inode, used for lookups. The - * other linked list is the "type" list: - * "in_use" - valid inode, i_count > 0, i_nlink > 0 - * "dirty" - as "in_use" but also dirty - * "unused" - valid inode, i_count = 0 - * - * A "dirty" list is maintained for each super block, - * allowing for low-overhead inode sync() operations. - */ - static LIST_HEAD(inode_lru); static DEFINE_SPINLOCK(inode_lru_lock); @@ -424,8 +380,8 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval) tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / L1_CACHE_BYTES; - tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); - return tmp & I_HASHMASK; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift); + return tmp & i_hash_mask; } /** -- cgit v1.2.3-70-g09d2 From 1007da0604b1d2f064bfecece0f131d57237b03f Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 26 May 2011 09:57:33 -0600 Subject: ASoC: Fix dapm_is_shared_kcontrol so everything isn't shared Commit af46800 ("ASoC: Implement mux control sharing") introduced function dapm_is_shared_kcontrol. When this function returns true, the naming of DAPM controls is derived from the kcontrol_new. Otherwise, the name comes from the widget (and possibly a widget's naming prefix). A bug in the implementation of dapm_is_shared_kcontrol made it return 1 in all cases. Hence, that commit caused a change in control naming for all controls instead of just shared controls. Specifically, a control is always considered shared because it is always compared against itself. Solve this by never comparing against the widget containing the control being created. Equally, controls should never be shared between DAPM contexts; when the same codec is instantiated multiple times, the same kcontrol_new will be used. However, the control should no be shared between the multiple instances. I tested that with the Tegra WM8903 driver: * Shared is now mostly 0 as expected, and sometimes 1. * The expected controls are still generated after this change. However, I don't have any systems that have a widget/control naming prefix, so I can't test that aspect. Thanks for Jarkko Nikula for pointing out how to fix this. Reported-by: Liam Girdwood Tested-by: Jarkko Nikula Signed-off-by: Stephen Warren Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 999bb08cdfb..776e6f41830 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -325,6 +325,7 @@ static int dapm_connect_mixer(struct snd_soc_dapm_context *dapm, } static int dapm_is_shared_kcontrol(struct snd_soc_dapm_context *dapm, + struct snd_soc_dapm_widget *kcontrolw, const struct snd_kcontrol_new *kcontrol_new, struct snd_kcontrol **kcontrol) { @@ -334,6 +335,8 @@ static int dapm_is_shared_kcontrol(struct snd_soc_dapm_context *dapm, *kcontrol = NULL; list_for_each_entry(w, &dapm->card->widgets, list) { + if (w == kcontrolw || w->dapm != kcontrolw->dapm) + continue; for (i = 0; i < w->num_kcontrols; i++) { if (&w->kcontrol_news[i] == kcontrol_new) { if (w->kcontrols) @@ -468,7 +471,7 @@ static int dapm_new_mux(struct snd_soc_dapm_context *dapm, return -EINVAL; } - shared = dapm_is_shared_kcontrol(dapm, &w->kcontrol_news[0], + shared = dapm_is_shared_kcontrol(dapm, w, &w->kcontrol_news[0], &kcontrol); if (kcontrol) { wlist = kcontrol->private_data; -- cgit v1.2.3-70-g09d2 From ea02c63d57d7ec099f66ddb2942b4022e865cd5f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 27 May 2011 21:56:16 +0800 Subject: ASoC: Fix wm_hubs input PGA ZC bits Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm_hubs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c index e55b298c14a..9e370d14ad8 100644 --- a/sound/soc/codecs/wm_hubs.c +++ b/sound/soc/codecs/wm_hubs.c @@ -215,23 +215,23 @@ static const struct snd_kcontrol_new analogue_snd_controls[] = { SOC_SINGLE_TLV("IN1L Volume", WM8993_LEFT_LINE_INPUT_1_2_VOLUME, 0, 31, 0, inpga_tlv), SOC_SINGLE("IN1L Switch", WM8993_LEFT_LINE_INPUT_1_2_VOLUME, 7, 1, 1), -SOC_SINGLE("IN1L ZC Switch", WM8993_LEFT_LINE_INPUT_1_2_VOLUME, 7, 1, 0), +SOC_SINGLE("IN1L ZC Switch", WM8993_LEFT_LINE_INPUT_1_2_VOLUME, 6, 1, 0), SOC_SINGLE_TLV("IN1R Volume", WM8993_RIGHT_LINE_INPUT_1_2_VOLUME, 0, 31, 0, inpga_tlv), SOC_SINGLE("IN1R Switch", WM8993_RIGHT_LINE_INPUT_1_2_VOLUME, 7, 1, 1), -SOC_SINGLE("IN1R ZC Switch", WM8993_RIGHT_LINE_INPUT_1_2_VOLUME, 7, 1, 0), +SOC_SINGLE("IN1R ZC Switch", WM8993_RIGHT_LINE_INPUT_1_2_VOLUME, 6, 1, 0), SOC_SINGLE_TLV("IN2L Volume", WM8993_LEFT_LINE_INPUT_3_4_VOLUME, 0, 31, 0, inpga_tlv), SOC_SINGLE("IN2L Switch", WM8993_LEFT_LINE_INPUT_3_4_VOLUME, 7, 1, 1), -SOC_SINGLE("IN2L ZC Switch", WM8993_LEFT_LINE_INPUT_3_4_VOLUME, 7, 1, 0), +SOC_SINGLE("IN2L ZC Switch", WM8993_LEFT_LINE_INPUT_3_4_VOLUME, 6, 1, 0), SOC_SINGLE_TLV("IN2R Volume", WM8993_RIGHT_LINE_INPUT_3_4_VOLUME, 0, 31, 0, inpga_tlv), SOC_SINGLE("IN2R Switch", WM8993_RIGHT_LINE_INPUT_3_4_VOLUME, 7, 1, 1), -SOC_SINGLE("IN2R ZC Switch", WM8993_RIGHT_LINE_INPUT_3_4_VOLUME, 7, 1, 0), +SOC_SINGLE("IN2R ZC Switch", WM8993_RIGHT_LINE_INPUT_3_4_VOLUME, 6, 1, 0), SOC_SINGLE_TLV("MIXINL IN2L Volume", WM8993_INPUT_MIXER3, 7, 1, 0, inmix_sw_tlv), -- cgit v1.2.3-70-g09d2 From f133ecca9cbb31b5e6e9bda27cbe3034fbf656df Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 26 May 2011 12:40:09 -0400 Subject: arch/tile: more /proc and /sys file support This change introduces a few of the less controversial /proc and /proc/sys interfaces for tile, along with sysfs attributes for various things that were originally proposed as /proc/tile files. It also adjusts the "hardwall" proc API. Arnd Bergmann reviewed the initial arch/tile submission, which included a complete set of all the /proc/tile and /proc/sys/tile knobs that we had added in a somewhat ad hoc way during initial development, and provided feedback on where most of them should go. One knob turned out to be similar enough to the existing /proc/sys/debug/exception-trace that it was re-implemented to use that model instead. Another knob was /proc/tile/grid, which reported the "grid" dimensions of a tile chip (e.g. 8x8 processors = 64-core chip). Arnd suggested looking at sysfs for that, so this change moves that information to a pair of sysfs attributes (chip_width and chip_height) in the /sys/devices/system/cpu directory. We also put the "chip_serial" and "chip_revision" information from our old /proc/tile/board file as attributes in /sys/devices/system/cpu. Other information collected via hypervisor APIs is now placed in /sys/hypervisor. We create a /sys/hypervisor/type file (holding the constant string "tilera") to be parallel with the Xen use of /sys/hypervisor/type holding "xen". We create three top-level files, "version" (the hypervisor's own version), "config_version" (the version of the configuration file), and "hvconfig" (the contents of the configuration file). The remaining information from our old /proc/tile/board and /proc/tile/switch files becomes an attribute group appearing under /sys/hypervisor/board/. Finally, after some feedback from Arnd Bergmann for the previous version of this patch, the /proc/tile/hardwall file is split up into two conceptual parts. First, a directory /proc/tile/hardwall/ which contains one file per active hardwall, each file named after the hardwall's ID and holding a cpulist that says which cpus are enclosed by the hardwall. Second, a /proc/PID file "hardwall" that is either empty (for non-hardwall-using processes) or contains the hardwall ID. Finally, this change pushes the /proc/sys/tile/unaligned_fixup/ directory, with knobs controlling the kernel code for handling the fixup of unaligned exceptions. Reviewed-by: Arnd Bergmann Signed-off-by: Chris Metcalf --- arch/tile/Kconfig | 1 + arch/tile/include/asm/hardwall.h | 15 +++- arch/tile/kernel/Makefile | 2 +- arch/tile/kernel/hardwall.c | 90 ++++++++++++++----- arch/tile/kernel/proc.c | 73 +++++++++++++++ arch/tile/kernel/sysfs.c | 185 +++++++++++++++++++++++++++++++++++++++ fs/proc/base.c | 9 ++ 7 files changed, 347 insertions(+), 28 deletions(-) create mode 100644 arch/tile/kernel/sysfs.c diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 635e1bfb1c5..3f7d63c7ae8 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -12,6 +12,7 @@ config TILE select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW + select SYS_HYPERVISOR # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h index 0bed3ec7b42..2ac422848c7 100644 --- a/arch/tile/include/asm/hardwall.h +++ b/arch/tile/include/asm/hardwall.h @@ -40,6 +40,10 @@ #define HARDWALL_DEACTIVATE \ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE) +#define _HARDWALL_GET_ID 4 +#define HARDWALL_GET_ID \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID) + #ifndef __KERNEL__ /* This is the canonical name expected by userspace. */ @@ -47,9 +51,14 @@ #else -/* Hook for /proc/tile/hardwall. */ -struct seq_file; -int proc_tile_hardwall_show(struct seq_file *sf, void *v); +/* /proc hooks for hardwall. */ +struct proc_dir_entry; +#ifdef CONFIG_HARDWALL +void proc_tile_hardwall_init(struct proc_dir_entry *root); +int proc_pid_hardwall(struct task_struct *task, char *buffer); +#else +static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {} +#endif #endif diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index b4c8e8ec45d..b4dbc057baa 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -5,7 +5,7 @@ extra-y := vmlinux.lds head_$(BITS).o obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \ pci-dma.o proc.o process.o ptrace.o reboot.o \ - setup.o signal.o single_step.o stack.o sys.o time.o traps.o \ + setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \ intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o obj-$(CONFIG_HARDWALL) += hardwall.o diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 3bddef710de..8c41891aab3 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -40,16 +40,25 @@ struct hardwall_info { struct list_head list; /* "rectangles" list */ struct list_head task_head; /* head of tasks in this hardwall */ + struct cpumask cpumask; /* cpus in the rectangle */ int ulhc_x; /* upper left hand corner x coord */ int ulhc_y; /* upper left hand corner y coord */ int width; /* rectangle width */ int height; /* rectangle height */ + int id; /* integer id for this hardwall */ int teardown_in_progress; /* are we tearing this one down? */ }; /* Currently allocated hardwall rectangles */ static LIST_HEAD(rectangles); +/* /proc/tile/hardwall */ +static struct proc_dir_entry *hardwall_proc_dir; + +/* Functions to manage files in /proc/tile/hardwall. */ +static void hardwall_add_proc(struct hardwall_info *rect); +static void hardwall_remove_proc(struct hardwall_info *rect); + /* * Guard changes to the hardwall data structures. * This could be finer grained (e.g. one lock for the list of hardwall @@ -105,6 +114,8 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) r->ulhc_y = cpu_y(ulhc); r->width = cpu_x(lrhc) - r->ulhc_x + 1; r->height = cpu_y(lrhc) - r->ulhc_y + 1; + cpumask_copy(&r->cpumask, mask); + r->id = ulhc; /* The ulhc cpu id can be the hardwall id. */ /* Width and height must be positive */ if (r->width <= 0 || r->height <= 0) @@ -388,6 +399,9 @@ static struct hardwall_info *hardwall_create( /* Set up appropriate hardwalling on all affected cpus. */ hardwall_setup(rect); + /* Create a /proc/tile/hardwall entry. */ + hardwall_add_proc(rect); + return rect; } @@ -645,6 +659,9 @@ static void hardwall_destroy(struct hardwall_info *rect) /* Restart switch and disable firewall. */ on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1); + /* Remove the /proc/tile/hardwall entry. */ + hardwall_remove_proc(rect); + /* Now free the rectangle from the list. */ spin_lock_irqsave(&hardwall_lock, flags); BUG_ON(!list_empty(&rect->task_head)); @@ -654,35 +671,57 @@ static void hardwall_destroy(struct hardwall_info *rect) } -/* - * Dump hardwall state via /proc; initialized in arch/tile/sys/proc.c. - */ -int proc_tile_hardwall_show(struct seq_file *sf, void *v) +static int hardwall_proc_show(struct seq_file *sf, void *v) { - struct hardwall_info *r; + struct hardwall_info *rect = sf->private; + char buf[256]; - if (udn_disabled) { - seq_printf(sf, "%dx%d 0,0 pids:\n", smp_width, smp_height); - return 0; - } - - spin_lock_irq(&hardwall_lock); - list_for_each_entry(r, &rectangles, list) { - struct task_struct *p; - seq_printf(sf, "%dx%d %d,%d pids:", - r->width, r->height, r->ulhc_x, r->ulhc_y); - list_for_each_entry(p, &r->task_head, thread.hardwall_list) { - unsigned int cpu = cpumask_first(&p->cpus_allowed); - unsigned int x = cpu % smp_width; - unsigned int y = cpu / smp_width; - seq_printf(sf, " %d@%d,%d", p->pid, x, y); - } - seq_printf(sf, "\n"); - } - spin_unlock_irq(&hardwall_lock); + int rc = cpulist_scnprintf(buf, sizeof(buf), &rect->cpumask); + buf[rc++] = '\n'; + seq_write(sf, buf, rc); return 0; } +static int hardwall_proc_open(struct inode *inode, + struct file *file) +{ + return single_open(file, hardwall_proc_show, PDE(inode)->data); +} + +static const struct file_operations hardwall_proc_fops = { + .open = hardwall_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void hardwall_add_proc(struct hardwall_info *rect) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", rect->id); + proc_create_data(buf, 0444, hardwall_proc_dir, + &hardwall_proc_fops, rect); +} + +static void hardwall_remove_proc(struct hardwall_info *rect) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", rect->id); + remove_proc_entry(buf, hardwall_proc_dir); +} + +int proc_pid_hardwall(struct task_struct *task, char *buffer) +{ + struct hardwall_info *rect = task->thread.hardwall; + return rect ? sprintf(buffer, "%d\n", rect->id) : 0; +} + +void proc_tile_hardwall_init(struct proc_dir_entry *root) +{ + if (!udn_disabled) + hardwall_proc_dir = proc_mkdir("hardwall", root); +} + /* * Character device support via ioctl/close. @@ -716,6 +755,9 @@ static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b) return -EINVAL; return hardwall_deactivate(current); + case _HARDWALL_GET_ID: + return rect ? rect->id : -EINVAL; + default: return -EINVAL; } diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index 2e02c41ddf3..62d820833c6 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -88,3 +89,75 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; + +/* + * Support /proc/tile directory + */ + +static int __init proc_tile_init(void) +{ + struct proc_dir_entry *root = proc_mkdir("tile", NULL); + if (root == NULL) + return 0; + + proc_tile_hardwall_init(root); + + return 0; +} + +arch_initcall(proc_tile_init); + +/* + * Support /proc/sys/tile directory + */ + +#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ +static ctl_table unaligned_subtable[] = { + { + .procname = "enabled", + .data = &unaligned_fixup, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "printk", + .data = &unaligned_printk, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "count", + .data = &unaligned_fixup_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + {} +}; + +static ctl_table unaligned_table[] = { + { + .procname = "unaligned_fixup", + .mode = 0555, + .child = unaligned_subtable + }, + {} +}; +#endif + +static struct ctl_path tile_path[] = { + { .procname = "tile" }, + { } +}; + +static int __init proc_sys_tile_init(void) +{ +#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ + register_sysctl_paths(tile_path, unaligned_table); +#endif + return 0; +} + +arch_initcall(proc_sys_tile_init); diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c new file mode 100644 index 00000000000..b671a86f451 --- /dev/null +++ b/arch/tile/kernel/sysfs.c @@ -0,0 +1,185 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * /sys entry support. + */ + +#include +#include +#include +#include +#include + +/* Return a string queried from the hypervisor, truncated to page size. */ +static ssize_t get_hv_confstr(char *page, int query) +{ + ssize_t n = hv_confstr(query, (unsigned long)page, PAGE_SIZE - 1); + n = n < 0 ? 0 : min(n, (ssize_t)PAGE_SIZE - 1) - 1; + if (n) + page[n++] = '\n'; + page[n] = '\0'; + return n; +} + +static ssize_t chip_width_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_width); +} +static SYSDEV_CLASS_ATTR(chip_width, 0444, chip_width_show, NULL); + +static ssize_t chip_height_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_height); +} +static SYSDEV_CLASS_ATTR(chip_height, 0444, chip_height_show, NULL); + +static ssize_t chip_serial_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_SERIAL_NUM); +} +static SYSDEV_CLASS_ATTR(chip_serial, 0444, chip_serial_show, NULL); + +static ssize_t chip_revision_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_REV); +} +static SYSDEV_CLASS_ATTR(chip_revision, 0444, chip_revision_show, NULL); + + +static ssize_t type_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return sprintf(page, "tilera\n"); +} +static SYSDEV_CLASS_ATTR(type, 0444, type_show, NULL); + +#define HV_CONF_ATTR(name, conf) \ + static ssize_t name ## _show(struct sysdev_class *dev, \ + struct sysdev_class_attribute *attr, \ + char *page) \ + { \ + return get_hv_confstr(page, conf); \ + } \ + static SYSDEV_CLASS_ATTR(name, 0444, name ## _show, NULL); + +HV_CONF_ATTR(version, HV_CONFSTR_HV_SW_VER) +HV_CONF_ATTR(config_version, HV_CONFSTR_HV_CONFIG_VER) + +HV_CONF_ATTR(board_part, HV_CONFSTR_BOARD_PART_NUM) +HV_CONF_ATTR(board_serial, HV_CONFSTR_BOARD_SERIAL_NUM) +HV_CONF_ATTR(board_revision, HV_CONFSTR_BOARD_REV) +HV_CONF_ATTR(board_description, HV_CONFSTR_BOARD_DESC) +HV_CONF_ATTR(mezz_part, HV_CONFSTR_MEZZ_PART_NUM) +HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM) +HV_CONF_ATTR(mezz_revision, HV_CONFSTR_MEZZ_REV) +HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC) +HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL) + +static struct attribute *board_attrs[] = { + &attr_board_part.attr, + &attr_board_serial.attr, + &attr_board_revision.attr, + &attr_board_description.attr, + &attr_mezz_part.attr, + &attr_mezz_serial.attr, + &attr_mezz_revision.attr, + &attr_mezz_description.attr, + &attr_switch_control.attr, + NULL +}; + +static struct attribute_group board_attr_group = { + .name = "board", + .attrs = board_attrs, +}; + + +static struct bin_attribute hvconfig_bin; + +static ssize_t +hvconfig_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + static size_t size; + + /* Lazily learn the true size (minus the trailing NUL). */ + if (size == 0) + size = hv_confstr(HV_CONFSTR_HV_CONFIG, 0, 0) - 1; + + /* Check and adjust input parameters. */ + if (off > size) + return -EINVAL; + if (count > size - off) + count = size - off; + + if (count) { + /* Get a copy of the hvc and copy out the relevant portion. */ + char *hvc; + + size = off + count; + hvc = kmalloc(size, GFP_KERNEL); + if (hvc == NULL) + return -ENOMEM; + hv_confstr(HV_CONFSTR_HV_CONFIG, (unsigned long)hvc, size); + memcpy(buf, hvc + off, count); + kfree(hvc); + } + + return count; +} + +static int __init create_sysfs_entries(void) +{ + struct sysdev_class *cls = &cpu_sysdev_class; + int err = 0; + +#define create_cpu_attr(name) \ + if (!err) \ + err = sysfs_create_file(&cls->kset.kobj, &attr_##name.attr); + create_cpu_attr(chip_width); + create_cpu_attr(chip_height); + create_cpu_attr(chip_serial); + create_cpu_attr(chip_revision); + +#define create_hv_attr(name) \ + if (!err) \ + err = sysfs_create_file(hypervisor_kobj, &attr_##name.attr); + create_hv_attr(type); + create_hv_attr(version); + create_hv_attr(config_version); + + if (!err) + err = sysfs_create_group(hypervisor_kobj, &board_attr_group); + + if (!err) { + sysfs_bin_attr_init(&hvconfig_bin); + hvconfig_bin.attr.name = "hvconfig"; + hvconfig_bin.attr.mode = S_IRUGO; + hvconfig_bin.read = hvconfig_bin_read; + hvconfig_bin.size = PAGE_SIZE; + err = sysfs_create_bin_file(hypervisor_kobj, &hvconfig_bin); + } + + return err; +} +subsys_initcall(create_sysfs_entries); diff --git a/fs/proc/base.c b/fs/proc/base.c index dfa532730e5..3ad615fb865 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -83,6 +83,9 @@ #include #include #include +#ifdef CONFIG_HARDWALL +#include +#endif #include "internal.h" /* NOTE: @@ -2894,6 +2897,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_TASK_IO_ACCOUNTING INF("io", S_IRUGO, proc_tgid_io_accounting), #endif +#ifdef CONFIG_HARDWALL + INF("hardwall", S_IRUGO, proc_pid_hardwall), +#endif }; static int proc_tgid_base_readdir(struct file * filp, @@ -3232,6 +3238,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_TASK_IO_ACCOUNTING INF("io", S_IRUGO, proc_tid_io_accounting), #endif +#ifdef CONFIG_HARDWALL + INF("hardwall", S_IRUGO, proc_pid_hardwall), +#endif }; static int proc_tid_base_readdir(struct file * filp, -- cgit v1.2.3-70-g09d2 From 39ddf3bf6463bc86041e806b43e014d50a144aa4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:32 -0700 Subject: asus-wmi: Remove __init from asus_wmi_platform_init It's used by a non-init function. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 832a3fd7c1c..c8c9842753d 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -1223,7 +1223,7 @@ static int asus_wmi_sysfs_init(struct platform_device *device) /* * Platform device */ -static int __init asus_wmi_platform_init(struct asus_wmi *asus) +static int asus_wmi_platform_init(struct asus_wmi *asus) { int rv; -- cgit v1.2.3-70-g09d2 From 3a35125f97be0658305c9509c51077959c19b244 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:33 -0700 Subject: ibm_rtl: Remove warnings from casts of pointer to int Just print them as %p. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/ibm_rtl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c index b1396e5b295..10563458408 100644 --- a/drivers/platform/x86/ibm_rtl.c +++ b/drivers/platform/x86/ibm_rtl.c @@ -288,7 +288,7 @@ static int __init ibm_rtl_init(void) { if ((readq(&tmp->signature) & RTL_MASK) == RTL_SIGNATURE) { phys_addr_t addr; unsigned int plen; - RTL_DEBUG("found RTL_SIGNATURE at %#llx\n", (u64)tmp); + RTL_DEBUG("found RTL_SIGNATURE at %p\n", tmp); rtl_table = tmp; /* The address, value, width and offset are platform * dependent and found in the ibm_rtl_table */ @@ -300,7 +300,7 @@ static int __init ibm_rtl_init(void) { RTL_DEBUG("addr = %#llx\n", (unsigned long long)addr); plen = rtl_cmd_width/sizeof(char); rtl_cmd_addr = rtl_port_map(addr, plen); - RTL_DEBUG("rtl_cmd_addr = %#llx\n", (u64)rtl_cmd_addr); + RTL_DEBUG("rtl_cmd_addr = %p\n", rtl_cmd_addr); if (!rtl_cmd_addr) { ret = -ENOMEM; break; -- cgit v1.2.3-70-g09d2 From 249c720d88fa325522cb763dda09f8c9e8e964cb Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:34 -0700 Subject: acer-wmi: pr_ cleanups Convert pr_warning to pr_warn. Add some missing newlines to pr_ uses. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/acer-wmi.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index ac4e7f83ce6..fa8fa73f543 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -1081,7 +1081,7 @@ static acpi_status wmid3_get_device_status(u32 *value, u16 device) return AE_ERROR; } if (obj->buffer.length != 8) { - pr_warning("Unknown buffer length %d\n", obj->buffer.length); + pr_warn("Unknown buffer length %d\n", obj->buffer.length); kfree(obj); return AE_ERROR; } @@ -1090,8 +1090,8 @@ static acpi_status wmid3_get_device_status(u32 *value, u16 device) kfree(obj); if (return_value.error_code || return_value.ec_return_value) - pr_warning("Get Device Status failed: " - "0x%x - 0x%x\n", return_value.error_code, + pr_warn("Get Device Status failed: 0x%x - 0x%x\n", + return_value.error_code, return_value.ec_return_value); else *value = !!(return_value.devices & device); @@ -1317,7 +1317,7 @@ static void acer_wmi_notify(u32 value, void *context) status = wmi_get_event_data(value, &response); if (status != AE_OK) { - pr_warning("bad event status 0x%x\n", status); + pr_warn("bad event status 0x%x\n", status); return; } @@ -1326,12 +1326,12 @@ static void acer_wmi_notify(u32 value, void *context) if (!obj) return; if (obj->type != ACPI_TYPE_BUFFER) { - pr_warning("Unknown response received %d\n", obj->type); + pr_warn("Unknown response received %d\n", obj->type); kfree(obj); return; } if (obj->buffer.length != 8) { - pr_warning("Unknown buffer length %d\n", obj->buffer.length); + pr_warn("Unknown buffer length %d\n", obj->buffer.length); kfree(obj); return; } @@ -1356,11 +1356,11 @@ static void acer_wmi_notify(u32 value, void *context) } if (!sparse_keymap_report_event(acer_wmi_input_dev, return_value.key_num, 1, true)) - pr_warning("Unknown key number - 0x%x\n", + pr_warn("Unknown key number - 0x%x\n", return_value.key_num); break; default: - pr_warning("Unknown function number - %d - %d\n", + pr_warn("Unknown function number - %d - %d\n", return_value.function, return_value.key_num); break; } @@ -1389,7 +1389,7 @@ wmid3_set_lm_mode(struct lm_input_params *params, return AE_ERROR; } if (obj->buffer.length != 4) { - pr_warning("Unknown buffer length %d\n", obj->buffer.length); + pr_warn("Unknown buffer length %d\n", obj->buffer.length); kfree(obj); return AE_ERROR; } @@ -1414,11 +1414,11 @@ static int acer_wmi_enable_ec_raw(void) status = wmid3_set_lm_mode(¶ms, &return_value); if (return_value.error_code || return_value.ec_return_value) - pr_warning("Enabling EC raw mode failed: " - "0x%x - 0x%x\n", return_value.error_code, - return_value.ec_return_value); + pr_warn("Enabling EC raw mode failed: 0x%x - 0x%x\n", + return_value.error_code, + return_value.ec_return_value); else - pr_info("Enabled EC raw mode"); + pr_info("Enabled EC raw mode\n"); return status; } @@ -1437,9 +1437,9 @@ static int acer_wmi_enable_lm(void) status = wmid3_set_lm_mode(¶ms, &return_value); if (return_value.error_code || return_value.ec_return_value) - pr_warning("Enabling Launch Manager failed: " - "0x%x - 0x%x\n", return_value.error_code, - return_value.ec_return_value); + pr_warn("Enabling Launch Manager failed: 0x%x - 0x%x\n", + return_value.error_code, + return_value.ec_return_value); return status; } -- cgit v1.2.3-70-g09d2 From 5ad77dcfb49a85715bcb9174a133cba140eda1bc Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:35 -0700 Subject: asus: Add pr_fmt and convert printks to pr_ Add pr_fmt, prefixes each log message. Convert printks to pr_. Convert pr_warning to pr_warn. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-laptop.c | 34 ++++++++--------- drivers/platform/x86/asus-wmi.c | 20 +++++----- drivers/platform/x86/asus_acpi.c | 77 ++++++++++++++++---------------------- 3 files changed, 59 insertions(+), 72 deletions(-) diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index c53b3ff7978..d65df92e2ac 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -318,7 +318,7 @@ static int acpi_check_handle(acpi_handle handle, const char *method, if (status != AE_OK) { if (ret) - pr_warning("Error finding %s\n", method); + pr_warn("Error finding %s\n", method); return -ENODEV; } return 0; @@ -383,7 +383,7 @@ static int asus_kled_lvl(struct asus_laptop *asus) rv = acpi_evaluate_integer(asus->handle, METHOD_KBD_LIGHT_GET, ¶ms, &kblv); if (ACPI_FAILURE(rv)) { - pr_warning("Error reading kled level\n"); + pr_warn("Error reading kled level\n"); return -ENODEV; } return kblv; @@ -397,7 +397,7 @@ static int asus_kled_set(struct asus_laptop *asus, int kblv) kblv = 0; if (write_acpi_int(asus->handle, METHOD_KBD_LIGHT_SET, kblv)) { - pr_warning("Keyboard LED display write failed\n"); + pr_warn("Keyboard LED display write failed\n"); return -EINVAL; } return 0; @@ -531,7 +531,7 @@ static int asus_read_brightness(struct backlight_device *bd) rv = acpi_evaluate_integer(asus->handle, METHOD_BRIGHTNESS_GET, NULL, &value); if (ACPI_FAILURE(rv)) - pr_warning("Error reading brightness\n"); + pr_warn("Error reading brightness\n"); return value; } @@ -541,7 +541,7 @@ static int asus_set_brightness(struct backlight_device *bd, int value) struct asus_laptop *asus = bl_get_data(bd); if (write_acpi_int(asus->handle, METHOD_BRIGHTNESS_SET, value)) { - pr_warning("Error changing brightness\n"); + pr_warn("Error changing brightness\n"); return -EIO; } return 0; @@ -730,7 +730,7 @@ static ssize_t store_ledd(struct device *dev, struct device_attribute *attr, rv = parse_arg(buf, count, &value); if (rv > 0) { if (write_acpi_int(asus->handle, METHOD_LEDD, value)) { - pr_warning("LED display write failed\n"); + pr_warn("LED display write failed\n"); return -ENODEV; } asus->ledd_status = (u32) value; @@ -752,7 +752,7 @@ static int asus_wireless_status(struct asus_laptop *asus, int mask) rv = acpi_evaluate_integer(asus->handle, METHOD_WL_STATUS, NULL, &status); if (ACPI_FAILURE(rv)) { - pr_warning("Error reading Wireless status\n"); + pr_warn("Error reading Wireless status\n"); return -EINVAL; } return !!(status & mask); @@ -764,7 +764,7 @@ static int asus_wireless_status(struct asus_laptop *asus, int mask) static int asus_wlan_set(struct asus_laptop *asus, int status) { if (write_acpi_int(asus->handle, METHOD_WLAN, !!status)) { - pr_warning("Error setting wlan status to %d", status); + pr_warn("Error setting wlan status to %d\n", status); return -EIO; } return 0; @@ -792,7 +792,7 @@ static ssize_t store_wlan(struct device *dev, struct device_attribute *attr, static int asus_bluetooth_set(struct asus_laptop *asus, int status) { if (write_acpi_int(asus->handle, METHOD_BLUETOOTH, !!status)) { - pr_warning("Error setting bluetooth status to %d", status); + pr_warn("Error setting bluetooth status to %d\n", status); return -EIO; } return 0; @@ -821,7 +821,7 @@ static ssize_t store_bluetooth(struct device *dev, static int asus_wimax_set(struct asus_laptop *asus, int status) { if (write_acpi_int(asus->handle, METHOD_WIMAX, !!status)) { - pr_warning("Error setting wimax status to %d", status); + pr_warn("Error setting wimax status to %d\n", status); return -EIO; } return 0; @@ -850,7 +850,7 @@ static ssize_t store_wimax(struct device *dev, static int asus_wwan_set(struct asus_laptop *asus, int status) { if (write_acpi_int(asus->handle, METHOD_WWAN, !!status)) { - pr_warning("Error setting wwan status to %d", status); + pr_warn("Error setting wwan status to %d\n", status); return -EIO; } return 0; @@ -880,7 +880,7 @@ static void asus_set_display(struct asus_laptop *asus, int value) { /* no sanity check needed for now */ if (write_acpi_int(asus->handle, METHOD_SWITCH_DISPLAY, value)) - pr_warning("Error setting display\n"); + pr_warn("Error setting display\n"); return; } @@ -909,7 +909,7 @@ static ssize_t store_disp(struct device *dev, struct device_attribute *attr, static void asus_als_switch(struct asus_laptop *asus, int value) { if (write_acpi_int(asus->handle, METHOD_ALS_CONTROL, value)) - pr_warning("Error setting light sensor switch\n"); + pr_warn("Error setting light sensor switch\n"); asus->light_switch = value; } @@ -937,7 +937,7 @@ static ssize_t store_lssw(struct device *dev, struct device_attribute *attr, static void asus_als_level(struct asus_laptop *asus, int value) { if (write_acpi_int(asus->handle, METHOD_ALS_LEVEL, value)) - pr_warning("Error setting light sensor level\n"); + pr_warn("Error setting light sensor level\n"); asus->light_level = value; } @@ -976,7 +976,7 @@ static int asus_gps_status(struct asus_laptop *asus) rv = acpi_evaluate_integer(asus->handle, METHOD_GPS_STATUS, NULL, &status); if (ACPI_FAILURE(rv)) { - pr_warning("Error reading GPS status\n"); + pr_warn("Error reading GPS status\n"); return -ENODEV; } return !!status; @@ -1284,7 +1284,7 @@ static int asus_laptop_get_info(struct asus_laptop *asus) */ status = acpi_get_table(ACPI_SIG_DSDT, 1, &asus->dsdt_info); if (ACPI_FAILURE(status)) - pr_warning("Couldn't get the DSDT table header\n"); + pr_warn("Couldn't get the DSDT table header\n"); /* We have to write 0 on init this far for all ASUS models */ if (write_acpi_int_ret(asus->handle, "INIT", 0, &buffer)) { @@ -1296,7 +1296,7 @@ static int asus_laptop_get_info(struct asus_laptop *asus) status = acpi_evaluate_integer(asus->handle, "BSTS", NULL, &bsts_result); if (ACPI_FAILURE(status)) - pr_warning("Error calling BSTS\n"); + pr_warn("Error calling BSTS\n"); else if (bsts_result) pr_notice("BSTS called, 0x%02x returned\n", (uint) bsts_result); diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index c8c9842753d..00460cb9587 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -425,7 +425,7 @@ static void asus_rfkill_hotplug(struct asus_wmi *asus) if (asus->hotplug_slot) { bus = pci_find_bus(0, 1); if (!bus) { - pr_warning("Unable to find PCI bus 1?\n"); + pr_warn("Unable to find PCI bus 1?\n"); goto out_unlock; } @@ -436,12 +436,12 @@ static void asus_rfkill_hotplug(struct asus_wmi *asus) absent = (l == 0xffffffff); if (blocked != absent) { - pr_warning("BIOS says wireless lan is %s, " - "but the pci device is %s\n", - blocked ? "blocked" : "unblocked", - absent ? "absent" : "present"); - pr_warning("skipped wireless hotplug as probably " - "inappropriate for this model\n"); + pr_warn("BIOS says wireless lan is %s, " + "but the pci device is %s\n", + blocked ? "blocked" : "unblocked", + absent ? "absent" : "present"); + pr_warn("skipped wireless hotplug as probably " + "inappropriate for this model\n"); goto out_unlock; } @@ -500,7 +500,7 @@ static int asus_register_rfkill_notifier(struct asus_wmi *asus, char *node) ACPI_SYSTEM_NOTIFY, asus_rfkill_notify, asus); if (ACPI_FAILURE(status)) - pr_warning("Failed to register notify on %s\n", node); + pr_warn("Failed to register notify on %s\n", node); } else return -ENODEV; @@ -1583,12 +1583,12 @@ static int asus_wmi_probe(struct platform_device *pdev) int ret; if (!wmi_has_guid(ASUS_WMI_MGMT_GUID)) { - pr_warning("Management GUID not found\n"); + pr_warn("Management GUID not found\n"); return -ENODEV; } if (wdrv->event_guid && !wmi_has_guid(wdrv->event_guid)) { - pr_warning("Event GUID not found\n"); + pr_warn("Event GUID not found\n"); return -ENODEV; } diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c index f503607c064..d9312b3073e 100644 --- a/drivers/platform/x86/asus_acpi.c +++ b/drivers/platform/x86/asus_acpi.c @@ -30,6 +30,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -581,8 +583,7 @@ static int read_led(const char *ledname, int ledmask) if (read_acpi_int(NULL, ledname, &led_status)) return led_status; else - printk(KERN_WARNING "Asus ACPI: Error reading LED " - "status\n"); + pr_warn("Error reading LED status\n"); } return (hotk->status & ledmask) ? 1 : 0; } @@ -621,8 +622,7 @@ write_led(const char __user *buffer, unsigned long count, led_out = !led_out; if (!write_acpi_int(hotk->handle, ledname, led_out, NULL)) - printk(KERN_WARNING "Asus ACPI: LED (%s) write failed\n", - ledname); + pr_warn("LED (%s) write failed\n", ledname); return rv; } @@ -679,8 +679,7 @@ static ssize_t ledd_proc_write(struct file *file, const char __user *buffer, if (rv > 0) { if (!write_acpi_int (hotk->handle, hotk->methods->mt_ledd, value, NULL)) - printk(KERN_WARNING - "Asus ACPI: LED display write failed\n"); + pr_warn("LED display write failed\n"); else hotk->ledd_status = (u32) value; } @@ -838,8 +837,7 @@ static int get_lcd_state(void) } else { /* We don't have to check anything if we are here */ if (!read_acpi_int(NULL, hotk->methods->lcd_status, &lcd)) - printk(KERN_WARNING - "Asus ACPI: Error reading LCD status\n"); + pr_warn("Error reading LCD status\n"); if (hotk->model == L2D) lcd = ~lcd; @@ -871,7 +869,7 @@ static int set_lcd_state(int value) the exact behaviour is simulated here */ } if (ACPI_FAILURE(status)) - printk(KERN_WARNING "Asus ACPI: Error switching LCD\n"); + pr_warn("Error switching LCD\n"); } return 0; @@ -915,13 +913,11 @@ static int read_brightness(struct backlight_device *bd) if (hotk->methods->brightness_get) { /* SPLV/GPLV laptop */ if (!read_acpi_int(hotk->handle, hotk->methods->brightness_get, &value)) - printk(KERN_WARNING - "Asus ACPI: Error reading brightness\n"); + pr_warn("Error reading brightness\n"); } else if (hotk->methods->brightness_status) { /* For D1 for example */ if (!read_acpi_int(NULL, hotk->methods->brightness_status, &value)) - printk(KERN_WARNING - "Asus ACPI: Error reading brightness\n"); + pr_warn("Error reading brightness\n"); } else /* No GPLV method */ value = hotk->brightness; return value; @@ -939,8 +935,7 @@ static int set_brightness(int value) if (hotk->methods->brightness_set) { if (!write_acpi_int(hotk->handle, hotk->methods->brightness_set, value, NULL)) { - printk(KERN_WARNING - "Asus ACPI: Error changing brightness\n"); + pr_warn("Error changing brightness\n"); ret = -EIO; } goto out; @@ -955,8 +950,7 @@ static int set_brightness(int value) NULL, NULL); (value > 0) ? value-- : value++; if (ACPI_FAILURE(status)) { - printk(KERN_WARNING - "Asus ACPI: Error changing brightness\n"); + pr_warn("Error changing brightness\n"); ret = -EIO; } } @@ -1008,7 +1002,7 @@ static void set_display(int value) /* no sanity check needed for now */ if (!write_acpi_int(hotk->handle, hotk->methods->display_set, value, NULL)) - printk(KERN_WARNING "Asus ACPI: Error setting display\n"); + pr_warn("Error setting display\n"); return; } @@ -1021,8 +1015,7 @@ static int disp_proc_show(struct seq_file *m, void *v) int value = 0; if (!read_acpi_int(hotk->handle, hotk->methods->display_get, &value)) - printk(KERN_WARNING - "Asus ACPI: Error reading display status\n"); + pr_warn("Error reading display status\n"); value &= 0x07; /* needed for some models, shouldn't hurt others */ seq_printf(m, "%d\n", value); return 0; @@ -1068,7 +1061,7 @@ asus_proc_add(char *name, const struct file_operations *proc_fops, mode_t mode, proc = proc_create_data(name, mode, acpi_device_dir(device), proc_fops, acpi_driver_data(device)); if (!proc) { - printk(KERN_WARNING " Unable to create %s fs entry\n", name); + pr_warn(" Unable to create %s fs entry\n", name); return -1; } proc->uid = asus_uid; @@ -1085,8 +1078,8 @@ static int asus_hotk_add_fs(struct acpi_device *device) mode = S_IFREG | S_IRUGO | S_IWUSR | S_IWGRP; } else { mode = S_IFREG | S_IRUSR | S_IRGRP | S_IWUSR | S_IWGRP; - printk(KERN_WARNING " asus_uid and asus_gid parameters are " - "deprecated, use chown and chmod instead!\n"); + pr_warn(" asus_uid and asus_gid parameters are " + "deprecated, use chown and chmod instead!\n"); } acpi_device_dir(device) = asus_proc_dir; @@ -1099,8 +1092,7 @@ static int asus_hotk_add_fs(struct acpi_device *device) proc->uid = asus_uid; proc->gid = asus_gid; } else { - printk(KERN_WARNING " Unable to create " PROC_INFO - " fs entry\n"); + pr_warn(" Unable to create " PROC_INFO " fs entry\n"); } if (hotk->methods->mt_wled) { @@ -1283,20 +1275,19 @@ static int asus_hotk_get_info(void) */ status = acpi_get_table(ACPI_SIG_DSDT, 1, &asus_info); if (ACPI_FAILURE(status)) - printk(KERN_WARNING " Couldn't get the DSDT table header\n"); + pr_warn(" Couldn't get the DSDT table header\n"); /* We have to write 0 on init this far for all ASUS models */ if (!write_acpi_int(hotk->handle, "INIT", 0, &buffer)) { - printk(KERN_ERR " Hotkey initialization failed\n"); + pr_err(" Hotkey initialization failed\n"); return -ENODEV; } /* This needs to be called for some laptops to init properly */ if (!read_acpi_int(hotk->handle, "BSTS", &bsts_result)) - printk(KERN_WARNING " Error calling BSTS\n"); + pr_warn(" Error calling BSTS\n"); else if (bsts_result) - printk(KERN_NOTICE " BSTS called, 0x%02x returned\n", - bsts_result); + pr_notice(" BSTS called, 0x%02x returned\n", bsts_result); /* * Try to match the object returned by INIT to the specific model. @@ -1324,23 +1315,21 @@ static int asus_hotk_get_info(void) if (asus_info && strncmp(asus_info->oem_table_id, "ODEM", 4) == 0) { hotk->model = P30; - printk(KERN_NOTICE - " Samsung P30 detected, supported\n"); + pr_notice(" Samsung P30 detected, supported\n"); hotk->methods = &model_conf[hotk->model]; kfree(model); return 0; } else { hotk->model = M2E; - printk(KERN_NOTICE " unsupported model %s, trying " - "default values\n", string); - printk(KERN_NOTICE - " send /proc/acpi/dsdt to the developers\n"); + pr_notice(" unsupported model %s, trying default values\n", + string); + pr_notice(" send /proc/acpi/dsdt to the developers\n"); kfree(model); return -ENODEV; } } hotk->methods = &model_conf[hotk->model]; - printk(KERN_NOTICE " %s model detected, supported\n", string); + pr_notice(" %s model detected, supported\n", string); /* Sort of per-model blacklist */ if (strncmp(string, "L2B", 3) == 0) @@ -1385,7 +1374,7 @@ static int asus_hotk_check(void) if (hotk->device->status.present) { result = asus_hotk_get_info(); } else { - printk(KERN_ERR " Hotkey device not present, aborting\n"); + pr_err(" Hotkey device not present, aborting\n"); return -EINVAL; } @@ -1399,8 +1388,7 @@ static int asus_hotk_add(struct acpi_device *device) acpi_status status = AE_OK; int result; - printk(KERN_NOTICE "Asus Laptop ACPI Extras version %s\n", - ASUS_ACPI_VERSION); + pr_notice("Asus Laptop ACPI Extras version %s\n", ASUS_ACPI_VERSION); hotk = kzalloc(sizeof(struct asus_hotk), GFP_KERNEL); if (!hotk) @@ -1428,15 +1416,14 @@ static int asus_hotk_add(struct acpi_device *device) acpi_evaluate_object(NULL, hotk->methods->brightness_down, NULL, NULL); if (ACPI_FAILURE(status)) - printk(KERN_WARNING " Error changing brightness\n"); + pr_warn(" Error changing brightness\n"); else { status = acpi_evaluate_object(NULL, hotk->methods->brightness_up, NULL, NULL); if (ACPI_FAILURE(status)) - printk(KERN_WARNING " Strange, error changing" - " brightness\n"); + pr_warn(" Strange, error changing brightness\n"); } } @@ -1488,7 +1475,7 @@ static int __init asus_acpi_init(void) asus_proc_dir = proc_mkdir(PROC_ASUS, acpi_root_dir); if (!asus_proc_dir) { - printk(KERN_ERR "Asus ACPI: Unable to create /proc entry\n"); + pr_err("Unable to create /proc entry\n"); acpi_bus_unregister_driver(&asus_hotk_driver); return -ENODEV; } @@ -1513,7 +1500,7 @@ static int __init asus_acpi_init(void) &asus_backlight_data, &props); if (IS_ERR(asus_backlight_device)) { - printk(KERN_ERR "Could not register asus backlight device\n"); + pr_err("Could not register asus backlight device\n"); asus_backlight_device = NULL; asus_acpi_exit(); return -ENODEV; -- cgit v1.2.3-70-g09d2 From b4a4bc0bd1328b587a905e346d8494f9b9423cc5 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:36 -0700 Subject: compal-laptop: Convert printks to pr_ Add pr_fmt. Convert printks to pr_ removing DRIVER_NAME prefix. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/compal-laptop.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index c16a27641ce..4c4a7422c5e 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -68,6 +68,8 @@ * only enabled on a JHL90 board until it is verified that they work on the * other boards too. See the extra_features variable. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -760,16 +762,14 @@ static struct rfkill *bt_rfkill; static int dmi_check_cb(const struct dmi_system_id *id) { - printk(KERN_INFO DRIVER_NAME": Identified laptop model '%s'\n", - id->ident); + pr_info("Identified laptop model '%s'\n", id->ident); extra_features = false; return 1; } static int dmi_check_cb_extra(const struct dmi_system_id *id) { - printk(KERN_INFO DRIVER_NAME": Identified laptop model '%s', " - "enabling extra features\n", + pr_info("Identified laptop model '%s', enabling extra features\n", id->ident); extra_features = true; return 1; @@ -956,14 +956,12 @@ static int __init compal_init(void) int ret; if (acpi_disabled) { - printk(KERN_ERR DRIVER_NAME": ACPI needs to be enabled for " - "this driver to work!\n"); + pr_err("ACPI needs to be enabled for this driver to work!\n"); return -ENODEV; } if (!force && !dmi_check_system(compal_dmi_table)) { - printk(KERN_ERR DRIVER_NAME": Motherboard not recognized (You " - "could try the module's force-parameter)"); + pr_err("Motherboard not recognized (You could try the module's force-parameter)\n"); return -ENODEV; } @@ -998,8 +996,7 @@ static int __init compal_init(void) if (ret) goto err_rfkill; - printk(KERN_INFO DRIVER_NAME": Driver "DRIVER_VERSION - " successfully loaded\n"); + pr_info("Driver " DRIVER_VERSION " successfully loaded\n"); return 0; err_rfkill: @@ -1064,7 +1061,7 @@ static void __exit compal_cleanup(void) rfkill_destroy(wifi_rfkill); rfkill_destroy(bt_rfkill); - printk(KERN_INFO DRIVER_NAME": Driver unloaded\n"); + pr_info("Driver unloaded\n"); } static int __devexit compal_remove(struct platform_device *pdev) @@ -1074,8 +1071,7 @@ static int __devexit compal_remove(struct platform_device *pdev) if (!extra_features) return 0; - printk(KERN_INFO DRIVER_NAME": Unloading: resetting fan control " - "to motherboard\n"); + pr_info("Unloading: resetting fan control to motherboard\n"); pwm_disable_control(); data = platform_get_drvdata(pdev); -- cgit v1.2.3-70-g09d2 From eb8895241dfb6c26114928b186cc1810cbd57f1b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:37 -0700 Subject: dell: Convert printks to pr_ Add pr_fmt. Remove hard coded prefixes and use pr_. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/dell-laptop.c | 12 ++++++------ drivers/platform/x86/dell-wmi-aio.c | 3 ++- drivers/platform/x86/dell-wmi.c | 17 ++++++++--------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index de301aa8e5c..d3841de6a8c 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -11,6 +11,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -434,8 +436,7 @@ static int __init dell_setup_rfkill(void) int ret; if (dmi_check_system(dell_blacklist)) { - printk(KERN_INFO "dell-laptop: Blacklisted hardware detected - " - "not enabling rfkill\n"); + pr_info("Blacklisted hardware detected - not enabling rfkill\n"); return 0; } @@ -606,7 +607,7 @@ static int __init dell_init(void) dmi_walk(find_tokens, NULL); if (!da_tokens) { - printk(KERN_INFO "dell-laptop: Unable to find dmi tokens\n"); + pr_info("Unable to find dmi tokens\n"); return -ENODEV; } @@ -636,14 +637,13 @@ static int __init dell_init(void) ret = dell_setup_rfkill(); if (ret) { - printk(KERN_WARNING "dell-laptop: Unable to setup rfkill\n"); + pr_warn("Unable to setup rfkill\n"); goto fail_rfkill; } ret = i8042_install_filter(dell_laptop_i8042_filter); if (ret) { - printk(KERN_WARNING - "dell-laptop: Unable to install key filter\n"); + pr_warn("Unable to install key filter\n"); goto fail_filter; } diff --git a/drivers/platform/x86/dell-wmi-aio.c b/drivers/platform/x86/dell-wmi-aio.c index 0ed84573ae1..3f945457f71 100644 --- a/drivers/platform/x86/dell-wmi-aio.c +++ b/drivers/platform/x86/dell-wmi-aio.c @@ -15,6 +15,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include @@ -138,7 +139,7 @@ static int __init dell_wmi_aio_init(void) guid = dell_wmi_aio_find(); if (!guid) { - pr_warning("No known WMI GUID found\n"); + pr_warn("No known WMI GUID found\n"); return -ENXIO; } diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index 77f1d55414c..ce790827e19 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -23,6 +23,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -141,7 +143,7 @@ static void dell_wmi_notify(u32 value, void *context) status = wmi_get_event_data(value, &response); if (status != AE_OK) { - printk(KERN_INFO "dell-wmi: bad event status 0x%x\n", status); + pr_info("bad event status 0x%x\n", status); return; } @@ -153,8 +155,8 @@ static void dell_wmi_notify(u32 value, void *context) u16 *buffer_entry = (u16 *)obj->buffer.pointer; if (dell_new_hk_type && (buffer_entry[1] != 0x10)) { - printk(KERN_INFO "dell-wmi: Received unknown WMI event" - " (0x%x)\n", buffer_entry[1]); + pr_info("Received unknown WMI event (0x%x)\n", + buffer_entry[1]); kfree(obj); return; } @@ -167,8 +169,7 @@ static void dell_wmi_notify(u32 value, void *context) key = sparse_keymap_entry_from_scancode(dell_wmi_input_dev, reported_key); if (!key) { - printk(KERN_INFO "dell-wmi: Unknown key %x pressed\n", - reported_key); + pr_info("Unknown key %x pressed\n", reported_key); } else if ((key->keycode == KEY_BRIGHTNESSUP || key->keycode == KEY_BRIGHTNESSDOWN) && acpi_video) { /* Don't report brightness notifications that will also @@ -275,7 +276,7 @@ static int __init dell_wmi_init(void) acpi_status status; if (!wmi_has_guid(DELL_EVENT_GUID)) { - printk(KERN_WARNING "dell-wmi: No known WMI GUID found\n"); + pr_warn("No known WMI GUID found\n"); return -ENODEV; } @@ -290,9 +291,7 @@ static int __init dell_wmi_init(void) dell_wmi_notify, NULL); if (ACPI_FAILURE(status)) { dell_wmi_input_destroy(); - printk(KERN_ERR - "dell-wmi: Unable to register notify handler - %d\n", - status); + pr_err("Unable to register notify handler - %d\n", status); return -ENODEV; } -- cgit v1.2.3-70-g09d2 From 22441ffeed17b94b28bd7c609f2cdb24d11e81f3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:38 -0700 Subject: eeepc: Use pr_warn Just a trivial pr_warning to pr_warn conversion while adding a few missing newlines. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/eeepc-laptop.c | 21 +++++++++++---------- drivers/platform/x86/eeepc-wmi.c | 14 +++++++------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 2c1abf63957..1c45d92e216 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -228,7 +228,7 @@ static int set_acpi(struct eeepc_laptop *eeepc, int cm, int value) return -ENODEV; if (write_acpi_int(eeepc->handle, method, value)) - pr_warning("Error writing %s\n", method); + pr_warn("Error writing %s\n", method); return 0; } @@ -243,7 +243,7 @@ static int get_acpi(struct eeepc_laptop *eeepc, int cm) return -ENODEV; if (read_acpi_int(eeepc->handle, method, &value)) - pr_warning("Error reading %s\n", method); + pr_warn("Error reading %s\n", method); return value; } @@ -261,7 +261,7 @@ static int acpi_setter_handle(struct eeepc_laptop *eeepc, int cm, status = acpi_get_handle(eeepc->handle, (char *)method, handle); if (status != AE_OK) { - pr_warning("Error finding %s\n", method); + pr_warn("Error finding %s\n", method); return -ENODEV; } return 0; @@ -417,7 +417,7 @@ static ssize_t store_cpufv_disabled(struct device *dev, switch (value) { case 0: if (eeepc->cpufv_disabled) - pr_warning("cpufv enabled (not officially supported " + pr_warn("cpufv enabled (not officially supported " "on this model)\n"); eeepc->cpufv_disabled = false; return rv; @@ -609,7 +609,7 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc, acpi_handle handle) bus = port->subordinate; if (!bus) { - pr_warning("Unable to find PCI bus?\n"); + pr_warn("Unable to find PCI bus 1?\n"); goto out_unlock; } @@ -621,12 +621,12 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc, acpi_handle handle) absent = (l == 0xffffffff); if (blocked != absent) { - pr_warning("BIOS says wireless lan is %s, " - "but the pci device is %s\n", + pr_warn("BIOS says wireless lan is %s, " + "but the pci device is %s\n", blocked ? "blocked" : "unblocked", absent ? "absent" : "present"); - pr_warning("skipped wireless hotplug as probably " - "inappropriate for this model\n"); + pr_warn("skipped wireless hotplug as probably " + "inappropriate for this model\n"); goto out_unlock; } @@ -691,7 +691,8 @@ static int eeepc_register_rfkill_notifier(struct eeepc_laptop *eeepc, eeepc_rfkill_notify, eeepc); if (ACPI_FAILURE(status)) - pr_warning("Failed to register notify on %s\n", node); + pr_warn("Failed to register notify on %s\n", node); + /* * Refresh pci hotplug in case the rfkill state was * changed during setup. diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c index 649dcadd8ea..4aa867a9b88 100644 --- a/drivers/platform/x86/eeepc-wmi.c +++ b/drivers/platform/x86/eeepc-wmi.c @@ -84,7 +84,7 @@ static const struct key_entry eeepc_wmi_keymap[] = { static acpi_status eeepc_wmi_parse_device(acpi_handle handle, u32 level, void *context, void **retval) { - pr_warning("Found legacy ATKD device (%s)", EEEPC_ACPI_HID); + pr_warn("Found legacy ATKD device (%s)\n", EEEPC_ACPI_HID); *(bool *)context = true; return AE_CTRL_TERMINATE; } @@ -105,12 +105,12 @@ static int eeepc_wmi_check_atkd(void) static int eeepc_wmi_probe(struct platform_device *pdev) { if (eeepc_wmi_check_atkd()) { - pr_warning("WMI device present, but legacy ATKD device is also " - "present and enabled."); - pr_warning("You probably booted with acpi_osi=\"Linux\" or " - "acpi_osi=\"!Windows 2009\""); - pr_warning("Can't load eeepc-wmi, use default acpi_osi " - "(preferred) or eeepc-laptop"); + pr_warn("WMI device present, but legacy ATKD device is also " + "present and enabled\n"); + pr_warn("You probably booted with acpi_osi=\"Linux\" or " + "acpi_osi=\"!Windows 2009\"\n"); + pr_warn("Can't load eeepc-wmi, use default acpi_osi " + "(preferred) or eeepc-laptop\n"); return -EBUSY; } return 0; -- cgit v1.2.3-70-g09d2 From 77bad7c830e20085deba6a760ce85c13ecb12f4d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:39 -0700 Subject: fujitsu-laptop: Convert printks to pr_ Added pr_fmt, converted printks and removed hard coded prefixes. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/fujitsu-laptop.c | 39 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 493054c2dbe..6b26666b37f 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -56,6 +56,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -585,8 +587,7 @@ static struct platform_driver fujitsupf_driver = { static void dmi_check_cb_common(const struct dmi_system_id *id) { acpi_handle handle; - printk(KERN_INFO "fujitsu-laptop: Identified laptop model '%s'.\n", - id->ident); + pr_info("Identified laptop model '%s'\n", id->ident); if (use_alt_lcd_levels == -1) { if (ACPI_SUCCESS(acpi_get_handle(NULL, "\\_SB.PCI0.LPCB.FJEX.SBL2", &handle))) @@ -691,11 +692,11 @@ static int acpi_fujitsu_add(struct acpi_device *device) result = acpi_bus_update_power(fujitsu->acpi_handle, &state); if (result) { - printk(KERN_ERR "Error reading power state\n"); + pr_err("Error reading power state\n"); goto err_unregister_input_dev; } - printk(KERN_INFO "ACPI: %s [%s] (%s)\n", + pr_info("ACPI: %s [%s] (%s)\n", acpi_device_name(device), acpi_device_bid(device), !device->power.state ? "on" : "off"); @@ -707,7 +708,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) if (ACPI_FAILURE (acpi_evaluate_object (device->handle, METHOD_NAME__INI, NULL, NULL))) - printk(KERN_ERR "_INI Method failed\n"); + pr_err("_INI Method failed\n"); } /* do config (detect defaults) */ @@ -827,7 +828,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int), GFP_KERNEL); if (error) { - printk(KERN_ERR "kfifo_alloc failed\n"); + pr_err("kfifo_alloc failed\n"); goto err_stop; } @@ -859,13 +860,13 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = acpi_bus_update_power(fujitsu_hotkey->acpi_handle, &state); if (result) { - printk(KERN_ERR "Error reading power state\n"); + pr_err("Error reading power state\n"); goto err_unregister_input_dev; } - printk(KERN_INFO "ACPI: %s [%s] (%s)\n", - acpi_device_name(device), acpi_device_bid(device), - !device->power.state ? "on" : "off"); + pr_info("ACPI: %s [%s] (%s)\n", + acpi_device_name(device), acpi_device_bid(device), + !device->power.state ? "on" : "off"); fujitsu_hotkey->dev = device; @@ -875,7 +876,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (ACPI_FAILURE (acpi_evaluate_object (device->handle, METHOD_NAME__INI, NULL, NULL))) - printk(KERN_ERR "_INI Method failed\n"); + pr_err("_INI Method failed\n"); } i = 0; @@ -897,8 +898,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); /* Suspect this is a keymap of the application panel, print it */ - printk(KERN_INFO "fujitsu-laptop: BTNI: [0x%x]\n", - call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0)); + pr_info("BTNI: [0x%x]\n", call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0)); #if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE) if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) { @@ -907,8 +907,8 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (result == 0) { fujitsu_hotkey->logolamp_registered = 1; } else { - printk(KERN_ERR "fujitsu-laptop: Could not register " - "LED handler for logo lamp, error %i\n", result); + pr_err("Could not register LED handler for logo lamp, error %i\n", + result); } } @@ -919,8 +919,8 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (result == 0) { fujitsu_hotkey->kblamps_registered = 1; } else { - printk(KERN_ERR "fujitsu-laptop: Could not register " - "LED handler for keyboard lamps, error %i\n", result); + pr_err("Could not register LED handler for keyboard lamps, error %i\n", + result); } } #endif @@ -1169,8 +1169,7 @@ static int __init fujitsu_init(void) fujitsu->bl_device->props.power = 0; } - printk(KERN_INFO "fujitsu-laptop: driver " FUJITSU_DRIVER_VERSION - " successfully loaded.\n"); + pr_info("driver " FUJITSU_DRIVER_VERSION " successfully loaded\n"); return 0; @@ -1216,7 +1215,7 @@ static void __exit fujitsu_cleanup(void) kfree(fujitsu); - printk(KERN_INFO "fujitsu-laptop: driver unloaded.\n"); + pr_info("driver unloaded\n"); } module_init(fujitsu_init); -- cgit v1.2.3-70-g09d2 From 611f5763fd0e38f896aa38346d4bc3f5c388b608 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:40 -0700 Subject: hdaps: Convert printks to pr_ Added pr_fmt, converted printks and removed hard coded prefixes. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/hdaps.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/hdaps.c b/drivers/platform/x86/hdaps.c index 067bf36d32f..5a34973dc16 100644 --- a/drivers/platform/x86/hdaps.c +++ b/drivers/platform/x86/hdaps.c @@ -26,6 +26,8 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -238,7 +240,7 @@ static int hdaps_device_init(void) __check_latch(0x1611, 0x01)) goto out; - printk(KERN_DEBUG "hdaps: initial latch check good (0x%02x).\n", + printk(KERN_DEBUG "hdaps: initial latch check good (0x%02x)\n", __get_latch(0x1611)); outb(0x17, 0x1610); @@ -299,7 +301,7 @@ static int hdaps_probe(struct platform_device *dev) if (ret) return ret; - printk(KERN_INFO "hdaps: device successfully initialized.\n"); + pr_info("device successfully initialized\n"); return 0; } @@ -480,7 +482,7 @@ static struct attribute_group hdaps_attribute_group = { /* hdaps_dmi_match - found a match. return one, short-circuiting the hunt. */ static int __init hdaps_dmi_match(const struct dmi_system_id *id) { - printk(KERN_INFO "hdaps: %s detected.\n", id->ident); + pr_info("%s detected\n", id->ident); return 1; } @@ -488,8 +490,7 @@ static int __init hdaps_dmi_match(const struct dmi_system_id *id) static int __init hdaps_dmi_match_invert(const struct dmi_system_id *id) { hdaps_invert = (unsigned long)id->driver_data; - printk(KERN_INFO "hdaps: inverting axis (%u) readings.\n", - hdaps_invert); + pr_info("inverting axis (%u) readings\n", hdaps_invert); return hdaps_dmi_match(id); } @@ -543,7 +544,7 @@ static int __init hdaps_init(void) int ret; if (!dmi_check_system(hdaps_whitelist)) { - printk(KERN_WARNING "hdaps: supported laptop not found!\n"); + pr_warn("supported laptop not found!\n"); ret = -ENODEV; goto out; } @@ -595,7 +596,7 @@ static int __init hdaps_init(void) if (ret) goto out_idev; - printk(KERN_INFO "hdaps: driver successfully loaded.\n"); + pr_info("driver successfully loaded\n"); return 0; out_idev: @@ -609,7 +610,7 @@ out_driver: out_region: release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS); out: - printk(KERN_WARNING "hdaps: driver init failed (ret=%d)!\n", ret); + pr_warn("driver init failed (ret=%d)!\n", ret); return ret; } @@ -622,7 +623,7 @@ static void __exit hdaps_exit(void) platform_driver_unregister(&hdaps_driver); release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS); - printk(KERN_INFO "hdaps: driver unloaded.\n"); + pr_info("driver unloaded\n"); } module_init(hdaps_init); -- cgit v1.2.3-70-g09d2 From b5a4223c1ce5edbaae3db9494de8dcb2d08a755b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:41 -0700 Subject: hp-wmi: Convert printks to pr_ Added pr_fmt and converted printks to pr_. Removed now unused PREFIX and UNIMPL #defines. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/hp-wmi.c | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 1bc4a7539ba..f94017bcdd6 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -24,6 +24,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -54,9 +56,6 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); #define HPWMI_HOTKEY_QUERY 0xc #define HPWMI_WIRELESS2_QUERY 0x1b -#define PREFIX "HP WMI: " -#define UNIMP "Unimplemented " - enum hp_wmi_radio { HPWMI_WIFI = 0, HPWMI_BLUETOOTH = 1, @@ -228,9 +227,8 @@ static int hp_wmi_perform_query(int query, int write, void *buffer, if (bios_return->return_code) { if (bios_return->return_code != HPWMI_RET_UNKNOWN_CMDTYPE) - printk(KERN_WARNING PREFIX "query 0x%x returned " - "error 0x%x\n", - query, bios_return->return_code); + pr_warn("query 0x%x returned error 0x%x\n", + query, bios_return->return_code); kfree(obj); return bios_return->return_code; } @@ -384,8 +382,7 @@ static int hp_wmi_rfkill2_refresh(void) if (num >= state.count || devstate->rfkill_id != rfkill2[i].id) { - printk(KERN_WARNING PREFIX "power configuration of " - "the wireless devices unexpectedly changed\n"); + pr_warn("power configuration of the wireless devices unexpectedly changed\n"); continue; } @@ -471,7 +468,7 @@ static void hp_wmi_notify(u32 value, void *context) status = wmi_get_event_data(value, &response); if (status != AE_OK) { - printk(KERN_INFO PREFIX "bad event status 0x%x\n", status); + pr_info("bad event status 0x%x\n", status); return; } @@ -480,8 +477,7 @@ static void hp_wmi_notify(u32 value, void *context) if (!obj) return; if (obj->type != ACPI_TYPE_BUFFER) { - printk(KERN_INFO "hp-wmi: Unknown response received %d\n", - obj->type); + pr_info("Unknown response received %d\n", obj->type); kfree(obj); return; } @@ -498,8 +494,7 @@ static void hp_wmi_notify(u32 value, void *context) event_id = *location; event_data = *(location + 2); } else { - printk(KERN_INFO "hp-wmi: Unknown buffer length %d\n", - obj->buffer.length); + pr_info("Unknown buffer length %d\n", obj->buffer.length); kfree(obj); return; } @@ -527,8 +522,7 @@ static void hp_wmi_notify(u32 value, void *context) if (!sparse_keymap_report_event(hp_wmi_input_dev, key_code, 1, true)) - printk(KERN_INFO PREFIX "Unknown key code - 0x%x\n", - key_code); + pr_info("Unknown key code - 0x%x\n", key_code); break; case HPWMI_WIRELESS: if (rfkill2_count) { @@ -550,14 +544,12 @@ static void hp_wmi_notify(u32 value, void *context) hp_wmi_get_hw_state(HPWMI_WWAN)); break; case HPWMI_CPU_BATTERY_THROTTLE: - printk(KERN_INFO PREFIX UNIMP "CPU throttle because of 3 Cell" - " battery event detected\n"); + pr_info("Unimplemented CPU throttle because of 3 Cell battery event detected\n"); break; case HPWMI_LOCK_SWITCH: break; default: - printk(KERN_INFO PREFIX "Unknown event_id - %d - 0x%x\n", - event_id, event_data); + pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break; } } @@ -705,7 +697,7 @@ static int __devinit hp_wmi_rfkill2_setup(struct platform_device *device) return err; if (state.count > HPWMI_MAX_RFKILL2_DEVICES) { - printk(KERN_WARNING PREFIX "unable to parse 0x1b query output\n"); + pr_warn("unable to parse 0x1b query output\n"); return -EINVAL; } @@ -727,14 +719,14 @@ static int __devinit hp_wmi_rfkill2_setup(struct platform_device *device) name = "hp-wwan"; break; default: - printk(KERN_WARNING PREFIX "unknown device type 0x%x\n", - state.device[i].radio_type); + pr_warn("unknown device type 0x%x\n", + state.device[i].radio_type); continue; } if (!state.device[i].vendor_id) { - printk(KERN_WARNING PREFIX "zero device %d while %d " - "reported\n", i, state.count); + pr_warn("zero device %d while %d reported\n", + i, state.count); continue; } @@ -755,8 +747,7 @@ static int __devinit hp_wmi_rfkill2_setup(struct platform_device *device) IS_HWBLOCKED(state.device[i].power)); if (!(state.device[i].power & HPWMI_POWER_BIOS)) - printk(KERN_INFO PREFIX "device %s blocked by BIOS\n", - name); + pr_info("device %s blocked by BIOS\n", name); err = rfkill_register(rfkill); if (err) { -- cgit v1.2.3-70-g09d2 From 323623a717c7feec3f36099f4caa18b3319c5cca Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:42 -0700 Subject: ibm_rtl: Use pr_fmt and pr_ Remove hard coded prefixes from logging messages. Neaten RTL_DEBUG macro and uses. Convert __FUNCTION__ to __func__. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/ibm_rtl.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c index 10563458408..811d436cd67 100644 --- a/drivers/platform/x86/ibm_rtl.c +++ b/drivers/platform/x86/ibm_rtl.c @@ -22,6 +22,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -69,9 +71,10 @@ struct ibm_rtl_table { #define RTL_SIGNATURE 0x0000005f4c54525fULL #define RTL_MASK 0x000000ffffffffffULL -#define RTL_DEBUG(A, ...) do { \ - if (debug) \ - pr_info("ibm-rtl: " A, ##__VA_ARGS__ ); \ +#define RTL_DEBUG(fmt, ...) \ +do { \ + if (debug) \ + pr_info(fmt, ##__VA_ARGS__); \ } while (0) static DEFINE_MUTEX(rtl_lock); @@ -114,7 +117,7 @@ static int ibm_rtl_write(u8 value) int ret = 0, count = 0; static u32 cmd_port_val; - RTL_DEBUG("%s(%d)\n", __FUNCTION__, value); + RTL_DEBUG("%s(%d)\n", __func__, value); value = value == 1 ? RTL_CMD_ENTER_PRTM : RTL_CMD_EXIT_PRTM; @@ -144,8 +147,8 @@ static int ibm_rtl_write(u8 value) while (ioread8(&rtl_table->command)) { msleep(10); if (count++ > 500) { - pr_err("ibm-rtl: Hardware not responding to " - "mode switch request\n"); + pr_err("Hardware not responding to " + "mode switch request\n"); ret = -EIO; break; } @@ -250,7 +253,7 @@ static int __init ibm_rtl_init(void) { int ret = -ENODEV, i; if (force) - pr_warning("ibm-rtl: module loaded by force\n"); + pr_warn("module loaded by force\n"); /* first ensure that we are running on IBM HW */ else if (efi_enabled || !dmi_check_system(ibm_rtl_dmi_table)) return -ENODEV; @@ -295,7 +298,7 @@ static int __init ibm_rtl_init(void) { rtl_cmd_width = ioread8(&rtl_table->cmd_granularity); rtl_cmd_type = ioread8(&rtl_table->cmd_address_type); RTL_DEBUG("rtl_cmd_width = %u, rtl_cmd_type = %u\n", - rtl_cmd_width, rtl_cmd_type); + rtl_cmd_width, rtl_cmd_type); addr = ioread32(&rtl_table->cmd_port_address); RTL_DEBUG("addr = %#llx\n", (unsigned long long)addr); plen = rtl_cmd_width/sizeof(char); -- cgit v1.2.3-70-g09d2 From 9ab23989d3e774f4545c83eeab0137e35f7115be Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:43 -0700 Subject: ideapad-laptop: Add pr_fmt Add pr_fmt to prefix logging messages. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/ideapad-laptop.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 21b101899ba..bfdda33feb2 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -20,6 +20,8 @@ * 02110-1301, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include -- cgit v1.2.3-70-g09d2 From 4686f6df69c5bca5ea795e01048d32f6eed07ba7 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:44 -0700 Subject: intel_menlow: Add pr_fmt and use pr_ Add pr_fmt to prefix the logging messages. Convert printk to pr_. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_menlow.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c index eacd5da7dd2..809adea4965 100644 --- a/drivers/platform/x86/intel_menlow.c +++ b/drivers/platform/x86/intel_menlow.c @@ -27,6 +27,8 @@ * to get/set bandwidth. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -135,8 +137,7 @@ static int memory_set_cur_bandwidth(struct thermal_cooling_device *cdev, acpi_evaluate_integer(handle, MEMORY_SET_BANDWIDTH, &arg_list, &temp); - printk(KERN_INFO - "Bandwidth value was %ld: status is %d\n", state, status); + pr_info("Bandwidth value was %ld: status is %d\n", state, status); if (ACPI_FAILURE(status)) return -EFAULT; -- cgit v1.2.3-70-g09d2 From 9a2ffd168ef8ec3705060ffb21d6dcb20eeb9506 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:45 -0700 Subject: intel_pmic_gpio: Convert printks to pr_ Add #define pr_fmt(fmt) "%s: " fmt, __func__ to prefix function name to each output message. Convert printks to pr_. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_pmic_gpio.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c index 464bb3fc4d8..1686c1e07d5 100644 --- a/drivers/platform/x86/intel_pmic_gpio.c +++ b/drivers/platform/x86/intel_pmic_gpio.c @@ -19,6 +19,8 @@ * Moorestown platform PMIC chip */ +#define pr_fmt(fmt) "%s: " fmt, __func__ + #include #include #include @@ -90,8 +92,7 @@ static void pmic_program_irqtype(int gpio, int type) static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned offset) { if (offset > 8) { - printk(KERN_ERR - "%s: only pin 0-7 support input\n", __func__); + pr_err("only pin 0-7 support input\n"); return -1;/* we only have 8 GPIO can use as input */ } return intel_scu_ipc_update_register(GPIO0 + offset, @@ -116,8 +117,7 @@ static int pmic_gpio_direction_output(struct gpio_chip *chip, value ? 1 << (offset - 16) : 0, 1 << (offset - 16)); else { - printk(KERN_ERR - "%s: invalid PMIC GPIO pin %d!\n", __func__, offset); + pr_err("invalid PMIC GPIO pin %d!\n", offset); WARN_ON(1); } @@ -260,7 +260,7 @@ static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev) /* setting up SRAM mapping for GPIOINT register */ pg->gpiointr = ioremap_nocache(pdata->gpiointr, 8); if (!pg->gpiointr) { - printk(KERN_ERR "%s: Can not map GPIOINT.\n", __func__); + pr_err("Can not map GPIOINT\n"); retval = -EINVAL; goto err2; } @@ -281,13 +281,13 @@ static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev) pg->chip.dev = dev; retval = gpiochip_add(&pg->chip); if (retval) { - printk(KERN_ERR "%s: Can not add pmic gpio chip.\n", __func__); + pr_err("Can not add pmic gpio chip\n"); goto err; } retval = request_irq(pg->irq, pmic_irq_handler, 0, "pmic", pg); if (retval) { - printk(KERN_WARNING "pmic: Interrupt request failed\n"); + pr_warn("Interrupt request failed\n"); goto err; } -- cgit v1.2.3-70-g09d2 From f9dcf192ed527fbf2f3a8a3574006d1ae855100b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:46 -0700 Subject: msi-laptop: pr_ neatening Just making it a bit more like other logging message uses. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/msi-laptop.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 23fb2afda00..f89c0b6a9ae 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -447,7 +447,7 @@ static struct platform_device *msipf_device; static int dmi_check_cb(const struct dmi_system_id *id) { - pr_info("Identified laptop model '%s'.\n", id->ident); + pr_info("Identified laptop model '%s'\n", id->ident); return 1; } @@ -875,8 +875,7 @@ static int __init msi_init(void) /* Register backlight stuff */ if (acpi_video_backlight_support()) { - pr_info("Brightness ignored, must be controlled " - "by ACPI video driver\n"); + pr_info("Brightness ignored, must be controlled by ACPI video driver\n"); } else { struct backlight_properties props; memset(&props, 0, sizeof(struct backlight_properties)); @@ -930,7 +929,7 @@ static int __init msi_init(void) if (auto_brightness != 2) set_auto_brightness(auto_brightness); - pr_info("driver "MSI_DRIVER_VERSION" successfully loaded.\n"); + pr_info("driver " MSI_DRIVER_VERSION " successfully loaded\n"); return 0; @@ -978,7 +977,7 @@ static void __exit msi_cleanup(void) if (auto_brightness != 2) set_auto_brightness(1); - pr_info("driver unloaded.\n"); + pr_info("driver unloaded\n"); } module_init(msi_init); -- cgit v1.2.3-70-g09d2 From dd3c7f2308e91d6ecbe5cd8a3310bb49e47e1ae7 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:47 -0700 Subject: msi-wmi: Use pr_fmt and pr_ Added pr_fmt. Removed now unused #define DRV_PFX Convert dprintk to pr_debug. Convert printks to pr_. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/msi-wmi.c | 45 +++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c index d5419c9ec07..c832e3356cd 100644 --- a/drivers/platform/x86/msi-wmi.c +++ b/drivers/platform/x86/msi-wmi.c @@ -20,6 +20,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -36,13 +37,10 @@ MODULE_ALIAS("wmi:551A1F84-FBDD-4125-91DB-3EA8F44F1D45"); MODULE_ALIAS("wmi:B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2"); #define DRV_NAME "msi-wmi" -#define DRV_PFX DRV_NAME ": " #define MSIWMI_BIOS_GUID "551A1F84-FBDD-4125-91DB-3EA8F44F1D45" #define MSIWMI_EVENT_GUID "B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2" -#define dprintk(msg...) pr_debug(DRV_PFX msg) - #define SCANCODE_BASE 0xD0 #define MSI_WMI_BRIGHTNESSUP SCANCODE_BASE #define MSI_WMI_BRIGHTNESSDOWN (SCANCODE_BASE + 1) @@ -78,7 +76,7 @@ static int msi_wmi_query_block(int instance, int *ret) if (!obj || obj->type != ACPI_TYPE_INTEGER) { if (obj) { - printk(KERN_ERR DRV_PFX "query block returned object " + pr_err("query block returned object " "type: %d - buffer length:%d\n", obj->type, obj->type == ACPI_TYPE_BUFFER ? obj->buffer.length : 0); @@ -97,8 +95,8 @@ static int msi_wmi_set_block(int instance, int value) struct acpi_buffer input = { sizeof(int), &value }; - dprintk("Going to set block of instance: %d - value: %d\n", - instance, value); + pr_debug("Going to set block of instance: %d - value: %d\n", + instance, value); status = wmi_set_block(MSIWMI_BIOS_GUID, instance, &input); @@ -112,20 +110,19 @@ static int bl_get(struct backlight_device *bd) /* Instance 1 is "get backlight", cmp with DSDT */ err = msi_wmi_query_block(1, &ret); if (err) { - printk(KERN_ERR DRV_PFX "Could not query backlight: %d\n", err); + pr_err("Could not query backlight: %d\n", err); return -EINVAL; } - dprintk("Get: Query block returned: %d\n", ret); + pr_debug("Get: Query block returned: %d\n", ret); for (level = 0; level < ARRAY_SIZE(backlight_map); level++) { if (backlight_map[level] == ret) { - dprintk("Current backlight level: 0x%X - index: %d\n", - backlight_map[level], level); + pr_debug("Current backlight level: 0x%X - index: %d\n", + backlight_map[level], level); break; } } if (level == ARRAY_SIZE(backlight_map)) { - printk(KERN_ERR DRV_PFX "get: Invalid brightness value: 0x%X\n", - ret); + pr_err("get: Invalid brightness value: 0x%X\n", ret); return -EINVAL; } return level; @@ -156,7 +153,7 @@ static void msi_wmi_notify(u32 value, void *context) status = wmi_get_event_data(value, &response); if (status != AE_OK) { - printk(KERN_INFO DRV_PFX "bad event status 0x%x\n", status); + pr_info("bad event status 0x%x\n", status); return; } @@ -164,7 +161,7 @@ static void msi_wmi_notify(u32 value, void *context) if (obj && obj->type == ACPI_TYPE_INTEGER) { int eventcode = obj->integer.value; - dprintk("Eventcode: 0x%x\n", eventcode); + pr_debug("Eventcode: 0x%x\n", eventcode); key = sparse_keymap_entry_from_scancode(msi_wmi_input_dev, eventcode); if (key) { @@ -175,8 +172,8 @@ static void msi_wmi_notify(u32 value, void *context) /* Ignore event if the same event happened in a 50 ms timeframe -> Key press may result in 10-20 GPEs */ if (ktime_to_us(diff) < 1000 * 50) { - dprintk("Suppressed key event 0x%X - " - "Last press was %lld us ago\n", + pr_debug("Suppressed key event 0x%X - " + "Last press was %lld us ago\n", key->code, ktime_to_us(diff)); return; } @@ -187,17 +184,16 @@ static void msi_wmi_notify(u32 value, void *context) (!acpi_video_backlight_support() || (key->code != MSI_WMI_BRIGHTNESSUP && key->code != MSI_WMI_BRIGHTNESSDOWN))) { - dprintk("Send key: 0x%X - " - "Input layer keycode: %d\n", key->code, - key->keycode); + pr_debug("Send key: 0x%X - " + "Input layer keycode: %d\n", + key->code, key->keycode); sparse_keymap_report_entry(msi_wmi_input_dev, key, 1, true); } } else - printk(KERN_INFO "Unknown key pressed - %x\n", - eventcode); + pr_info("Unknown key pressed - %x\n", eventcode); } else - printk(KERN_INFO DRV_PFX "Unknown event received\n"); + pr_info("Unknown event received\n"); kfree(response.pointer); } @@ -238,8 +234,7 @@ static int __init msi_wmi_init(void) int err; if (!wmi_has_guid(MSIWMI_EVENT_GUID)) { - printk(KERN_ERR - "This machine doesn't have MSI-hotkeys through WMI\n"); + pr_err("This machine doesn't have MSI-hotkeys through WMI\n"); return -ENODEV; } err = wmi_install_notify_handler(MSIWMI_EVENT_GUID, @@ -270,7 +265,7 @@ static int __init msi_wmi_init(void) backlight->props.brightness = err; } - dprintk("Event handler installed\n"); + pr_debug("Event handler installed\n"); return 0; -- cgit v1.2.3-70-g09d2 From 50f581a4f09f5b7f78a1c791e09b2c8d24a1e20c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:48 -0700 Subject: sony-laptop: Add and use #define pr_fmt Add pr_fmt. Remove now unused #define DRV_PRX. Neaten dprintk macro. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/sony-laptop.c | 104 +++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 55 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 6fe8cd6e23b..12dd0a62f30 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -42,6 +42,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -70,10 +72,10 @@ #include #endif -#define DRV_PFX "sony-laptop: " -#define dprintk(msg...) do { \ - if (debug) \ - pr_warn(DRV_PFX msg); \ +#define dprintk(fmt, ...) \ +do { \ + if (debug) \ + pr_warn(fmt, ##__VA_ARGS__); \ } while (0) #define SONY_LAPTOP_DRIVER_VERSION "0.6" @@ -418,7 +420,7 @@ static int sony_laptop_setup_input(struct acpi_device *acpi_device) error = kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL); if (error) { - pr_err(DRV_PFX "kfifo_alloc failed\n"); + pr_err("kfifo_alloc failed\n"); goto err_dec_users; } @@ -702,7 +704,7 @@ static int acpi_callgetfunc(acpi_handle handle, char *name, int *result) return 0; } - pr_warn(DRV_PFX "acpi_callreadfunc failed\n"); + pr_warn("acpi_callreadfunc failed\n"); return -1; } @@ -728,8 +730,7 @@ static int acpi_callsetfunc(acpi_handle handle, char *name, int value, if (status == AE_OK) { if (result != NULL) { if (out_obj.type != ACPI_TYPE_INTEGER) { - pr_warn(DRV_PFX "acpi_evaluate_object bad " - "return type\n"); + pr_warn("acpi_evaluate_object bad return type\n"); return -1; } *result = out_obj.integer.value; @@ -737,7 +738,7 @@ static int acpi_callsetfunc(acpi_handle handle, char *name, int value, return 0; } - pr_warn(DRV_PFX "acpi_evaluate_object failed\n"); + pr_warn("acpi_evaluate_object failed\n"); return -1; } @@ -1104,10 +1105,8 @@ static void sony_nc_notify(struct acpi_device *device, u32 event) } if (!key_event->data) - pr_info(DRV_PFX - "Unknown event: 0x%x 0x%x\n", - key_handle, - ev); + pr_info("Unknown event: 0x%x 0x%x\n", + key_handle, ev); else sony_laptop_report_input_event(ev); } @@ -1128,7 +1127,7 @@ static acpi_status sony_walk_callback(acpi_handle handle, u32 level, struct acpi_device_info *info; if (ACPI_SUCCESS(acpi_get_object_info(handle, &info))) { - pr_warn(DRV_PFX "method: name: %4.4s, args %X\n", + pr_warn("method: name: %4.4s, args %X\n", (char *)&info->name, info->param_count); kfree(info); @@ -1169,7 +1168,7 @@ static int sony_nc_resume(struct acpi_device *device) ret = acpi_callsetfunc(sony_nc_acpi_handle, *item->acpiset, item->value, NULL); if (ret < 0) { - pr_err(DRV_PFX "%s: %d\n", __func__, ret); + pr_err("%s: %d\n", __func__, ret); break; } } @@ -1336,12 +1335,12 @@ static void sony_nc_rfkill_setup(struct acpi_device *device) device_enum = (union acpi_object *) buffer.pointer; if (!device_enum) { - pr_err(DRV_PFX "No SN06 return object."); + pr_err("No SN06 return object\n"); goto out_no_enum; } if (device_enum->type != ACPI_TYPE_BUFFER) { - pr_err(DRV_PFX "Invalid SN06 return object 0x%.2x\n", - device_enum->type); + pr_err("Invalid SN06 return object 0x%.2x\n", + device_enum->type); goto out_no_enum; } @@ -1662,7 +1661,7 @@ static void sony_nc_backlight_setup(void) ops, &props); if (IS_ERR(sony_bl_props.dev)) { - pr_warn(DRV_PFX "unable to register backlight device\n"); + pr_warn("unable to register backlight device\n"); sony_bl_props.dev = NULL; } else sony_bl_props.dev->props.brightness = @@ -1682,8 +1681,7 @@ static int sony_nc_add(struct acpi_device *device) acpi_handle handle; struct sony_nc_value *item; - pr_info(DRV_PFX "%s v%s.\n", SONY_NC_DRIVER_NAME, - SONY_LAPTOP_DRIVER_VERSION); + pr_info("%s v%s\n", SONY_NC_DRIVER_NAME, SONY_LAPTOP_DRIVER_VERSION); sony_nc_acpi_device = device; strcpy(acpi_device_class(device), "sony/hotkey"); @@ -1708,7 +1706,7 @@ static int sony_nc_add(struct acpi_device *device) sony_nc_acpi_handle, 1, sony_walk_callback, NULL, NULL, NULL); if (ACPI_FAILURE(status)) { - pr_warn(DRV_PFX "unable to walk acpi resources\n"); + pr_warn("unable to walk acpi resources\n"); result = -ENODEV; goto outpresent; } @@ -1736,13 +1734,12 @@ static int sony_nc_add(struct acpi_device *device) /* setup input devices and helper fifo */ result = sony_laptop_setup_input(device); if (result) { - pr_err(DRV_PFX "Unable to create input devices.\n"); + pr_err("Unable to create input devices\n"); goto outkbdbacklight; } if (acpi_video_backlight_support()) { - pr_info(DRV_PFX "brightness ignored, must be " - "controlled by ACPI video driver\n"); + pr_info("brightness ignored, must be controlled by ACPI video driver\n"); } else { sony_nc_backlight_setup(); } @@ -2265,9 +2262,9 @@ out: if (pcidev) pci_dev_put(pcidev); - pr_info(DRV_PFX "detected Type%d model\n", - dev->model == SONYPI_DEVICE_TYPE1 ? 1 : - dev->model == SONYPI_DEVICE_TYPE2 ? 2 : 3); + pr_info("detected Type%d model\n", + dev->model == SONYPI_DEVICE_TYPE1 ? 1 : + dev->model == SONYPI_DEVICE_TYPE2 ? 2 : 3); } /* camera tests and poweron/poweroff */ @@ -2313,7 +2310,7 @@ static int __sony_pic_camera_ready(void) static int __sony_pic_camera_off(void) { if (!camera) { - pr_warn(DRV_PFX "camera control not enabled\n"); + pr_warn("camera control not enabled\n"); return -ENODEV; } @@ -2333,7 +2330,7 @@ static int __sony_pic_camera_on(void) int i, j, x; if (!camera) { - pr_warn(DRV_PFX "camera control not enabled\n"); + pr_warn("camera control not enabled\n"); return -ENODEV; } @@ -2356,7 +2353,7 @@ static int __sony_pic_camera_on(void) } if (j == 0) { - pr_warn(DRV_PFX "failed to power on camera\n"); + pr_warn("failed to power on camera\n"); return -ENODEV; } @@ -2412,8 +2409,7 @@ int sony_pic_camera_command(int command, u8 value) ITERATIONS_SHORT); break; default: - pr_err(DRV_PFX "sony_pic_camera_command invalid: %d\n", - command); + pr_err("sony_pic_camera_command invalid: %d\n", command); break; } mutex_unlock(&spic_dev.lock); @@ -2819,7 +2815,7 @@ static int sonypi_compat_init(void) error = kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL); if (error) { - pr_err(DRV_PFX "kfifo_alloc failed\n"); + pr_err("kfifo_alloc failed\n"); return error; } @@ -2829,12 +2825,12 @@ static int sonypi_compat_init(void) sonypi_misc_device.minor = minor; error = misc_register(&sonypi_misc_device); if (error) { - pr_err(DRV_PFX "misc_register failed\n"); + pr_err("misc_register failed\n"); goto err_free_kfifo; } if (minor == -1) - pr_info(DRV_PFX "device allocated minor is %d\n", - sonypi_misc_device.minor); + pr_info("device allocated minor is %d\n", + sonypi_misc_device.minor); return 0; @@ -2893,8 +2889,8 @@ sony_pic_read_possible_resource(struct acpi_resource *resource, void *context) } for (i = 0; i < p->interrupt_count; i++) { if (!p->interrupts[i]) { - pr_warn(DRV_PFX "Invalid IRQ %d\n", - p->interrupts[i]); + pr_warn("Invalid IRQ %d\n", + p->interrupts[i]); continue; } interrupt = kzalloc(sizeof(*interrupt), @@ -2932,14 +2928,14 @@ sony_pic_read_possible_resource(struct acpi_resource *resource, void *context) ioport->io2.address_length); } else { - pr_err(DRV_PFX "Unknown SPIC Type, more than 2 IO Ports\n"); + pr_err("Unknown SPIC Type, more than 2 IO Ports\n"); return AE_ERROR; } return AE_OK; } default: dprintk("Resource %d isn't an IRQ nor an IO port\n", - resource->type); + resource->type); case ACPI_RESOURCE_TYPE_END_TAG: return AE_OK; @@ -2960,7 +2956,7 @@ static int sony_pic_possible_resources(struct acpi_device *device) dprintk("Evaluating _STA\n"); result = acpi_bus_get_status(device); if (result) { - pr_warn(DRV_PFX "Unable to read status\n"); + pr_warn("Unable to read status\n"); goto end; } @@ -2976,8 +2972,7 @@ static int sony_pic_possible_resources(struct acpi_device *device) status = acpi_walk_resources(device->handle, METHOD_NAME__PRS, sony_pic_read_possible_resource, &spic_dev); if (ACPI_FAILURE(status)) { - pr_warn(DRV_PFX "Failure evaluating %s\n", - METHOD_NAME__PRS); + pr_warn("Failure evaluating %s\n", METHOD_NAME__PRS); result = -ENODEV; } end: @@ -3090,7 +3085,7 @@ static int sony_pic_enable(struct acpi_device *device, /* check for total failure */ if (ACPI_FAILURE(status)) { - pr_err(DRV_PFX "Error evaluating _SRS\n"); + pr_err("Error evaluating _SRS\n"); result = -ENODEV; goto end; } @@ -3182,7 +3177,7 @@ static int sony_pic_remove(struct acpi_device *device, int type) struct sony_pic_irq *irq, *tmp_irq; if (sony_pic_disable(device)) { - pr_err(DRV_PFX "Couldn't disable device.\n"); + pr_err("Couldn't disable device\n"); return -ENXIO; } @@ -3222,8 +3217,7 @@ static int sony_pic_add(struct acpi_device *device) struct sony_pic_ioport *io, *tmp_io; struct sony_pic_irq *irq, *tmp_irq; - pr_info(DRV_PFX "%s v%s.\n", SONY_PIC_DRIVER_NAME, - SONY_LAPTOP_DRIVER_VERSION); + pr_info("%s v%s\n", SONY_PIC_DRIVER_NAME, SONY_LAPTOP_DRIVER_VERSION); spic_dev.acpi_dev = device; strcpy(acpi_device_class(device), "sony/hotkey"); @@ -3233,14 +3227,14 @@ static int sony_pic_add(struct acpi_device *device) /* read _PRS resources */ result = sony_pic_possible_resources(device); if (result) { - pr_err(DRV_PFX "Unable to read possible resources.\n"); + pr_err("Unable to read possible resources\n"); goto err_free_resources; } /* setup input devices and helper fifo */ result = sony_laptop_setup_input(device); if (result) { - pr_err(DRV_PFX "Unable to create input devices.\n"); + pr_err("Unable to create input devices\n"); goto err_free_resources; } @@ -3281,7 +3275,7 @@ static int sony_pic_add(struct acpi_device *device) } } if (!spic_dev.cur_ioport) { - pr_err(DRV_PFX "Failed to request_region.\n"); + pr_err("Failed to request_region\n"); result = -ENODEV; goto err_remove_compat; } @@ -3301,7 +3295,7 @@ static int sony_pic_add(struct acpi_device *device) } } if (!spic_dev.cur_irq) { - pr_err(DRV_PFX "Failed to request_irq.\n"); + pr_err("Failed to request_irq\n"); result = -ENODEV; goto err_release_region; } @@ -3309,7 +3303,7 @@ static int sony_pic_add(struct acpi_device *device) /* set resource status _SRS */ result = sony_pic_enable(device, spic_dev.cur_ioport, spic_dev.cur_irq); if (result) { - pr_err(DRV_PFX "Couldn't enable device.\n"); + pr_err("Couldn't enable device\n"); goto err_free_irq; } @@ -3418,7 +3412,7 @@ static int __init sony_laptop_init(void) if (!no_spic && dmi_check_system(sonypi_dmi_table)) { result = acpi_bus_register_driver(&sony_pic_driver); if (result) { - pr_err(DRV_PFX "Unable to register SPIC driver."); + pr_err("Unable to register SPIC driver\n"); goto out; } spic_drv_registered = 1; @@ -3426,7 +3420,7 @@ static int __init sony_laptop_init(void) result = acpi_bus_register_driver(&sony_nc_driver); if (result) { - pr_err(DRV_PFX "Unable to register SNC driver."); + pr_err("Unable to register SNC driver\n"); goto out_unregister_pic; } -- cgit v1.2.3-70-g09d2 From 33cab1b71ec1bd02d55cede277bbc91485a68068 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:49 -0700 Subject: tc1100-wmi: Add pr_fmt, use pr_ Use the more normal logging styles. Removed now unused local logging #defines. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/tc1100-wmi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/tc1100-wmi.c b/drivers/platform/x86/tc1100-wmi.c index 865ef78d6f1..e24f5ae475a 100644 --- a/drivers/platform/x86/tc1100-wmi.c +++ b/drivers/platform/x86/tc1100-wmi.c @@ -25,6 +25,8 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -40,9 +42,6 @@ #define TC1100_INSTANCE_WIRELESS 1 #define TC1100_INSTANCE_JOGDIAL 2 -#define TC1100_LOGPREFIX "tc1100-wmi: " -#define TC1100_INFO KERN_INFO TC1100_LOGPREFIX - MODULE_AUTHOR("Jamey Hicks, Carlos Corbacho"); MODULE_DESCRIPTION("HP Compaq TC1100 Tablet WMI Extras"); MODULE_LICENSE("GPL"); @@ -264,7 +263,7 @@ static int __init tc1100_init(void) if (error) goto err_device_del; - printk(TC1100_INFO "HP Compaq TC1100 Tablet WMI Extras loaded\n"); + pr_info("HP Compaq TC1100 Tablet WMI Extras loaded\n"); return 0; err_device_del: -- cgit v1.2.3-70-g09d2 From 93c1d05b5fa83ac771b5d1a5b4eb0fe6f54b3b46 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:51 -0700 Subject: topstar-laptop: Convert remaining printk to pr_info To be similar to all other uses. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/topstar-laptop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/topstar-laptop.c b/drivers/platform/x86/topstar-laptop.c index 1d07d6d09f2..4c20447ddbb 100644 --- a/drivers/platform/x86/topstar-laptop.c +++ b/drivers/platform/x86/topstar-laptop.c @@ -194,7 +194,7 @@ static int __init topstar_laptop_init(void) if (ret < 0) return ret; - printk(KERN_INFO "Topstar Laptop ACPI extras driver loaded\n"); + pr_info("ACPI extras driver loaded\n"); return 0; } -- cgit v1.2.3-70-g09d2 From 7e33460d8d991843a5821d667b55c75a092cf6e3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:52 -0700 Subject: toshiba: Convert printks to pr_ Add pr_fmt. Remove local MY_ #defines. Convert printks to pr_. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/toshiba_acpi.c | 59 +++++++++++++++----------------- drivers/platform/x86/toshiba_bluetooth.c | 11 +++--- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 63f42a22e10..cb009b2629e 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -35,6 +35,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #define TOSHIBA_ACPI_VERSION "0.19" #define PROC_INTERFACE_VERSION 1 @@ -60,11 +62,6 @@ MODULE_AUTHOR("John Belmonte"); MODULE_DESCRIPTION("Toshiba Laptop ACPI Extras Driver"); MODULE_LICENSE("GPL"); -#define MY_LOGPREFIX "toshiba_acpi: " -#define MY_ERR KERN_ERR MY_LOGPREFIX -#define MY_NOTICE KERN_NOTICE MY_LOGPREFIX -#define MY_INFO KERN_INFO MY_LOGPREFIX - /* Toshiba ACPI method paths */ #define METHOD_LCD_BRIGHTNESS "\\_SB_.PCI0.VGA_.LCD_._BCM" #define TOSH_INTERFACE_1 "\\_SB_.VALD" @@ -301,7 +298,7 @@ static int toshiba_illumination_available(void) in[0] = 0xf100; status = hci_raw(in, out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Illumination device not available\n"); + pr_info("Illumination device not available\n"); return 0; } in[0] = 0xf400; @@ -320,7 +317,7 @@ static void toshiba_illumination_set(struct led_classdev *cdev, in[0] = 0xf100; status = hci_raw(in, out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Illumination device not available\n"); + pr_info("Illumination device not available\n"); return; } @@ -331,7 +328,7 @@ static void toshiba_illumination_set(struct led_classdev *cdev, in[2] = 1; status = hci_raw(in, out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "ACPI call for illumination failed.\n"); + pr_info("ACPI call for illumination failed\n"); return; } } else { @@ -341,7 +338,7 @@ static void toshiba_illumination_set(struct led_classdev *cdev, in[2] = 0; status = hci_raw(in, out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "ACPI call for illumination failed.\n"); + pr_info("ACPI call for illumination failed.\n"); return; } } @@ -364,7 +361,7 @@ static enum led_brightness toshiba_illumination_get(struct led_classdev *cdev) in[0] = 0xf100; status = hci_raw(in, out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Illumination device not available\n"); + pr_info("Illumination device not available\n"); return LED_OFF; } @@ -373,7 +370,7 @@ static enum led_brightness toshiba_illumination_get(struct led_classdev *cdev) in[1] = 0x14e; status = hci_raw(in, out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "ACPI call for illumination failed.\n"); + pr_info("ACPI call for illumination failed.\n"); return LED_OFF; } @@ -517,7 +514,7 @@ static int lcd_proc_show(struct seq_file *m, void *v) seq_printf(m, "brightness_levels: %d\n", HCI_LCD_BRIGHTNESS_LEVELS); } else { - printk(MY_ERR "Error reading LCD brightness\n"); + pr_err("Error reading LCD brightness\n"); } return 0; @@ -592,7 +589,7 @@ static int video_proc_show(struct seq_file *m, void *v) seq_printf(m, "crt_out: %d\n", is_crt); seq_printf(m, "tv_out: %d\n", is_tv); } else { - printk(MY_ERR "Error reading video out status\n"); + pr_err("Error reading video out status\n"); } return 0; @@ -686,7 +683,7 @@ static int fan_proc_show(struct seq_file *m, void *v) seq_printf(m, "running: %d\n", (value > 0)); seq_printf(m, "force_on: %d\n", force_fan); } else { - printk(MY_ERR "Error reading fan status\n"); + pr_err("Error reading fan status\n"); } return 0; @@ -750,9 +747,9 @@ static int keys_proc_show(struct seq_file *m, void *v) * some machines where system events sporadically * become disabled. */ hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result); - printk(MY_NOTICE "Re-enabled hotkeys\n"); + pr_notice("Re-enabled hotkeys\n"); } else { - printk(MY_ERR "Error reading hotkey status\n"); + pr_err("Error reading hotkey status\n"); goto end; } } @@ -863,7 +860,7 @@ static void toshiba_acpi_notify(acpi_handle handle, u32 event, void *context) if (!sparse_keymap_report_event(toshiba_acpi.hotkey_dev, value, 1, true)) { - printk(MY_INFO "Unknown key %x\n", + pr_info("Unknown key %x\n", value); } } else if (hci_result == HCI_NOT_SUPPORTED) { @@ -871,7 +868,7 @@ static void toshiba_acpi_notify(acpi_handle handle, u32 event, void *context) * some machines where system events sporadically * become disabled. */ hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result); - printk(MY_NOTICE "Re-enabled hotkeys\n"); + pr_notice("Re-enabled hotkeys\n"); } } while (hci_result != HCI_EMPTY); } @@ -883,13 +880,13 @@ static int __init toshiba_acpi_setup_keyboard(char *device) status = acpi_get_handle(NULL, device, &toshiba_acpi.handle); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Unable to get notification device\n"); + pr_info("Unable to get notification device\n"); return -ENODEV; } toshiba_acpi.hotkey_dev = input_allocate_device(); if (!toshiba_acpi.hotkey_dev) { - printk(MY_INFO "Unable to register input device\n"); + pr_info("Unable to register input device\n"); return -ENOMEM; } @@ -905,21 +902,21 @@ static int __init toshiba_acpi_setup_keyboard(char *device) status = acpi_install_notify_handler(toshiba_acpi.handle, ACPI_DEVICE_NOTIFY, toshiba_acpi_notify, NULL); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Unable to install hotkey notification\n"); + pr_info("Unable to install hotkey notification\n"); error = -ENODEV; goto err_free_keymap; } status = acpi_evaluate_object(toshiba_acpi.handle, "ENAB", NULL, NULL); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Unable to enable hotkeys\n"); + pr_info("Unable to enable hotkeys\n"); error = -ENODEV; goto err_remove_notify; } error = input_register_device(toshiba_acpi.hotkey_dev); if (error) { - printk(MY_INFO "Unable to register input device\n"); + pr_info("Unable to register input device\n"); goto err_remove_notify; } @@ -980,17 +977,17 @@ static int __init toshiba_acpi_init(void) if (is_valid_acpi_path(TOSH_INTERFACE_1 GHCI_METHOD)) { method_hci = TOSH_INTERFACE_1 GHCI_METHOD; if (toshiba_acpi_setup_keyboard(TOSH_INTERFACE_1)) - printk(MY_INFO "Unable to activate hotkeys\n"); + pr_info("Unable to activate hotkeys\n"); } else if (is_valid_acpi_path(TOSH_INTERFACE_2 GHCI_METHOD)) { method_hci = TOSH_INTERFACE_2 GHCI_METHOD; if (toshiba_acpi_setup_keyboard(TOSH_INTERFACE_2)) - printk(MY_INFO "Unable to activate hotkeys\n"); + pr_info("Unable to activate hotkeys\n"); } else return -ENODEV; - printk(MY_INFO "Toshiba Laptop ACPI Extras version %s\n", + pr_info("Toshiba Laptop ACPI Extras version %s\n", TOSHIBA_ACPI_VERSION); - printk(MY_INFO " HCI method: %s\n", method_hci); + pr_info(" HCI method: %s\n", method_hci); mutex_init(&toshiba_acpi.mutex); @@ -998,7 +995,7 @@ static int __init toshiba_acpi_init(void) -1, NULL, 0); if (IS_ERR(toshiba_acpi.p_dev)) { ret = PTR_ERR(toshiba_acpi.p_dev); - printk(MY_ERR "unable to register platform device\n"); + pr_err("unable to register platform device\n"); toshiba_acpi.p_dev = NULL; toshiba_acpi_exit(); return ret; @@ -1028,7 +1025,7 @@ static int __init toshiba_acpi_init(void) if (IS_ERR(toshiba_backlight_device)) { ret = PTR_ERR(toshiba_backlight_device); - printk(KERN_ERR "Could not register toshiba backlight device\n"); + pr_err("Could not register toshiba backlight device\n"); toshiba_backlight_device = NULL; toshiba_acpi_exit(); return ret; @@ -1042,14 +1039,14 @@ static int __init toshiba_acpi_init(void) &toshiba_rfk_ops, &toshiba_acpi); if (!toshiba_acpi.bt_rfk) { - printk(MY_ERR "unable to allocate rfkill device\n"); + pr_err("unable to allocate rfkill device\n"); toshiba_acpi_exit(); return -ENOMEM; } ret = rfkill_register(toshiba_acpi.bt_rfk); if (ret) { - printk(MY_ERR "unable to register rfkill device\n"); + pr_err("unable to register rfkill device\n"); rfkill_destroy(toshiba_acpi.bt_rfk); toshiba_acpi_exit(); return ret; diff --git a/drivers/platform/x86/toshiba_bluetooth.c b/drivers/platform/x86/toshiba_bluetooth.c index 94406861191..5fb7186694d 100644 --- a/drivers/platform/x86/toshiba_bluetooth.c +++ b/drivers/platform/x86/toshiba_bluetooth.c @@ -17,6 +17,8 @@ * delivered. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -70,14 +72,13 @@ static int toshiba_bluetooth_enable(acpi_handle handle) if (!(result & 0x01)) return 0; - printk(KERN_INFO "toshiba_bluetooth: Re-enabling Toshiba Bluetooth\n"); + pr_info("Re-enabling Toshiba Bluetooth\n"); res1 = acpi_evaluate_object(handle, "AUSB", NULL, NULL); res2 = acpi_evaluate_object(handle, "BTPO", NULL, NULL); if (!ACPI_FAILURE(res1) || !ACPI_FAILURE(res2)) return 0; - printk(KERN_WARNING "toshiba_bluetooth: Failed to re-enable " - "Toshiba Bluetooth\n"); + pr_warn("Failed to re-enable Toshiba Bluetooth\n"); return -ENODEV; } @@ -107,8 +108,8 @@ static int toshiba_bt_rfkill_add(struct acpi_device *device) &bt_present); if (!ACPI_FAILURE(status) && bt_present) { - printk(KERN_INFO "Detected Toshiba ACPI Bluetooth device - " - "installing RFKill handler\n"); + pr_info("Detected Toshiba ACPI Bluetooth device - " + "installing RFKill handler\n"); result = toshiba_bluetooth_enable(device->handle); } -- cgit v1.2.3-70-g09d2 From dd8e908e82964b8b493ef241707fb6109aa41cfd Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:53 -0700 Subject: wmi: Removed trailing whitespace from logging message. Just neatening. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/wmi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 05cc79672a8..f23d5a84e7b 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -486,16 +486,16 @@ static void wmi_dump_wdg(const struct guid_block *g) pr_info("\tnotify_id: %02X\n", g->notify_id); pr_info("\treserved: %02X\n", g->reserved); pr_info("\tinstance_count: %d\n", g->instance_count); - pr_info("\tflags: %#x ", g->flags); + pr_info("\tflags: %#x", g->flags); if (g->flags) { if (g->flags & ACPI_WMI_EXPENSIVE) - pr_cont("ACPI_WMI_EXPENSIVE "); + pr_cont(" ACPI_WMI_EXPENSIVE"); if (g->flags & ACPI_WMI_METHOD) - pr_cont("ACPI_WMI_METHOD "); + pr_cont(" ACPI_WMI_METHOD"); if (g->flags & ACPI_WMI_STRING) - pr_cont("ACPI_WMI_STRING "); + pr_cont(" ACPI_WMI_STRING"); if (g->flags & ACPI_WMI_EVENT) - pr_cont("ACPI_WMI_EVENT "); + pr_cont(" ACPI_WMI_EVENT"); } pr_cont("\n"); -- cgit v1.2.3-70-g09d2 From ad3f2f038fbd67f3341f760507aed90381114145 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Mar 2011 15:21:54 -0700 Subject: xo15-ebook: Use pr_ Use the current logging styles. Remove local #define PREFIX. Add pr_fmt. Convert printk to pr_. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/xo15-ebook.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/xo15-ebook.c b/drivers/platform/x86/xo15-ebook.c index c1372ed9d2e..fad153dc035 100644 --- a/drivers/platform/x86/xo15-ebook.c +++ b/drivers/platform/x86/xo15-ebook.c @@ -11,6 +11,8 @@ * your option) any later version. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -20,7 +22,6 @@ #include #define MODULE_NAME "xo15-ebook" -#define PREFIX MODULE_NAME ": " #define XO15_EBOOK_CLASS MODULE_NAME #define XO15_EBOOK_TYPE_UNKNOWN 0x00 @@ -105,7 +106,7 @@ static int ebook_switch_add(struct acpi_device *device) class = acpi_device_class(device); if (strcmp(hid, XO15_EBOOK_HID)) { - printk(KERN_ERR PREFIX "Unsupported hid [%s]\n", hid); + pr_err("Unsupported hid [%s]\n", hid); error = -ENODEV; goto err_free_input; } -- cgit v1.2.3-70-g09d2 From 253a0069d7a222f7ad55f2c40f541a7648920c77 Mon Sep 17 00:00:00 2001 From: Ameya Palande Date: Fri, 1 Apr 2011 16:54:11 +0300 Subject: platform-x86: intel_mid_thermal: Fix coding style Before fixing checkpatch.pl reported 74 errors and 234 warnings Signed-off-by: Ameya Palande Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_mid_thermal.c | 606 +++++++++++++++---------------- 1 file changed, 300 insertions(+), 306 deletions(-) diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c index c2f4bd8013b..f0776504521 100644 --- a/drivers/platform/x86/intel_mid_thermal.c +++ b/drivers/platform/x86/intel_mid_thermal.c @@ -37,49 +37,50 @@ #include /* Number of thermal sensors */ -#define MSIC_THERMAL_SENSORS 4 +#define MSIC_THERMAL_SENSORS 4 /* ADC1 - thermal registers */ -#define MSIC_THERM_ADC1CNTL1 0x1C0 -#define MSIC_ADC_ENBL 0x10 -#define MSIC_ADC_START 0x08 +#define MSIC_THERM_ADC1CNTL1 0x1C0 +#define MSIC_ADC_ENBL 0x10 +#define MSIC_ADC_START 0x08 -#define MSIC_THERM_ADC1CNTL3 0x1C2 -#define MSIC_ADCTHERM_ENBL 0x04 -#define MSIC_ADCRRDATA_ENBL 0x05 -#define MSIC_CHANL_MASK_VAL 0x0F +#define MSIC_THERM_ADC1CNTL3 0x1C2 +#define MSIC_ADCTHERM_ENBL 0x04 +#define MSIC_ADCRRDATA_ENBL 0x05 +#define MSIC_CHANL_MASK_VAL 0x0F -#define MSIC_STOPBIT_MASK 16 -#define MSIC_ADCTHERM_MASK 4 -#define ADC_CHANLS_MAX 15 /* Number of ADC channels */ -#define ADC_LOOP_MAX (ADC_CHANLS_MAX - MSIC_THERMAL_SENSORS) +#define MSIC_STOPBIT_MASK 16 +#define MSIC_ADCTHERM_MASK 4 +/* Number of ADC channels */ +#define ADC_CHANLS_MAX 15 +#define ADC_LOOP_MAX (ADC_CHANLS_MAX - MSIC_THERMAL_SENSORS) /* ADC channel code values */ -#define SKIN_SENSOR0_CODE 0x08 -#define SKIN_SENSOR1_CODE 0x09 -#define SYS_SENSOR_CODE 0x0A -#define MSIC_DIE_SENSOR_CODE 0x03 +#define SKIN_SENSOR0_CODE 0x08 +#define SKIN_SENSOR1_CODE 0x09 +#define SYS_SENSOR_CODE 0x0A +#define MSIC_DIE_SENSOR_CODE 0x03 -#define SKIN_THERM_SENSOR0 0 -#define SKIN_THERM_SENSOR1 1 -#define SYS_THERM_SENSOR2 2 -#define MSIC_DIE_THERM_SENSOR3 3 +#define SKIN_THERM_SENSOR0 0 +#define SKIN_THERM_SENSOR1 1 +#define SYS_THERM_SENSOR2 2 +#define MSIC_DIE_THERM_SENSOR3 3 /* ADC code range */ -#define ADC_MAX 977 -#define ADC_MIN 162 -#define ADC_VAL0C 887 -#define ADC_VAL20C 720 -#define ADC_VAL40C 508 -#define ADC_VAL60C 315 +#define ADC_MAX 977 +#define ADC_MIN 162 +#define ADC_VAL0C 887 +#define ADC_VAL20C 720 +#define ADC_VAL40C 508 +#define ADC_VAL60C 315 /* ADC base addresses */ -#define ADC_CHNL_START_ADDR 0x1C5 /* increments by 1 */ -#define ADC_DATA_START_ADDR 0x1D4 /* increments by 2 */ +#define ADC_CHNL_START_ADDR 0x1C5 /* increments by 1 */ +#define ADC_DATA_START_ADDR 0x1D4 /* increments by 2 */ /* MSIC die attributes */ -#define MSIC_DIE_ADC_MIN 488 -#define MSIC_DIE_ADC_MAX 1004 +#define MSIC_DIE_ADC_MIN 488 +#define MSIC_DIE_ADC_MAX 1004 /* This holds the address of the first free ADC channel, * among the 15 channels @@ -87,15 +88,15 @@ static int channel_index; struct platform_info { - struct platform_device *pdev; - struct thermal_zone_device *tzd[MSIC_THERMAL_SENSORS]; + struct platform_device *pdev; + struct thermal_zone_device *tzd[MSIC_THERMAL_SENSORS]; }; struct thermal_device_info { - unsigned int chnl_addr; - int direct; - /* This holds the current temperature in millidegree celsius */ - long curr_temp; + unsigned int chnl_addr; + int direct; + /* This holds the current temperature in millidegree celsius */ + long curr_temp; }; /** @@ -106,7 +107,7 @@ struct thermal_device_info { */ static int to_msic_die_temp(uint16_t adc_val) { - return (368 * (adc_val) / 1000) - 220; + return (368 * (adc_val) / 1000) - 220; } /** @@ -118,7 +119,7 @@ static int to_msic_die_temp(uint16_t adc_val) */ static int is_valid_adc(uint16_t adc_val, uint16_t min, uint16_t max) { - return (adc_val >= min) && (adc_val <= max); + return (adc_val >= min) && (adc_val <= max); } /** @@ -136,35 +137,35 @@ static int is_valid_adc(uint16_t adc_val, uint16_t min, uint16_t max) */ static int adc_to_temp(int direct, uint16_t adc_val, unsigned long *tp) { - int temp; - - /* Direct conversion for die temperature */ - if (direct) { - if (is_valid_adc(adc_val, MSIC_DIE_ADC_MIN, MSIC_DIE_ADC_MAX)) { - *tp = to_msic_die_temp(adc_val) * 1000; - return 0; - } - return -ERANGE; - } - - if (!is_valid_adc(adc_val, ADC_MIN, ADC_MAX)) - return -ERANGE; - - /* Linear approximation for skin temperature */ - if (adc_val > ADC_VAL0C) - temp = 177 - (adc_val/5); - else if ((adc_val <= ADC_VAL0C) && (adc_val > ADC_VAL20C)) - temp = 111 - (adc_val/8); - else if ((adc_val <= ADC_VAL20C) && (adc_val > ADC_VAL40C)) - temp = 92 - (adc_val/10); - else if ((adc_val <= ADC_VAL40C) && (adc_val > ADC_VAL60C)) - temp = 91 - (adc_val/10); - else - temp = 112 - (adc_val/6); - - /* Convert temperature in celsius to milli degree celsius */ - *tp = temp * 1000; - return 0; + int temp; + + /* Direct conversion for die temperature */ + if (direct) { + if (is_valid_adc(adc_val, MSIC_DIE_ADC_MIN, MSIC_DIE_ADC_MAX)) { + *tp = to_msic_die_temp(adc_val) * 1000; + return 0; + } + return -ERANGE; + } + + if (!is_valid_adc(adc_val, ADC_MIN, ADC_MAX)) + return -ERANGE; + + /* Linear approximation for skin temperature */ + if (adc_val > ADC_VAL0C) + temp = 177 - (adc_val/5); + else if ((adc_val <= ADC_VAL0C) && (adc_val > ADC_VAL20C)) + temp = 111 - (adc_val/8); + else if ((adc_val <= ADC_VAL20C) && (adc_val > ADC_VAL40C)) + temp = 92 - (adc_val/10); + else if ((adc_val <= ADC_VAL40C) && (adc_val > ADC_VAL60C)) + temp = 91 - (adc_val/10); + else + temp = 112 - (adc_val/6); + + /* Convert temperature in celsius to milli degree celsius */ + *tp = temp * 1000; + return 0; } /** @@ -178,47 +179,47 @@ static int adc_to_temp(int direct, uint16_t adc_val, unsigned long *tp) */ static int mid_read_temp(struct thermal_zone_device *tzd, unsigned long *temp) { - struct thermal_device_info *td_info = tzd->devdata; - uint16_t adc_val, addr; - uint8_t data = 0; - int ret; - unsigned long curr_temp; - - - addr = td_info->chnl_addr; - - /* Enable the msic for conversion before reading */ - ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCRRDATA_ENBL); - if (ret) - return ret; - - /* Re-toggle the RRDATARD bit (temporary workaround) */ - ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCTHERM_ENBL); - if (ret) - return ret; - - /* Read the higher bits of data */ - ret = intel_scu_ipc_ioread8(addr, &data); - if (ret) - return ret; - - /* Shift bits to accommodate the lower two data bits */ - adc_val = (data << 2); - addr++; - - ret = intel_scu_ipc_ioread8(addr, &data);/* Read lower bits */ - if (ret) - return ret; - - /* Adding lower two bits to the higher bits */ - data &= 03; - adc_val += data; - - /* Convert ADC value to temperature */ - ret = adc_to_temp(td_info->direct, adc_val, &curr_temp); - if (ret == 0) - *temp = td_info->curr_temp = curr_temp; - return ret; + struct thermal_device_info *td_info = tzd->devdata; + uint16_t adc_val, addr; + uint8_t data = 0; + int ret; + unsigned long curr_temp; + + + addr = td_info->chnl_addr; + + /* Enable the msic for conversion before reading */ + ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCRRDATA_ENBL); + if (ret) + return ret; + + /* Re-toggle the RRDATARD bit (temporary workaround) */ + ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCTHERM_ENBL); + if (ret) + return ret; + + /* Read the higher bits of data */ + ret = intel_scu_ipc_ioread8(addr, &data); + if (ret) + return ret; + + /* Shift bits to accommodate the lower two data bits */ + adc_val = (data << 2); + addr++; + + ret = intel_scu_ipc_ioread8(addr, &data);/* Read lower bits */ + if (ret) + return ret; + + /* Adding lower two bits to the higher bits */ + data &= 03; + adc_val += data; + + /* Convert ADC value to temperature */ + ret = adc_to_temp(td_info->direct, adc_val, &curr_temp); + if (ret == 0) + *temp = td_info->curr_temp = curr_temp; + return ret; } /** @@ -231,22 +232,21 @@ static int mid_read_temp(struct thermal_zone_device *tzd, unsigned long *temp) */ static int configure_adc(int val) { - int ret; - uint8_t data; - - ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data); - if (ret) - return ret; - - if (val) { - /* Enable and start the ADC */ - data |= (MSIC_ADC_ENBL | MSIC_ADC_START); - } else { - /* Just stop the ADC */ - data &= (~MSIC_ADC_START); - } - - return intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL1, data); + int ret; + uint8_t data; + + ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data); + if (ret) + return ret; + + if (val) { + /* Enable and start the ADC */ + data |= (MSIC_ADC_ENBL | MSIC_ADC_START); + } else { + /* Just stop the ADC */ + data &= (~MSIC_ADC_START); + } + return intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL1, data); } /** @@ -259,30 +259,30 @@ static int configure_adc(int val) */ static int set_up_therm_channel(u16 base_addr) { - int ret; - - /* Enable all the sensor channels */ - ret = intel_scu_ipc_iowrite8(base_addr, SKIN_SENSOR0_CODE); - if (ret) - return ret; - - ret = intel_scu_ipc_iowrite8(base_addr + 1, SKIN_SENSOR1_CODE); - if (ret) - return ret; - - ret = intel_scu_ipc_iowrite8(base_addr + 2, SYS_SENSOR_CODE); - if (ret) - return ret; - - /* Since this is the last channel, set the stop bit - to 1 by ORing the DIE_SENSOR_CODE with 0x10 */ - ret = intel_scu_ipc_iowrite8(base_addr + 3, - (MSIC_DIE_SENSOR_CODE | 0x10)); - if (ret) - return ret; - - /* Enable ADC and start it */ - return configure_adc(1); + int ret; + + /* Enable all the sensor channels */ + ret = intel_scu_ipc_iowrite8(base_addr, SKIN_SENSOR0_CODE); + if (ret) + return ret; + + ret = intel_scu_ipc_iowrite8(base_addr + 1, SKIN_SENSOR1_CODE); + if (ret) + return ret; + + ret = intel_scu_ipc_iowrite8(base_addr + 2, SYS_SENSOR_CODE); + if (ret) + return ret; + + /* Since this is the last channel, set the stop bit + * to 1 by ORing the DIE_SENSOR_CODE with 0x10 */ + ret = intel_scu_ipc_iowrite8(base_addr + 3, + (MSIC_DIE_SENSOR_CODE | 0x10)); + if (ret) + return ret; + + /* Enable ADC and start it */ + return configure_adc(1); } /** @@ -293,13 +293,13 @@ static int set_up_therm_channel(u16 base_addr) */ static int reset_stopbit(uint16_t addr) { - int ret; - uint8_t data; - ret = intel_scu_ipc_ioread8(addr, &data); - if (ret) - return ret; - /* Set the stop bit to zero */ - return intel_scu_ipc_iowrite8(addr, (data & 0xEF)); + int ret; + uint8_t data; + ret = intel_scu_ipc_ioread8(addr, &data); + if (ret) + return ret; + /* Set the stop bit to zero */ + return intel_scu_ipc_iowrite8(addr, (data & 0xEF)); } /** @@ -317,30 +317,30 @@ static int reset_stopbit(uint16_t addr) */ static int find_free_channel(void) { - int ret; - int i; - uint8_t data; - - /* check whether ADC is enabled */ - ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data); - if (ret) - return ret; - - if ((data & MSIC_ADC_ENBL) == 0) - return 0; - - /* ADC is already enabled; Looking for an empty channel */ - for (i = 0; i < ADC_CHANLS_MAX; i++) { - ret = intel_scu_ipc_ioread8(ADC_CHNL_START_ADDR + i, &data); - if (ret) - return ret; - - if (data & MSIC_STOPBIT_MASK) { - ret = i; - break; - } - } - return (ret > ADC_LOOP_MAX) ? (-EINVAL) : ret; + int ret; + int i; + uint8_t data; + + /* check whether ADC is enabled */ + ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data); + if (ret) + return ret; + + if ((data & MSIC_ADC_ENBL) == 0) + return 0; + + /* ADC is already enabled; Looking for an empty channel */ + for (i = 0; i < ADC_CHANLS_MAX; i++) { + ret = intel_scu_ipc_ioread8(ADC_CHNL_START_ADDR + i, &data); + if (ret) + return ret; + + if (data & MSIC_STOPBIT_MASK) { + ret = i; + break; + } + } + return (ret > ADC_LOOP_MAX) ? (-EINVAL) : ret; } /** @@ -351,48 +351,48 @@ static int find_free_channel(void) */ static int mid_initialize_adc(struct device *dev) { - u8 data; - u16 base_addr; - int ret; - - /* - * Ensure that adctherm is disabled before we - * initialize the ADC - */ - ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL3, &data); - if (ret) - return ret; - - if (data & MSIC_ADCTHERM_MASK) - dev_warn(dev, "ADCTHERM already set"); - - /* Index of the first channel in which the stop bit is set */ - channel_index = find_free_channel(); - if (channel_index < 0) { - dev_err(dev, "No free ADC channels"); - return channel_index; - } - - base_addr = ADC_CHNL_START_ADDR + channel_index; - - if (!(channel_index == 0 || channel_index == ADC_LOOP_MAX)) { - /* Reset stop bit for channels other than 0 and 12 */ - ret = reset_stopbit(base_addr); - if (ret) - return ret; - - /* Index of the first free channel */ - base_addr++; - channel_index++; - } - - ret = set_up_therm_channel(base_addr); - if (ret) { - dev_err(dev, "unable to enable ADC"); - return ret; - } - dev_dbg(dev, "ADC initialization successful"); - return ret; + u8 data; + u16 base_addr; + int ret; + + /* + * Ensure that adctherm is disabled before we + * initialize the ADC + */ + ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL3, &data); + if (ret) + return ret; + + if (data & MSIC_ADCTHERM_MASK) + dev_warn(dev, "ADCTHERM already set"); + + /* Index of the first channel in which the stop bit is set */ + channel_index = find_free_channel(); + if (channel_index < 0) { + dev_err(dev, "No free ADC channels"); + return channel_index; + } + + base_addr = ADC_CHNL_START_ADDR + channel_index; + + if (!(channel_index == 0 || channel_index == ADC_LOOP_MAX)) { + /* Reset stop bit for channels other than 0 and 12 */ + ret = reset_stopbit(base_addr); + if (ret) + return ret; + + /* Index of the first free channel */ + base_addr++; + channel_index++; + } + + ret = set_up_therm_channel(base_addr); + if (ret) { + dev_err(dev, "unable to enable ADC"); + return ret; + } + dev_dbg(dev, "ADC initialization successful"); + return ret; } /** @@ -403,18 +403,18 @@ static int mid_initialize_adc(struct device *dev) */ static struct thermal_device_info *initialize_sensor(int index) { - struct thermal_device_info *td_info = - kzalloc(sizeof(struct thermal_device_info), GFP_KERNEL); - - if (!td_info) - return NULL; - - /* Set the base addr of the channel for this sensor */ - td_info->chnl_addr = ADC_DATA_START_ADDR + 2 * (channel_index + index); - /* Sensor 3 is direct conversion */ - if (index == 3) - td_info->direct = 1; - return td_info; + struct thermal_device_info *td_info = + kzalloc(sizeof(struct thermal_device_info), GFP_KERNEL); + + if (!td_info) + return NULL; + + /* Set the base addr of the channel for this sensor */ + td_info->chnl_addr = ADC_DATA_START_ADDR + 2 * (channel_index + index); + /* Sensor 3 is direct conversion */ + if (index == 3) + td_info->direct = 1; + return td_info; } /** @@ -425,7 +425,7 @@ static struct thermal_device_info *initialize_sensor(int index) */ static int mid_thermal_resume(struct platform_device *pdev) { - return mid_initialize_adc(&pdev->dev); + return mid_initialize_adc(&pdev->dev); } /** @@ -437,12 +437,12 @@ static int mid_thermal_resume(struct platform_device *pdev) */ static int mid_thermal_suspend(struct platform_device *pdev, pm_message_t mesg) { - /* - * This just stops the ADC and does not disable it. - * temporary workaround until we have a generic ADC driver. - * If 0 is passed, it disables the ADC. - */ - return configure_adc(0); + /* + * This just stops the ADC and does not disable it. + * temporary workaround until we have a generic ADC driver. + * If 0 is passed, it disables the ADC. + */ + return configure_adc(0); } /** @@ -453,16 +453,15 @@ static int mid_thermal_suspend(struct platform_device *pdev, pm_message_t mesg) */ static int read_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp) { - WARN_ON(tzd == NULL); - return mid_read_temp(tzd, temp); + WARN_ON(tzd == NULL); + return mid_read_temp(tzd, temp); } /* Can't be const */ static struct thermal_zone_device_ops tzd_ops = { - .get_temp = read_curr_temp, + .get_temp = read_curr_temp, }; - /** * mid_thermal_probe - mfld thermal initialize * @pdev: platform device structure @@ -472,46 +471,45 @@ static struct thermal_zone_device_ops tzd_ops = { */ static int mid_thermal_probe(struct platform_device *pdev) { - static char *name[MSIC_THERMAL_SENSORS] = { - "skin0", "skin1", "sys", "msicdie" - }; - - int ret; - int i; - struct platform_info *pinfo; - - pinfo = kzalloc(sizeof(struct platform_info), GFP_KERNEL); - if (!pinfo) - return -ENOMEM; - - /* Initializing the hardware */ - ret = mid_initialize_adc(&pdev->dev); - if (ret) { - dev_err(&pdev->dev, "ADC init failed"); - kfree(pinfo); - return ret; - } - - /* Register each sensor with the generic thermal framework*/ - for (i = 0; i < MSIC_THERMAL_SENSORS; i++) { - pinfo->tzd[i] = thermal_zone_device_register(name[i], - 0, initialize_sensor(i), - &tzd_ops, 0, 0, 0, 0); - if (IS_ERR(pinfo->tzd[i])) - goto reg_fail; - } - - pinfo->pdev = pdev; - platform_set_drvdata(pdev, pinfo); - return 0; + static char *name[MSIC_THERMAL_SENSORS] = { + "skin0", "skin1", "sys", "msicdie" + }; + + int ret; + int i; + struct platform_info *pinfo; + + pinfo = kzalloc(sizeof(struct platform_info), GFP_KERNEL); + if (!pinfo) + return -ENOMEM; + + /* Initializing the hardware */ + ret = mid_initialize_adc(&pdev->dev); + if (ret) { + dev_err(&pdev->dev, "ADC init failed"); + kfree(pinfo); + return ret; + } + + /* Register each sensor with the generic thermal framework*/ + for (i = 0; i < MSIC_THERMAL_SENSORS; i++) { + pinfo->tzd[i] = thermal_zone_device_register(name[i], + 0, initialize_sensor(i), &tzd_ops, 0, 0, 0, 0); + if (IS_ERR(pinfo->tzd[i])) + goto reg_fail; + } + + pinfo->pdev = pdev; + platform_set_drvdata(pdev, pinfo); + return 0; reg_fail: - ret = PTR_ERR(pinfo->tzd[i]); - while (--i >= 0) - thermal_zone_device_unregister(pinfo->tzd[i]); - configure_adc(0); - kfree(pinfo); - return ret; + ret = PTR_ERR(pinfo->tzd[i]); + while (--i >= 0) + thermal_zone_device_unregister(pinfo->tzd[i]); + configure_adc(0); + kfree(pinfo); + return ret; } /** @@ -523,49 +521,45 @@ reg_fail: */ static int mid_thermal_remove(struct platform_device *pdev) { - int i; - struct platform_info *pinfo = platform_get_drvdata(pdev); + int i; + struct platform_info *pinfo = platform_get_drvdata(pdev); - for (i = 0; i < MSIC_THERMAL_SENSORS; i++) - thermal_zone_device_unregister(pinfo->tzd[i]); + for (i = 0; i < MSIC_THERMAL_SENSORS; i++) + thermal_zone_device_unregister(pinfo->tzd[i]); - platform_set_drvdata(pdev, NULL); + platform_set_drvdata(pdev, NULL); - /* Stop the ADC */ - return configure_adc(0); + /* Stop the ADC */ + return configure_adc(0); } -/********************************************************************* - * Driver initialisation and finalization - *********************************************************************/ - #define DRIVER_NAME "msic_sensor" static const struct platform_device_id therm_id_table[] = { - { DRIVER_NAME, 1 }, - { } + { DRIVER_NAME, 1 }, + { } }; static struct platform_driver mid_thermal_driver = { - .driver = { - .name = DRIVER_NAME, - .owner = THIS_MODULE, - }, - .probe = mid_thermal_probe, - .suspend = mid_thermal_suspend, - .resume = mid_thermal_resume, - .remove = __devexit_p(mid_thermal_remove), - .id_table = therm_id_table, + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, + .probe = mid_thermal_probe, + .suspend = mid_thermal_suspend, + .resume = mid_thermal_resume, + .remove = __devexit_p(mid_thermal_remove), + .id_table = therm_id_table, }; static int __init mid_thermal_module_init(void) { - return platform_driver_register(&mid_thermal_driver); + return platform_driver_register(&mid_thermal_driver); } static void __exit mid_thermal_module_exit(void) { - platform_driver_unregister(&mid_thermal_driver); + platform_driver_unregister(&mid_thermal_driver); } module_init(mid_thermal_module_init); -- cgit v1.2.3-70-g09d2 From 112a6ee053f9e9f014ab64f2549d3a25551aa349 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 4 Apr 2011 10:06:24 -0700 Subject: thinkpad_acpi: Correct !CONFIG_THINKPAD_ACPI_VIDEO warning Move TPACPI_HANDLE declaration into #ifdef block and neaten it a bit. Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/thinkpad_acpi.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 562fcf0dd2b..7693e89459f 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -535,15 +535,6 @@ TPACPI_HANDLE(hkey, ec, "\\_SB.HKEY", /* 600e/x, 770e, 770x */ "HKEY", /* all others */ ); /* 570 */ -TPACPI_HANDLE(vid, root, "\\_SB.PCI.AGP.VGA", /* 570 */ - "\\_SB.PCI0.AGP0.VID0", /* 600e/x, 770x */ - "\\_SB.PCI0.VID0", /* 770e */ - "\\_SB.PCI0.VID", /* A21e, G4x, R50e, X30, X40 */ - "\\_SB.PCI0.AGP.VGA", /* X100e and a few others */ - "\\_SB.PCI0.AGP.VID", /* all others */ - ); /* R30, R31 */ - - /************************************************************************* * ACPI helpers */ @@ -4444,6 +4435,15 @@ static int video_orig_autosw; static int video_autosw_get(void); static int video_autosw_set(int enable); +TPACPI_HANDLE(vid, root, + "\\_SB.PCI.AGP.VGA", /* 570 */ + "\\_SB.PCI0.AGP0.VID0", /* 600e/x, 770x */ + "\\_SB.PCI0.VID0", /* 770e */ + "\\_SB.PCI0.VID", /* A21e, G4x, R50e, X30, X40 */ + "\\_SB.PCI0.AGP.VGA", /* X100e and a few others */ + "\\_SB.PCI0.AGP.VID", /* all others */ + ); /* R30, R31 */ + TPACPI_HANDLE(vid2, root, "\\_SB.PCI0.AGPB.VID"); /* G41 */ static int __init video_init(struct ibm_init_struct *iibm) -- cgit v1.2.3-70-g09d2 From 0978e012cfbaca8bd312933e98cdea2d11778e11 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 4 Apr 2011 10:06:25 -0700 Subject: thinkpad_acpi: Convert printks to pr_ Add pr_fmt. Removed local TPACPI_ #defines, convert to pr_. Neaten dbg_ macros. Added a few missing newlines to logging messages. Added static inline str_supported for !CONFIG_THINKPAD_ACPI_DEBUG vdbg_printk defect reported by Sedat Dilek . Signed-off-by: Joe Perches Signed-off-by: Matthew Garrett --- drivers/platform/x86/thinkpad_acpi.c | 472 ++++++++++++++--------------------- 1 file changed, 188 insertions(+), 284 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 7693e89459f..77f6e707a2a 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -21,6 +21,8 @@ * 02110-1301, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #define TPACPI_VERSION "0.24" #define TPACPI_SYSFS_VERSION 0x020700 @@ -224,17 +226,6 @@ enum tpacpi_hkey_event_t { #define TPACPI_MAX_ACPI_ARGS 3 -/* printk headers */ -#define TPACPI_LOG TPACPI_FILE ": " -#define TPACPI_EMERG KERN_EMERG TPACPI_LOG -#define TPACPI_ALERT KERN_ALERT TPACPI_LOG -#define TPACPI_CRIT KERN_CRIT TPACPI_LOG -#define TPACPI_ERR KERN_ERR TPACPI_LOG -#define TPACPI_WARN KERN_WARNING TPACPI_LOG -#define TPACPI_NOTICE KERN_NOTICE TPACPI_LOG -#define TPACPI_INFO KERN_INFO TPACPI_LOG -#define TPACPI_DEBUG KERN_DEBUG TPACPI_LOG - /* Debugging printk groups */ #define TPACPI_DBG_ALL 0xffff #define TPACPI_DBG_DISCLOSETASK 0x8000 @@ -389,34 +380,36 @@ static int tpacpi_uwb_emulstate; * Debugging helpers */ -#define dbg_printk(a_dbg_level, format, arg...) \ - do { if (dbg_level & (a_dbg_level)) \ - printk(TPACPI_DEBUG "%s: " format, __func__ , ## arg); \ - } while (0) +#define dbg_printk(a_dbg_level, format, arg...) \ +do { \ + if (dbg_level & (a_dbg_level)) \ + printk(KERN_DEBUG pr_fmt("%s: " format), \ + __func__, ##arg); \ +} while (0) #ifdef CONFIG_THINKPAD_ACPI_DEBUG #define vdbg_printk dbg_printk static const char *str_supported(int is_supported); #else -#define vdbg_printk(a_dbg_level, format, arg...) \ - do { } while (0) +static inline const char *str_supported(int is_supported) { return ""; } +#define vdbg_printk(a_dbg_level, format, arg...) \ + no_printk(format, ##arg) #endif static void tpacpi_log_usertask(const char * const what) { - printk(TPACPI_DEBUG "%s: access by process with PID %d\n", - what, task_tgid_vnr(current)); + printk(KERN_DEBUG pr_fmt("%s: access by process with PID %d\n"), + what, task_tgid_vnr(current)); } -#define tpacpi_disclose_usertask(what, format, arg...) \ - do { \ - if (unlikely( \ - (dbg_level & TPACPI_DBG_DISCLOSETASK) && \ - (tpacpi_lifecycle == TPACPI_LIFE_RUNNING))) { \ - printk(TPACPI_DEBUG "%s: PID %d: " format, \ - what, task_tgid_vnr(current), ## arg); \ - } \ - } while (0) +#define tpacpi_disclose_usertask(what, format, arg...) \ +do { \ + if (unlikely((dbg_level & TPACPI_DBG_DISCLOSETASK) && \ + (tpacpi_lifecycle == TPACPI_LIFE_RUNNING))) { \ + printk(KERN_DEBUG pr_fmt("%s: PID %d: " format), \ + what, task_tgid_vnr(current), ## arg); \ + } \ +} while (0) /* * Quirk handling helpers @@ -554,7 +547,7 @@ static int acpi_evalf(acpi_handle handle, int quiet; if (!*fmt) { - printk(TPACPI_ERR "acpi_evalf() called with empty format\n"); + pr_err("acpi_evalf() called with empty format\n"); return 0; } @@ -579,7 +572,7 @@ static int acpi_evalf(acpi_handle handle, break; /* add more types as needed */ default: - printk(TPACPI_ERR "acpi_evalf() called " + pr_err("acpi_evalf() called " "with invalid format character '%c'\n", c); va_end(ap); return 0; @@ -608,13 +601,13 @@ static int acpi_evalf(acpi_handle handle, break; /* add more types as needed */ default: - printk(TPACPI_ERR "acpi_evalf() called " + pr_err("acpi_evalf() called " "with invalid format character '%c'\n", res_type); return 0; } if (!success && !quiet) - printk(TPACPI_ERR "acpi_evalf(%s, %s, ...) failed: %s\n", + pr_err("acpi_evalf(%s, %s, ...) failed: %s\n", method, fmt0, acpi_format_exception(status)); return success; @@ -758,8 +751,7 @@ static int __init setup_acpi_notify(struct ibm_struct *ibm) rc = acpi_bus_get_device(*ibm->acpi->handle, &ibm->acpi->device); if (rc < 0) { - printk(TPACPI_ERR "acpi_bus_get_device(%s) failed: %d\n", - ibm->name, rc); + pr_err("acpi_bus_get_device(%s) failed: %d\n", ibm->name, rc); return -ENODEV; } @@ -772,12 +764,10 @@ static int __init setup_acpi_notify(struct ibm_struct *ibm) ibm->acpi->type, dispatch_acpi_notify, ibm); if (ACPI_FAILURE(status)) { if (status == AE_ALREADY_EXISTS) { - printk(TPACPI_NOTICE - "another device driver is already " - "handling %s events\n", ibm->name); + pr_notice("another device driver is already " + "handling %s events\n", ibm->name); } else { - printk(TPACPI_ERR - "acpi_install_notify_handler(%s) failed: %s\n", + pr_err("acpi_install_notify_handler(%s) failed: %s\n", ibm->name, acpi_format_exception(status)); } return -ENODEV; @@ -802,8 +792,7 @@ static int __init register_tpacpi_subdriver(struct ibm_struct *ibm) ibm->acpi->driver = kzalloc(sizeof(struct acpi_driver), GFP_KERNEL); if (!ibm->acpi->driver) { - printk(TPACPI_ERR - "failed to allocate memory for ibm->acpi->driver\n"); + pr_err("failed to allocate memory for ibm->acpi->driver\n"); return -ENOMEM; } @@ -814,7 +803,7 @@ static int __init register_tpacpi_subdriver(struct ibm_struct *ibm) rc = acpi_bus_register_driver(ibm->acpi->driver); if (rc < 0) { - printk(TPACPI_ERR "acpi_bus_register_driver(%s) failed: %d\n", + pr_err("acpi_bus_register_driver(%s) failed: %d\n", ibm->name, rc); kfree(ibm->acpi->driver); ibm->acpi->driver = NULL; @@ -1072,15 +1061,14 @@ static int parse_strtoul(const char *buf, static void tpacpi_disable_brightness_delay(void) { if (acpi_evalf(hkey_handle, NULL, "PWMS", "qvd", 0)) - printk(TPACPI_NOTICE - "ACPI backlight control delay disabled\n"); + pr_notice("ACPI backlight control delay disabled\n"); } static void printk_deprecated_attribute(const char * const what, const char * const details) { tpacpi_log_usertask("deprecated sysfs attribute"); - printk(TPACPI_WARN "WARNING: sysfs attribute %s is deprecated and " + pr_warn("WARNING: sysfs attribute %s is deprecated and " "will be removed. %s\n", what, details); } @@ -1255,8 +1243,7 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, &tpacpi_rfk_rfkill_ops, atp_rfk); if (!atp_rfk || !atp_rfk->rfkill) { - printk(TPACPI_ERR - "failed to allocate memory for rfkill class\n"); + pr_err("failed to allocate memory for rfkill class\n"); kfree(atp_rfk); return -ENOMEM; } @@ -1266,9 +1253,8 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, sw_status = (tp_rfkops->get_status)(); if (sw_status < 0) { - printk(TPACPI_ERR - "failed to read initial state for %s, error %d\n", - name, sw_status); + pr_err("failed to read initial state for %s, error %d\n", + name, sw_status); } else { sw_state = (sw_status == TPACPI_RFK_RADIO_OFF); if (set_default) { @@ -1282,9 +1268,7 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, res = rfkill_register(atp_rfk->rfkill); if (res < 0) { - printk(TPACPI_ERR - "failed to register %s rfkill switch: %d\n", - name, res); + pr_err("failed to register %s rfkill switch: %d\n", name, res); rfkill_destroy(atp_rfk->rfkill); kfree(atp_rfk); return res; @@ -1292,7 +1276,7 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, tpacpi_rfkill_switches[id] = atp_rfk; - printk(TPACPI_INFO "rfkill switch %s: radio is %sblocked\n", + pr_info("rfkill switch %s: radio is %sblocked\n", name, (sw_state || hw_state) ? "" : "un"); return 0; } @@ -1816,10 +1800,8 @@ static void __init tpacpi_check_outdated_fw(void) * broken, or really stable to begin with, so it is * best if the user upgrades the firmware anyway. */ - printk(TPACPI_WARN - "WARNING: Outdated ThinkPad BIOS/EC firmware\n"); - printk(TPACPI_WARN - "WARNING: This firmware may be missing critical bug " + pr_warn("WARNING: Outdated ThinkPad BIOS/EC firmware\n"); + pr_warn("WARNING: This firmware may be missing critical bug " "fixes and/or important features\n"); } } @@ -2108,9 +2090,7 @@ void static hotkey_mask_warn_incomplete_mask(void) (hotkey_all_mask | TPACPI_HKEY_NVRAM_KNOWN_MASK); if (wantedmask) - printk(TPACPI_NOTICE - "required events 0x%08x not enabled!\n", - wantedmask); + pr_notice("required events 0x%08x not enabled!\n", wantedmask); } /* @@ -2148,10 +2128,9 @@ static int hotkey_mask_set(u32 mask) * a given event. */ if (!hotkey_mask_get() && !rc && (fwmask & ~hotkey_acpi_mask)) { - printk(TPACPI_NOTICE - "asked for hotkey mask 0x%08x, but " - "firmware forced it to 0x%08x\n", - fwmask, hotkey_acpi_mask); + pr_notice("asked for hotkey mask 0x%08x, but " + "firmware forced it to 0x%08x\n", + fwmask, hotkey_acpi_mask); } if (tpacpi_lifecycle != TPACPI_LIFE_EXITING) @@ -2175,13 +2154,11 @@ static int hotkey_user_mask_set(const u32 mask) (mask == 0xffff || mask == 0xffffff || mask == 0xffffffff)) { tp_warned.hotkey_mask_ff = 1; - printk(TPACPI_NOTICE - "setting the hotkey mask to 0x%08x is likely " - "not the best way to go about it\n", mask); - printk(TPACPI_NOTICE - "please consider using the driver defaults, " - "and refer to up-to-date thinkpad-acpi " - "documentation\n"); + pr_notice("setting the hotkey mask to 0x%08x is likely " + "not the best way to go about it\n", mask); + pr_notice("please consider using the driver defaults, " + "and refer to up-to-date thinkpad-acpi " + "documentation\n"); } /* Try to enable what the user asked for, plus whatever we need. @@ -2565,8 +2542,7 @@ static void hotkey_poll_setup(const bool may_warn) NULL, TPACPI_NVRAM_KTHREAD_NAME); if (IS_ERR(tpacpi_hotkey_task)) { tpacpi_hotkey_task = NULL; - printk(TPACPI_ERR - "could not create kernel thread " + pr_err("could not create kernel thread " "for hotkey polling\n"); } } @@ -2574,11 +2550,10 @@ static void hotkey_poll_setup(const bool may_warn) hotkey_poll_stop_sync(); if (may_warn && (poll_driver_mask || poll_user_mask) && hotkey_poll_freq == 0) { - printk(TPACPI_NOTICE - "hot keys 0x%08x and/or events 0x%08x " - "require polling, which is currently " - "disabled\n", - poll_user_mask, poll_driver_mask); + pr_notice("hot keys 0x%08x and/or events 0x%08x " + "require polling, which is currently " + "disabled\n", + poll_user_mask, poll_driver_mask); } } } @@ -2802,13 +2777,13 @@ static ssize_t hotkey_source_mask_store(struct device *dev, mutex_unlock(&hotkey_mutex); if (rc < 0) - printk(TPACPI_ERR "hotkey_source_mask: failed to update the" - "firmware event mask!\n"); + pr_err("hotkey_source_mask: " + "failed to update the firmware event mask!\n"); if (r_ev) - printk(TPACPI_NOTICE "hotkey_source_mask: " - "some important events were disabled: " - "0x%04x\n", r_ev); + pr_notice("hotkey_source_mask: " + "some important events were disabled: 0x%04x\n", + r_ev); tpacpi_disclose_usertask("hotkey_source_mask", "set to 0x%08lx\n", t); @@ -3039,8 +3014,7 @@ static void hotkey_exit(void) if (((tp_features.hotkey_mask && hotkey_mask_set(hotkey_orig_mask)) | hotkey_status_set(false)) != 0) - printk(TPACPI_ERR - "failed to restore hot key mask " + pr_err("failed to restore hot key mask " "to BIOS defaults\n"); } @@ -3279,10 +3253,9 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) for HKEY interface version 0x100 */ if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) { if ((hkeyv >> 8) != 1) { - printk(TPACPI_ERR "unknown version of the " - "HKEY interface: 0x%x\n", hkeyv); - printk(TPACPI_ERR "please report this to %s\n", - TPACPI_MAIL); + pr_err("unknown version of the HKEY interface: 0x%x\n", + hkeyv); + pr_err("please report this to %s\n", TPACPI_MAIL); } else { /* * MHKV 0x100 in A31, R40, R40e, @@ -3295,8 +3268,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) /* Paranoia check AND init hotkey_all_mask */ if (!acpi_evalf(hkey_handle, &hotkey_all_mask, "MHKA", "qd")) { - printk(TPACPI_ERR - "missing MHKA handler, " + pr_err("missing MHKA handler, " "please report this to %s\n", TPACPI_MAIL); /* Fallback: pre-init for FN+F3,F4,F12 */ @@ -3334,16 +3306,14 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) if (dbg_wlswemul) { tp_features.hotkey_wlsw = 1; radiosw_state = !!tpacpi_wlsw_emulstate; - printk(TPACPI_INFO - "radio switch emulation enabled\n"); + pr_info("radio switch emulation enabled\n"); } else #endif /* Not all thinkpads have a hardware radio switch */ if (acpi_evalf(hkey_handle, &status, "WLSW", "qd")) { tp_features.hotkey_wlsw = 1; radiosw_state = !!status; - printk(TPACPI_INFO - "radio switch found; radios are %s\n", + pr_info("radio switch found; radios are %s\n", enabled(status, 0)); } if (tp_features.hotkey_wlsw) @@ -3354,8 +3324,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) if (!res && acpi_evalf(hkey_handle, &status, "MHKG", "qd")) { tp_features.hotkey_tablet = 1; tabletsw_state = !!(status & TP_HOTKEY_TABLET_MASK); - printk(TPACPI_INFO - "possible tablet mode switch found; " + pr_info("possible tablet mode switch found; " "ThinkPad in %s mode\n", (tabletsw_state) ? "tablet" : "laptop"); res = add_to_attr_set(hotkey_dev_attributes, @@ -3373,8 +3342,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) hotkey_keycode_map = kmalloc(TPACPI_HOTKEY_MAP_SIZE, GFP_KERNEL); if (!hotkey_keycode_map) { - printk(TPACPI_ERR - "failed to allocate memory for key map\n"); + pr_err("failed to allocate memory for key map\n"); res = -ENOMEM; goto err_exit; } @@ -3417,13 +3385,11 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) * userspace. tpacpi_detect_brightness_capabilities() must have * been called before this point */ if (tp_features.bright_acpimode && acpi_video_backlight_support()) { - printk(TPACPI_INFO - "This ThinkPad has standard ACPI backlight " - "brightness control, supported by the ACPI " - "video driver\n"); - printk(TPACPI_NOTICE - "Disabling thinkpad-acpi brightness events " - "by default...\n"); + pr_info("This ThinkPad has standard ACPI backlight " + "brightness control, supported by the ACPI " + "video driver\n"); + pr_notice("Disabling thinkpad-acpi brightness events " + "by default...\n"); /* Disable brightness up/down on Lenovo thinkpads when * ACPI is handling them, otherwise it is plain impossible @@ -3530,8 +3496,7 @@ static bool hotkey_notify_wakeup(const u32 hkey, case TP_HKEY_EV_WKUP_S3_BATLOW: /* Battery on critical low level/S3 */ case TP_HKEY_EV_WKUP_S4_BATLOW: /* Battery on critical low level/S4 */ - printk(TPACPI_ALERT - "EMERGENCY WAKEUP: battery almost empty\n"); + pr_alert("EMERGENCY WAKEUP: battery almost empty\n"); /* how to auto-heal: */ /* 2313: woke up from S3, go to S4/S5 */ /* 2413: woke up from S4, go to S5 */ @@ -3542,9 +3507,7 @@ static bool hotkey_notify_wakeup(const u32 hkey, } if (hotkey_wakeup_reason != TP_ACPI_WAKEUP_NONE) { - printk(TPACPI_INFO - "woke up due to a hot-unplug " - "request...\n"); + pr_info("woke up due to a hot-unplug request...\n"); hotkey_wakeup_reason_notify_change(); } return true; @@ -3596,37 +3559,31 @@ static bool hotkey_notify_thermal(const u32 hkey, switch (hkey) { case TP_HKEY_EV_THM_TABLE_CHANGED: - printk(TPACPI_INFO - "EC reports that Thermal Table has changed\n"); + pr_info("EC reports that Thermal Table has changed\n"); /* recommended action: do nothing, we don't have * Lenovo ATM information */ return true; case TP_HKEY_EV_ALARM_BAT_HOT: - printk(TPACPI_CRIT - "THERMAL ALARM: battery is too hot!\n"); + pr_crit("THERMAL ALARM: battery is too hot!\n"); /* recommended action: warn user through gui */ break; case TP_HKEY_EV_ALARM_BAT_XHOT: - printk(TPACPI_ALERT - "THERMAL EMERGENCY: battery is extremely hot!\n"); + pr_alert("THERMAL EMERGENCY: battery is extremely hot!\n"); /* recommended action: immediate sleep/hibernate */ break; case TP_HKEY_EV_ALARM_SENSOR_HOT: - printk(TPACPI_CRIT - "THERMAL ALARM: " + pr_crit("THERMAL ALARM: " "a sensor reports something is too hot!\n"); /* recommended action: warn user through gui, that */ /* some internal component is too hot */ break; case TP_HKEY_EV_ALARM_SENSOR_XHOT: - printk(TPACPI_ALERT - "THERMAL EMERGENCY: " - "a sensor reports something is extremely hot!\n"); + pr_alert("THERMAL EMERGENCY: " + "a sensor reports something is extremely hot!\n"); /* recommended action: immediate sleep/hibernate */ break; default: - printk(TPACPI_ALERT - "THERMAL ALERT: unknown thermal alarm received\n"); + pr_alert("THERMAL ALERT: unknown thermal alarm received\n"); known = false; } @@ -3643,8 +3600,7 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) bool known_ev; if (event != 0x80) { - printk(TPACPI_ERR - "unknown HKEY notification event %d\n", event); + pr_err("unknown HKEY notification event %d\n", event); /* forward it to userspace, maybe it knows how to handle it */ acpi_bus_generate_netlink_event( ibm->acpi->device->pnp.device_class, @@ -3655,7 +3611,7 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) while (1) { if (!acpi_evalf(hkey_handle, &hkey, "MHKP", "d")) { - printk(TPACPI_ERR "failed to retrieve HKEY event\n"); + pr_err("failed to retrieve HKEY event\n"); return; } @@ -3683,8 +3639,7 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) switch (hkey) { case TP_HKEY_EV_BAYEJ_ACK: hotkey_autosleep_ack = 1; - printk(TPACPI_INFO - "bay ejected\n"); + pr_info("bay ejected\n"); hotkey_wakeup_hotunplug_complete_notify_change(); known_ev = true; break; @@ -3700,8 +3655,7 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) /* 0x4000-0x4FFF: dock-related wakeups */ if (hkey == TP_HKEY_EV_UNDOCK_ACK) { hotkey_autosleep_ack = 1; - printk(TPACPI_INFO - "undocked\n"); + pr_info("undocked\n"); hotkey_wakeup_hotunplug_complete_notify_change(); known_ev = true; } else { @@ -3732,11 +3686,9 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) known_ev = false; } if (!known_ev) { - printk(TPACPI_NOTICE - "unhandled HKEY event 0x%04x\n", hkey); - printk(TPACPI_NOTICE - "please report the conditions when this " - "event happened to %s\n", TPACPI_MAIL); + pr_notice("unhandled HKEY event 0x%04x\n", hkey); + pr_notice("please report the conditions when this " + "event happened to %s\n", TPACPI_MAIL); } /* Legacy events */ @@ -3769,8 +3721,7 @@ static void hotkey_resume(void) if (hotkey_status_set(true) < 0 || hotkey_mask_set(hotkey_acpi_mask) < 0) - printk(TPACPI_ERR - "error while attempting to reset the event " + pr_err("error while attempting to reset the event " "firmware interface\n"); tpacpi_send_radiosw_update(); @@ -3815,14 +3766,12 @@ static void hotkey_enabledisable_warn(bool enable) { tpacpi_log_usertask("procfs hotkey enable/disable"); if (!WARN((tpacpi_lifecycle == TPACPI_LIFE_RUNNING || !enable), - TPACPI_WARN - "hotkey enable/disable functionality has been " - "removed from the driver. Hotkeys are always " - "enabled\n")) - printk(TPACPI_ERR - "Please remove the hotkey=enable module " - "parameter, it is deprecated. Hotkeys are always " - "enabled\n"); + pr_fmt("hotkey enable/disable functionality has been " + "removed from the driver. " + "Hotkeys are always enabled.\n"))) + pr_err("Please remove the hotkey=enable module " + "parameter, it is deprecated. " + "Hotkeys are always enabled.\n"); } static int hotkey_write(char *buf) @@ -4002,8 +3951,7 @@ static void bluetooth_shutdown(void) /* Order firmware to save current state to NVRAM */ if (!acpi_evalf(NULL, NULL, "\\BLTH", "vd", TP_ACPI_BLTH_SAVE_STATE)) - printk(TPACPI_NOTICE - "failed to save bluetooth state to NVRAM\n"); + pr_notice("failed to save bluetooth state to NVRAM\n"); else vdbg_printk(TPACPI_DBG_RFKILL, "bluestooth state saved to NVRAM\n"); @@ -4042,8 +3990,7 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm) #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_bluetoothemul) { tp_features.bluetooth = 1; - printk(TPACPI_INFO - "bluetooth switch emulation enabled\n"); + pr_info("bluetooth switch emulation enabled\n"); } else #endif if (tp_features.bluetooth && @@ -4194,8 +4141,7 @@ static void wan_shutdown(void) /* Order firmware to save current state to NVRAM */ if (!acpi_evalf(NULL, NULL, "\\WGSV", "vd", TP_ACPI_WGSV_SAVE_STATE)) - printk(TPACPI_NOTICE - "failed to save WWAN state to NVRAM\n"); + pr_notice("failed to save WWAN state to NVRAM\n"); else vdbg_printk(TPACPI_DBG_RFKILL, "WWAN state saved to NVRAM\n"); @@ -4232,8 +4178,7 @@ static int __init wan_init(struct ibm_init_struct *iibm) #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_wwanemul) { tp_features.wan = 1; - printk(TPACPI_INFO - "wwan switch emulation enabled\n"); + pr_info("wwan switch emulation enabled\n"); } else #endif if (tp_features.wan && @@ -4373,8 +4318,7 @@ static int __init uwb_init(struct ibm_init_struct *iibm) #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_uwbemul) { tp_features.uwb = 1; - printk(TPACPI_INFO - "uwb switch emulation enabled\n"); + pr_info("uwb switch emulation enabled\n"); } else #endif if (tp_features.uwb && @@ -4487,7 +4431,7 @@ static void video_exit(void) dbg_printk(TPACPI_DBG_EXIT, "restoring original video autoswitch mode\n"); if (video_autosw_set(video_orig_autosw)) - printk(TPACPI_ERR "error while trying to restore original " + pr_err("error while trying to restore original " "video autoswitch mode\n"); } @@ -4560,8 +4504,7 @@ static int video_outputsw_set(int status) res = acpi_evalf(vid_handle, NULL, "ASWT", "vdd", status * 0x100, 0); if (!autosw && video_autosw_set(autosw)) { - printk(TPACPI_ERR - "video auto-switch left enabled due to error\n"); + pr_err("video auto-switch left enabled due to error\n"); return -EIO; } break; @@ -4630,8 +4573,7 @@ static int video_outputsw_cycle(void) return -ENOSYS; } if (!autosw && video_autosw_set(autosw)) { - printk(TPACPI_ERR - "video auto-switch left enabled due to error\n"); + pr_err("video auto-switch left enabled due to error\n"); return -EIO; } @@ -5348,7 +5290,7 @@ static int __init led_init(struct ibm_init_struct *iibm) tpacpi_leds = kzalloc(sizeof(*tpacpi_leds) * TPACPI_LED_NUMLEDS, GFP_KERNEL); if (!tpacpi_leds) { - printk(TPACPI_ERR "Out of memory for LED data\n"); + pr_err("Out of memory for LED data\n"); return -ENOMEM; } @@ -5367,9 +5309,8 @@ static int __init led_init(struct ibm_init_struct *iibm) } #ifdef CONFIG_THINKPAD_ACPI_UNSAFE_LEDS - printk(TPACPI_NOTICE - "warning: userspace override of important " - "firmware LEDs is enabled\n"); + pr_notice("warning: userspace override of important " + "firmware LEDs is enabled\n"); #endif return 0; } @@ -5639,17 +5580,16 @@ static void thermal_dump_all_sensors(void) if (n <= 0) return; - printk(TPACPI_NOTICE - "temperatures (Celsius):"); + pr_notice("temperatures (Celsius):"); for (i = 0; i < n; i++) { if (t.temp[i] != TPACPI_THERMAL_SENSOR_NA) - printk(KERN_CONT " %d", (int)(t.temp[i] / 1000)); + pr_cont(" %d", (int)(t.temp[i] / 1000)); else - printk(KERN_CONT " N/A"); + pr_cont(" N/A"); } - printk(KERN_CONT "\n"); + pr_cont("\n"); } /* sysfs temp##_input -------------------------------------------------- */ @@ -5769,14 +5709,12 @@ static int __init thermal_init(struct ibm_init_struct *iibm) if (ta1 == 0) { /* This is sheer paranoia, but we handle it anyway */ if (acpi_tmp7) { - printk(TPACPI_ERR - "ThinkPad ACPI EC access misbehaving, " + pr_err("ThinkPad ACPI EC access misbehaving, " "falling back to ACPI TMPx access " "mode\n"); thermal_read_mode = TPACPI_THERMAL_ACPI_TMP07; } else { - printk(TPACPI_ERR - "ThinkPad ACPI EC access misbehaving, " + pr_err("ThinkPad ACPI EC access misbehaving, " "disabling thermal sensors access\n"); thermal_read_mode = TPACPI_THERMAL_NONE; } @@ -6129,8 +6067,8 @@ static int __init tpacpi_query_bcl_levels(acpi_handle handle) if (ACPI_SUCCESS(acpi_evaluate_object(handle, "_BCL", NULL, &buffer))) { obj = (union acpi_object *)buffer.pointer; if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) { - printk(TPACPI_ERR "Unknown _BCL data, " - "please report this to %s\n", TPACPI_MAIL); + pr_err("Unknown _BCL data, please report this to %s\n", + TPACPI_MAIL); rc = 0; } else { rc = obj->package.count; @@ -6214,18 +6152,15 @@ static void __init tpacpi_detect_brightness_capabilities(void) switch (b) { case 16: bright_maxlvl = 15; - printk(TPACPI_INFO - "detected a 16-level brightness capable ThinkPad\n"); + pr_info("detected a 16-level brightness capable ThinkPad\n"); break; case 8: case 0: bright_maxlvl = 7; - printk(TPACPI_INFO - "detected a 8-level brightness capable ThinkPad\n"); + pr_info("detected a 8-level brightness capable ThinkPad\n"); break; default: - printk(TPACPI_ERR - "Unsupported brightness interface, " + pr_err("Unsupported brightness interface, " "please contact %s\n", TPACPI_MAIL); tp_features.bright_unkfw = 1; bright_maxlvl = b - 1; @@ -6260,22 +6195,19 @@ static int __init brightness_init(struct ibm_init_struct *iibm) if (acpi_video_backlight_support()) { if (brightness_enable > 1) { - printk(TPACPI_INFO - "Standard ACPI backlight interface " - "available, not loading native one.\n"); + pr_info("Standard ACPI backlight interface " + "available, not loading native one\n"); return 1; } else if (brightness_enable == 1) { - printk(TPACPI_WARN - "Cannot enable backlight brightness support, " + pr_warn("Cannot enable backlight brightness support, " "ACPI is already handling it. Refer to the " - "acpi_backlight kernel parameter\n"); + "acpi_backlight kernel parameter.\n"); return 1; } } else if (tp_features.bright_acpimode && brightness_enable > 1) { - printk(TPACPI_NOTICE - "Standard ACPI backlight interface not " - "available, thinkpad_acpi native " - "brightness control enabled\n"); + pr_notice("Standard ACPI backlight interface not " + "available, thinkpad_acpi native " + "brightness control enabled\n"); } /* @@ -6319,19 +6251,17 @@ static int __init brightness_init(struct ibm_init_struct *iibm) if (IS_ERR(ibm_backlight_device)) { int rc = PTR_ERR(ibm_backlight_device); ibm_backlight_device = NULL; - printk(TPACPI_ERR "Could not register backlight device\n"); + pr_err("Could not register backlight device\n"); return rc; } vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_BRGHT, "brightness is supported\n"); if (quirks & TPACPI_BRGHT_Q_ASK) { - printk(TPACPI_NOTICE - "brightness: will use unverified default: " - "brightness_mode=%d\n", brightness_mode); - printk(TPACPI_NOTICE - "brightness: please report to %s whether it works well " - "or not on your ThinkPad\n", TPACPI_MAIL); + pr_notice("brightness: will use unverified default: " + "brightness_mode=%d\n", brightness_mode); + pr_notice("brightness: please report to %s whether it works well " + "or not on your ThinkPad\n", TPACPI_MAIL); } /* Added by mistake in early 2007. Probably useless, but it could @@ -6804,8 +6734,7 @@ static int __init volume_create_alsa_mixer(void) rc = snd_card_create(alsa_index, alsa_id, THIS_MODULE, sizeof(struct tpacpi_alsa_data), &card); if (rc < 0 || !card) { - printk(TPACPI_ERR - "Failed to create ALSA card structures: %d\n", rc); + pr_err("Failed to create ALSA card structures: %d\n", rc); return 1; } @@ -6839,9 +6768,8 @@ static int __init volume_create_alsa_mixer(void) ctl_vol = snd_ctl_new1(&volume_alsa_control_vol, NULL); rc = snd_ctl_add(card, ctl_vol); if (rc < 0) { - printk(TPACPI_ERR - "Failed to create ALSA volume control: %d\n", - rc); + pr_err("Failed to create ALSA volume control: %d\n", + rc); goto err_exit; } data->ctl_vol_id = &ctl_vol->id; @@ -6850,8 +6778,7 @@ static int __init volume_create_alsa_mixer(void) ctl_mute = snd_ctl_new1(&volume_alsa_control_mute, NULL); rc = snd_ctl_add(card, ctl_mute); if (rc < 0) { - printk(TPACPI_ERR "Failed to create ALSA mute control: %d\n", - rc); + pr_err("Failed to create ALSA mute control: %d\n", rc); goto err_exit; } data->ctl_mute_id = &ctl_mute->id; @@ -6859,7 +6786,7 @@ static int __init volume_create_alsa_mixer(void) snd_card_set_dev(card, &tpacpi_pdev->dev); rc = snd_card_register(card); if (rc < 0) { - printk(TPACPI_ERR "Failed to register ALSA card: %d\n", rc); + pr_err("Failed to register ALSA card: %d\n", rc); goto err_exit; } @@ -6915,9 +6842,8 @@ static int __init volume_init(struct ibm_init_struct *iibm) return -EINVAL; if (volume_mode == TPACPI_VOL_MODE_UCMS_STEP) { - printk(TPACPI_ERR - "UCMS step volume mode not implemented, " - "please contact %s\n", TPACPI_MAIL); + pr_err("UCMS step volume mode not implemented, " + "please contact %s\n", TPACPI_MAIL); return 1; } @@ -6981,13 +6907,11 @@ static int __init volume_init(struct ibm_init_struct *iibm) rc = volume_create_alsa_mixer(); if (rc) { - printk(TPACPI_ERR - "Could not create the ALSA mixer interface\n"); + pr_err("Could not create the ALSA mixer interface\n"); return rc; } - printk(TPACPI_INFO - "Console audio control enabled, mode: %s\n", + pr_info("Console audio control enabled, mode: %s\n", (volume_control_allowed) ? "override (read/write)" : "monitor (read only)"); @@ -7049,12 +6973,10 @@ static int volume_write(char *buf) if (!volume_control_allowed && tpacpi_lifecycle != TPACPI_LIFE_INIT) { if (unlikely(!tp_warned.volume_ctrl_forbidden)) { tp_warned.volume_ctrl_forbidden = 1; - printk(TPACPI_NOTICE - "Console audio control in monitor mode, " - "changes are not allowed.\n"); - printk(TPACPI_NOTICE - "Use the volume_control=1 module parameter " - "to enable volume control\n"); + pr_notice("Console audio control in monitor mode, " + "changes are not allowed\n"); + pr_notice("Use the volume_control=1 module parameter " + "to enable volume control\n"); } return -EPERM; } @@ -7129,8 +7051,7 @@ static void inline volume_alsa_notify_change(void) static int __init volume_init(struct ibm_init_struct *iibm) { - printk(TPACPI_INFO - "volume: disabled as there is no ALSA support in this kernel\n"); + pr_info("volume: disabled as there is no ALSA support in this kernel\n"); return 1; } @@ -7337,9 +7258,8 @@ TPACPI_HANDLE(sfan, ec, "SFAN", /* 570 */ static void fan_quirk1_setup(void) { if (fan_control_initial_status == 0x07) { - printk(TPACPI_NOTICE - "fan_init: initial fan status is unknown, " - "assuming it is in auto mode\n"); + pr_notice("fan_init: initial fan status is unknown, " + "assuming it is in auto mode\n"); tp_features.fan_ctrl_status_undef = 1; } } @@ -7726,8 +7646,7 @@ static void fan_watchdog_reset(void) if (!queue_delayed_work(tpacpi_wq, &fan_watchdog_task, msecs_to_jiffies(fan_watchdog_maxinterval * 1000))) { - printk(TPACPI_ERR - "failed to queue the fan watchdog, " + pr_err("failed to queue the fan watchdog, " "watchdog will not trigger\n"); } } else @@ -7741,11 +7660,11 @@ static void fan_watchdog_fire(struct work_struct *ignored) if (tpacpi_lifecycle != TPACPI_LIFE_RUNNING) return; - printk(TPACPI_NOTICE "fan watchdog: enabling fan\n"); + pr_notice("fan watchdog: enabling fan\n"); rc = fan_set_enable(); if (rc < 0) { - printk(TPACPI_ERR "fan watchdog: error %d while enabling fan, " - "will try again later...\n", -rc); + pr_err("fan watchdog: error %d while enabling fan, " + "will try again later...\n", -rc); /* reschedule for later */ fan_watchdog_reset(); } @@ -8049,8 +7968,7 @@ static int __init fan_init(struct ibm_init_struct *iibm) "secondary fan support enabled\n"); } } else { - printk(TPACPI_ERR - "ThinkPad ACPI EC access misbehaving, " + pr_err("ThinkPad ACPI EC access misbehaving, " "fan status and control unavailable\n"); return 1; } @@ -8150,9 +8068,8 @@ static void fan_suspend(pm_message_t state) fan_control_resume_level = 0; rc = fan_get_status_safe(&fan_control_resume_level); if (rc < 0) - printk(TPACPI_NOTICE - "failed to read fan level for later " - "restore during resume: %d\n", rc); + pr_notice("failed to read fan level for later " + "restore during resume: %d\n", rc); /* if it is undefined, don't attempt to restore it. * KEEP THIS LAST */ @@ -8207,13 +8124,11 @@ static void fan_resume(void) return; } if (do_set) { - printk(TPACPI_NOTICE - "restoring fan level to 0x%02x\n", - fan_control_resume_level); + pr_notice("restoring fan level to 0x%02x\n", + fan_control_resume_level); rc = fan_set_level_safe(fan_control_resume_level); if (rc < 0) - printk(TPACPI_NOTICE - "failed to restore fan level: %d\n", rc); + pr_notice("failed to restore fan level: %d\n", rc); } } @@ -8305,8 +8220,8 @@ static int fan_write_cmd_level(const char *cmd, int *rc) *rc = fan_set_level_safe(level); if (*rc == -ENXIO) - printk(TPACPI_ERR "level command accepted for unsupported " - "access mode %d", fan_control_access_mode); + pr_err("level command accepted for unsupported access mode %d\n", + fan_control_access_mode); else if (!*rc) tpacpi_disclose_usertask("procfs fan", "set level to %d\n", level); @@ -8321,8 +8236,8 @@ static int fan_write_cmd_enable(const char *cmd, int *rc) *rc = fan_set_enable(); if (*rc == -ENXIO) - printk(TPACPI_ERR "enable command accepted for unsupported " - "access mode %d", fan_control_access_mode); + pr_err("enable command accepted for unsupported access mode %d\n", + fan_control_access_mode); else if (!*rc) tpacpi_disclose_usertask("procfs fan", "enable\n"); @@ -8336,8 +8251,8 @@ static int fan_write_cmd_disable(const char *cmd, int *rc) *rc = fan_set_disable(); if (*rc == -ENXIO) - printk(TPACPI_ERR "disable command accepted for unsupported " - "access mode %d", fan_control_access_mode); + pr_err("disable command accepted for unsupported access mode %d\n", + fan_control_access_mode); else if (!*rc) tpacpi_disclose_usertask("procfs fan", "disable\n"); @@ -8356,8 +8271,8 @@ static int fan_write_cmd_speed(const char *cmd, int *rc) *rc = fan_set_speed(speed); if (*rc == -ENXIO) - printk(TPACPI_ERR "speed command accepted for unsupported " - "access mode %d", fan_control_access_mode); + pr_err("speed command accepted for unsupported access mode %d\n", + fan_control_access_mode); else if (!*rc) tpacpi_disclose_usertask("procfs fan", "set speed to %d\n", speed); @@ -8560,8 +8475,8 @@ static int __init ibm_init(struct ibm_init_struct *iibm) if (ibm->acpi->notify) { ret = setup_acpi_notify(ibm); if (ret == -ENODEV) { - printk(TPACPI_NOTICE "disabling subdriver %s\n", - ibm->name); + pr_notice("disabling subdriver %s\n", + ibm->name); ret = 0; goto err_out; } @@ -8583,8 +8498,7 @@ static int __init ibm_init(struct ibm_init_struct *iibm) entry = proc_create_data(ibm->name, mode, proc_dir, &dispatch_proc_fops, ibm); if (!entry) { - printk(TPACPI_ERR "unable to create proc entry %s\n", - ibm->name); + pr_err("unable to create proc entry %s\n", ibm->name); ret = -ENODEV; goto err_out; } @@ -8683,13 +8597,11 @@ static int __must_check __init get_thinkpad_model_data( tp->ec_release = (ec_fw_string[4] << 8) | ec_fw_string[5]; } else { - printk(TPACPI_NOTICE - "ThinkPad firmware release %s " - "doesn't match the known patterns\n", - ec_fw_string); - printk(TPACPI_NOTICE - "please report this to %s\n", - TPACPI_MAIL); + pr_notice("ThinkPad firmware release %s " + "doesn't match the known patterns\n", + ec_fw_string); + pr_notice("please report this to %s\n", + TPACPI_MAIL); } break; } @@ -8733,8 +8645,7 @@ static int __init probe_for_thinkpad(void) tpacpi_acpi_handle_locate("ec", TPACPI_ACPI_EC_HID, &ec_handle); if (!ec_handle) { if (is_thinkpad) - printk(TPACPI_ERR - "Not yet supported ThinkPad detected!\n"); + pr_err("Not yet supported ThinkPad detected!\n"); return -ENODEV; } @@ -8746,10 +8657,10 @@ static int __init probe_for_thinkpad(void) static void __init thinkpad_acpi_init_banner(void) { - printk(TPACPI_INFO "%s v%s\n", TPACPI_DESC, TPACPI_VERSION); - printk(TPACPI_INFO "%s\n", TPACPI_URL); + pr_info("%s v%s\n", TPACPI_DESC, TPACPI_VERSION); + pr_info("%s\n", TPACPI_URL); - printk(TPACPI_INFO "ThinkPad BIOS %s, EC %s\n", + pr_info("ThinkPad BIOS %s, EC %s\n", (thinkpad_id.bios_version_str) ? thinkpad_id.bios_version_str : "unknown", (thinkpad_id.ec_version_str) ? @@ -8758,7 +8669,7 @@ static void __init thinkpad_acpi_init_banner(void) BUG_ON(!thinkpad_id.vendor); if (thinkpad_id.model_str) - printk(TPACPI_INFO "%s %s, model %s\n", + pr_info("%s %s, model %s\n", (thinkpad_id.vendor == PCI_VENDOR_ID_IBM) ? "IBM" : ((thinkpad_id.vendor == PCI_VENDOR_ID_LENOVO) ? @@ -9024,8 +8935,7 @@ static int __init thinkpad_acpi_module_init(void) ret = get_thinkpad_model_data(&thinkpad_id); if (ret) { - printk(TPACPI_ERR - "unable to get DMI data: %d\n", ret); + pr_err("unable to get DMI data: %d\n", ret); thinkpad_acpi_module_exit(); return ret; } @@ -9051,16 +8961,14 @@ static int __init thinkpad_acpi_module_init(void) proc_dir = proc_mkdir(TPACPI_PROC_DIR, acpi_root_dir); if (!proc_dir) { - printk(TPACPI_ERR - "unable to create proc dir " TPACPI_PROC_DIR); + pr_err("unable to create proc dir " TPACPI_PROC_DIR "\n"); thinkpad_acpi_module_exit(); return -ENODEV; } ret = platform_driver_register(&tpacpi_pdriver); if (ret) { - printk(TPACPI_ERR - "unable to register main platform driver\n"); + pr_err("unable to register main platform driver\n"); thinkpad_acpi_module_exit(); return ret; } @@ -9068,8 +8976,7 @@ static int __init thinkpad_acpi_module_init(void) ret = platform_driver_register(&tpacpi_hwmon_pdriver); if (ret) { - printk(TPACPI_ERR - "unable to register hwmon platform driver\n"); + pr_err("unable to register hwmon platform driver\n"); thinkpad_acpi_module_exit(); return ret; } @@ -9082,8 +8989,7 @@ static int __init thinkpad_acpi_module_init(void) &tpacpi_hwmon_pdriver.driver); } if (ret) { - printk(TPACPI_ERR - "unable to create sysfs driver attributes\n"); + pr_err("unable to create sysfs driver attributes\n"); thinkpad_acpi_module_exit(); return ret; } @@ -9096,7 +9002,7 @@ static int __init thinkpad_acpi_module_init(void) if (IS_ERR(tpacpi_pdev)) { ret = PTR_ERR(tpacpi_pdev); tpacpi_pdev = NULL; - printk(TPACPI_ERR "unable to register platform device\n"); + pr_err("unable to register platform device\n"); thinkpad_acpi_module_exit(); return ret; } @@ -9106,16 +9012,14 @@ static int __init thinkpad_acpi_module_init(void) if (IS_ERR(tpacpi_sensors_pdev)) { ret = PTR_ERR(tpacpi_sensors_pdev); tpacpi_sensors_pdev = NULL; - printk(TPACPI_ERR - "unable to register hwmon platform device\n"); + pr_err("unable to register hwmon platform device\n"); thinkpad_acpi_module_exit(); return ret; } ret = device_create_file(&tpacpi_sensors_pdev->dev, &dev_attr_thinkpad_acpi_pdev_name); if (ret) { - printk(TPACPI_ERR - "unable to create sysfs hwmon device attributes\n"); + pr_err("unable to create sysfs hwmon device attributes\n"); thinkpad_acpi_module_exit(); return ret; } @@ -9124,14 +9028,14 @@ static int __init thinkpad_acpi_module_init(void) if (IS_ERR(tpacpi_hwmon)) { ret = PTR_ERR(tpacpi_hwmon); tpacpi_hwmon = NULL; - printk(TPACPI_ERR "unable to register hwmon device\n"); + pr_err("unable to register hwmon device\n"); thinkpad_acpi_module_exit(); return ret; } mutex_init(&tpacpi_inputdev_send_mutex); tpacpi_inputdev = input_allocate_device(); if (!tpacpi_inputdev) { - printk(TPACPI_ERR "unable to allocate input device\n"); + pr_err("unable to allocate input device\n"); thinkpad_acpi_module_exit(); return -ENOMEM; } else { @@ -9163,7 +9067,7 @@ static int __init thinkpad_acpi_module_init(void) ret = input_register_device(tpacpi_inputdev); if (ret < 0) { - printk(TPACPI_ERR "unable to register input device\n"); + pr_err("unable to register input device\n"); thinkpad_acpi_module_exit(); return ret; } else { -- cgit v1.2.3-70-g09d2 From bb3ce2020451bdebe5ceac770504f427724008a9 Mon Sep 17 00:00:00 2001 From: Yin Kangkai Date: Wed, 6 Apr 2011 15:05:24 +0100 Subject: platform/oaktrail: ACPI EC Extra driver for Oaktrail This driver implements an Extra ACPI EC driver for products based on Intel Oaktrail platform. This driver does below things: 1. registers itself in the Linux backlight control in /sys/class/backlight/intel_oaktrail/ 2. registers in the rfkill subsystem here: /sys/class/rfkill/rfkillX/ for these components: wifi, bluetooth, wwan (3g), gps Signed-off-by: Yin Kangkai [Extracted from a bigger patch by Yin Kangkai, this version leaves out some sysfs bits that probably want to be driver managed, and ACPI i2c enumeration] Signed-off-by: Alan Cox Signed-off-by: Matthew Garrett --- drivers/platform/x86/Kconfig | 9 + drivers/platform/x86/Makefile | 1 + drivers/platform/x86/intel_oaktrail.c | 396 ++++++++++++++++++++++++++++++++++ 3 files changed, 406 insertions(+) create mode 100644 drivers/platform/x86/intel_oaktrail.c diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 5cb999b50f9..83c2411acb5 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -760,4 +760,13 @@ config MXM_WMI MXM is a standard for laptop graphics cards, the WMI interface is required for switchable nvidia graphics machines +config INTEL_OAKTRAIL + tristate "Intel Oaktrail Platform Extras" + depends on ACPI + depends on RFKILL && BACKLIGHT_CLASS_DEVICE && ACPI + ---help--- + Intel Oaktrail platform need this driver to provide interfaces to + enable/disable the Camera, WiFi, BT etc. devices. If in doubt, say Y + here; it will only load on supported platforms. + endif # X86_PLATFORM_DEVICES diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index a7ab3bc7b3a..af683fb5a80 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -43,3 +43,4 @@ obj-$(CONFIG_IBM_RTL) += ibm_rtl.o obj-$(CONFIG_SAMSUNG_LAPTOP) += samsung-laptop.o obj-$(CONFIG_INTEL_MFLD_THERMAL) += intel_mid_thermal.o obj-$(CONFIG_MXM_WMI) += mxm-wmi.o +obj-$(CONFIG_INTEL_OAKTRAIL) += intel_oaktrail.o diff --git a/drivers/platform/x86/intel_oaktrail.c b/drivers/platform/x86/intel_oaktrail.c new file mode 100644 index 00000000000..e936364a609 --- /dev/null +++ b/drivers/platform/x86/intel_oaktrail.c @@ -0,0 +1,396 @@ +/* + * intel_oaktrail.c - Intel OakTrail Platform support. + * + * Copyright (C) 2010-2011 Intel Corporation + * Author: Yin Kangkai (kangkai.yin@intel.com) + * + * based on Compal driver, Copyright (C) 2008 Cezary Jackiewicz + * , based on MSI driver + * Copyright (C) 2006 Lennart Poettering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * This driver does below things: + * 1. registers itself in the Linux backlight control in + * /sys/class/backlight/intel_oaktrail/ + * + * 2. registers in the rfkill subsystem here: /sys/class/rfkill/rfkillX/ + * for these components: wifi, bluetooth, wwan (3g), gps + * + * This driver might work on other products based on Oaktrail. If you + * want to try it you can pass force=1 as argument to the module which + * will force it to load even when the DMI data doesn't identify the + * product as compatible. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define DRIVER_NAME "intel_oaktrail" +#define DRIVER_VERSION "0.4ac1" + +/* + * This is the devices status address in EC space, and the control bits + * definition: + * + * (1 << 0): Camera enable/disable, RW. + * (1 << 1): Bluetooth enable/disable, RW. + * (1 << 2): GPS enable/disable, RW. + * (1 << 3): WiFi enable/disable, RW. + * (1 << 4): WWAN (3G) enable/disalbe, RW. + * (1 << 5): Touchscreen enable/disable, Read Only. + */ +#define OT_EC_DEVICE_STATE_ADDRESS 0xD6 + +#define OT_EC_CAMERA_MASK (1 << 0) +#define OT_EC_BT_MASK (1 << 1) +#define OT_EC_GPS_MASK (1 << 2) +#define OT_EC_WIFI_MASK (1 << 3) +#define OT_EC_WWAN_MASK (1 << 4) +#define OT_EC_TS_MASK (1 << 5) + +/* + * This is the address in EC space and commands used to control LCD backlight: + * + * Two steps needed to change the LCD backlight: + * 1. write the backlight percentage into OT_EC_BL_BRIGHTNESS_ADDRESS; + * 2. write OT_EC_BL_CONTROL_ON_DATA into OT_EC_BL_CONTROL_ADDRESS. + * + * To read the LCD back light, just read out the value from + * OT_EC_BL_BRIGHTNESS_ADDRESS. + * + * LCD backlight brightness range: 0 - 100 (OT_EC_BL_BRIGHTNESS_MAX) + */ +#define OT_EC_BL_BRIGHTNESS_ADDRESS 0x44 +#define OT_EC_BL_BRIGHTNESS_MAX 100 +#define OT_EC_BL_CONTROL_ADDRESS 0x3A +#define OT_EC_BL_CONTROL_ON_DATA 0x1A + + +static int force; +module_param(force, bool, 0); +MODULE_PARM_DESC(force, "Force driver load, ignore DMI data"); + +static struct platform_device *oaktrail_device; +static struct backlight_device *oaktrail_bl_device; +static struct rfkill *bt_rfkill; +static struct rfkill *gps_rfkill; +static struct rfkill *wifi_rfkill; +static struct rfkill *wwan_rfkill; + + +/* rfkill */ +static int oaktrail_rfkill_set(void *data, bool blocked) +{ + u8 value; + u8 result; + unsigned long radio = (unsigned long) data; + + ec_read(OT_EC_DEVICE_STATE_ADDRESS, &result); + + if (!blocked) + value = (u8) (result | radio); + else + value = (u8) (result & ~radio); + + ec_write(OT_EC_DEVICE_STATE_ADDRESS, value); + + return 0; +} + +static const struct rfkill_ops oaktrail_rfkill_ops = { + .set_block = oaktrail_rfkill_set, +}; + +static struct rfkill *oaktrail_rfkill_new(char *name, enum rfkill_type type, + unsigned long mask) +{ + struct rfkill *rfkill_dev; + u8 value; + int err; + + rfkill_dev = rfkill_alloc(name, &oaktrail_device->dev, type, + &oaktrail_rfkill_ops, (void *)mask); + if (!rfkill_dev) + return ERR_PTR(-ENOMEM); + + ec_read(OT_EC_DEVICE_STATE_ADDRESS, &value); + rfkill_init_sw_state(rfkill_dev, (value & mask) != 1); + + err = rfkill_register(rfkill_dev); + if (err) { + rfkill_destroy(rfkill_dev); + return ERR_PTR(err); + } + + return rfkill_dev; +} + +static inline void __oaktrail_rfkill_cleanup(struct rfkill *rf) +{ + if (rf) { + rfkill_unregister(rf); + rfkill_destroy(rf); + } +} + +static void oaktrail_rfkill_cleanup(void) +{ + __oaktrail_rfkill_cleanup(wifi_rfkill); + __oaktrail_rfkill_cleanup(bt_rfkill); + __oaktrail_rfkill_cleanup(gps_rfkill); + __oaktrail_rfkill_cleanup(wwan_rfkill); +} + +static int oaktrail_rfkill_init(void) +{ + int ret; + + wifi_rfkill = oaktrail_rfkill_new("oaktrail-wifi", + RFKILL_TYPE_WLAN, + OT_EC_WIFI_MASK); + if (IS_ERR(wifi_rfkill)) { + ret = PTR_ERR(wifi_rfkill); + wifi_rfkill = NULL; + goto cleanup; + } + + bt_rfkill = oaktrail_rfkill_new("oaktrail-bluetooth", + RFKILL_TYPE_BLUETOOTH, + OT_EC_BT_MASK); + if (IS_ERR(bt_rfkill)) { + ret = PTR_ERR(bt_rfkill); + bt_rfkill = NULL; + goto cleanup; + } + + gps_rfkill = oaktrail_rfkill_new("oaktrail-gps", + RFKILL_TYPE_GPS, + OT_EC_GPS_MASK); + if (IS_ERR(gps_rfkill)) { + ret = PTR_ERR(gps_rfkill); + gps_rfkill = NULL; + goto cleanup; + } + + wwan_rfkill = oaktrail_rfkill_new("oaktrail-wwan", + RFKILL_TYPE_WWAN, + OT_EC_WWAN_MASK); + if (IS_ERR(wwan_rfkill)) { + ret = PTR_ERR(wwan_rfkill); + wwan_rfkill = NULL; + goto cleanup; + } + + return 0; + +cleanup: + oaktrail_rfkill_cleanup(); + return ret; +} + + +/* backlight */ +static int get_backlight_brightness(struct backlight_device *b) +{ + u8 value; + ec_read(OT_EC_BL_BRIGHTNESS_ADDRESS, &value); + + return value; +} + +static int set_backlight_brightness(struct backlight_device *b) +{ + u8 percent = (u8) b->props.brightness; + if (percent < 0 || percent > OT_EC_BL_BRIGHTNESS_MAX) + return -EINVAL; + + ec_write(OT_EC_BL_BRIGHTNESS_ADDRESS, percent); + ec_write(OT_EC_BL_CONTROL_ADDRESS, OT_EC_BL_CONTROL_ON_DATA); + + return 0; +} + +static const struct backlight_ops oaktrail_bl_ops = { + .get_brightness = get_backlight_brightness, + .update_status = set_backlight_brightness, +}; + +static int oaktrail_backlight_init(void) +{ + struct backlight_device *bd; + struct backlight_properties props; + + memset(&props, 0, sizeof(struct backlight_properties)); + props.max_brightness = OT_EC_BL_BRIGHTNESS_MAX; + bd = backlight_device_register(DRIVER_NAME, + &oaktrail_device->dev, NULL, + &oaktrail_bl_ops, + &props); + + if (IS_ERR(bd)) { + oaktrail_bl_device = NULL; + pr_warning("Unable to register backlight device\n"); + return PTR_ERR(bd); + } + + oaktrail_bl_device = bd; + + bd->props.brightness = get_backlight_brightness(bd); + bd->props.power = FB_BLANK_UNBLANK; + backlight_update_status(bd); + + return 0; +} + +static void oaktrail_backlight_exit(void) +{ + if (oaktrail_bl_device) + backlight_device_unregister(oaktrail_bl_device); +} + +static int __devinit oaktrail_probe(struct platform_device *pdev) +{ + return 0; +} + +static int __devexit oaktrail_remove(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver oaktrail_driver = { + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, + .probe = oaktrail_probe, + .remove = __devexit_p(oaktrail_remove) +}; + +static int dmi_check_cb(const struct dmi_system_id *id) +{ + pr_info("Identified model '%s'\n", id->ident); + return 0; +} + +static struct dmi_system_id __initdata oaktrail_dmi_table[] = { + { + .ident = "OakTrail platform", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "OakTrail platform"), + }, + .callback = dmi_check_cb + }, + { } +}; + +static int __init oaktrail_init(void) +{ + int ret; + + if (acpi_disabled) { + pr_err("ACPI needs to be enabled for this driver to work!\n"); + return -ENODEV; + } + + if (!force && !dmi_check_system(oaktrail_dmi_table)) { + pr_err("Platform not recognized (You could try the module's force-parameter)"); + return -ENODEV; + } + + ret = platform_driver_register(&oaktrail_driver); + if (ret) { + pr_warning("Unable to register platform driver\n"); + goto err_driver_reg; + } + + oaktrail_device = platform_device_alloc(DRIVER_NAME, -1); + if (!oaktrail_device) { + pr_warning("Unable to allocate platform device\n"); + ret = -ENOMEM; + goto err_device_alloc; + } + + ret = platform_device_add(oaktrail_device); + if (ret) { + pr_warning("Unable to add platform device\n"); + goto err_device_add; + } + + if (!acpi_video_backlight_support()) { + ret = oaktrail_backlight_init(); + if (ret) + goto err_backlight; + + } else + pr_info("Backlight controlled by ACPI video driver\n"); + + ret = oaktrail_rfkill_init(); + if (ret) { + pr_warning("Setup rfkill failed\n"); + goto err_rfkill; + } + + pr_info("Driver "DRIVER_VERSION" successfully loaded\n"); + return 0; + +err_rfkill: + oaktrail_backlight_exit(); +err_backlight: + platform_device_del(oaktrail_device); +err_device_add: + platform_device_put(oaktrail_device); +err_device_alloc: + platform_driver_unregister(&oaktrail_driver); +err_driver_reg: + + return ret; +} + +static void __exit oaktrail_cleanup(void) +{ + oaktrail_backlight_exit(); + oaktrail_rfkill_cleanup(); + platform_device_unregister(oaktrail_device); + platform_driver_unregister(&oaktrail_driver); + + pr_info("Driver unloaded\n"); +} + +module_init(oaktrail_init); +module_exit(oaktrail_cleanup); + +MODULE_AUTHOR("Yin Kangkai (kangkai.yin@intel.com)"); +MODULE_DESCRIPTION("Intel Oaktrail Platform ACPI Extras"); +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("dmi:*:svnIntelCorporation:pnOakTrailplatform:*"); -- cgit v1.2.3-70-g09d2 From c2647b5e99c8ff1b3f535c7c84564cdc53214edf Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Fri, 15 Apr 2011 18:42:47 +0800 Subject: acer-wmi: does not allow negative number set to initial device state The driver set module parameter value: mailled, threeg and brightness to BIOS by evaluate wmi method when driver was initialed. The default values for those parameters are -1, so, that will be better don't set negative value to BIOS. Cc: Carlos Corbacho Cc: Matthew Garrett Cc: Dmitry Torokhov Cc: Corentin Chary Signed-off-by: Lee, Chun-Yi Signed-off-by: Matthew Garrett --- drivers/platform/x86/acer-wmi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index fa8fa73f543..0682ecb3074 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -961,10 +961,12 @@ static void __init acer_commandline_init(void) * These will all fail silently if the value given is invalid, or the * capability isn't available on the given interface */ - set_u32(mailled, ACER_CAP_MAILLED); - if (!has_type_aa) + if (mailled >= 0) + set_u32(mailled, ACER_CAP_MAILLED); + if (!has_type_aa && threeg >= 0) set_u32(threeg, ACER_CAP_THREEG); - set_u32(brightness, ACER_CAP_BRIGHTNESS); + if (brightness >= 0) + set_u32(brightness, ACER_CAP_BRIGHTNESS); } /* -- cgit v1.2.3-70-g09d2 From 95c7021540661a1d19130296be3e799d5a9135f8 Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Mon, 2 May 2011 09:57:08 +0100 Subject: tc1100-wmi: Orphan driver I've never owned the hardware, this was a port of an existing driver to prove that the ACPI-WMI code was useful to more than just acer-wmi. Signed-off-by: Carlos Corbacho Signed-off-by: Matthew Garrett --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c4fc1daddcb..bcb07cb927b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3024,9 +3024,8 @@ S: Maintained F: drivers/net/wireless/hostap/ HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER -M: Carlos Corbacho L: platform-driver-x86@vger.kernel.org -S: Odd Fixes +S: Orphan F: drivers/platform/x86/tc1100-wmi.c HP100: Driver for HP 10/100 Mbit/s Voice Grade Network Adapter Series -- cgit v1.2.3-70-g09d2 From 5b9272594b5144538b96ca7eb82898c49519ec16 Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Mon, 2 May 2011 09:57:13 +0100 Subject: wmi: Orphan ACPI-WMI driver I no longer have the time to work on this, and haven't really been doing any work to this either. Time to let someone else take the reins. Signed-off-by: Carlos Corbacho Signed-off-by: Matthew Garrett --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index bcb07cb927b..390a4130a61 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -271,10 +271,8 @@ S: Supported F: drivers/acpi/video.c ACPI WMI DRIVER -M: Carlos Corbacho L: platform-driver-x86@vger.kernel.org -W: http://www.lesswatts.org/projects/acpi/ -S: Maintained +S: Orphan F: drivers/platform/x86/wmi.c AD1889 ALSA SOUND DRIVER -- cgit v1.2.3-70-g09d2 From d9269a7146e33147866ecbf1ae485f6d1a1379f1 Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Mon, 2 May 2011 09:57:18 +0100 Subject: acer-wmi: Update MAINTAINERS I don't have the time or much interest these days in maintaining acer-wmi, as I don't have access to newer Acer hardware. As he's been doing most of the work these days anyway, Joey Lee has kindly agreed to take over. Signed-off-by: Carlos Corbacho Cc: Joey Lee Signed-off-by: Matthew Garrett --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 390a4130a61..bad19e9d9dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -223,10 +223,8 @@ S: Maintained F: drivers/platform/x86/acerhdf.c ACER WMI LAPTOP EXTRAS -M: Carlos Corbacho -L: aceracpi@googlegroups.com (subscribers-only) +M: Joey Lee L: platform-driver-x86@vger.kernel.org -W: http://code.google.com/p/aceracpi S: Maintained F: drivers/platform/x86/acer-wmi.c -- cgit v1.2.3-70-g09d2 From e569b223d53cb2720f55be72932f5758a056ff6d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 24 Apr 2011 11:00:08 +0200 Subject: acerhdf: Drop pointless dependency on THERMAL_HWMON The THERMAL_HWMON config option simply exposes the thermal zone temperature values and limits to user-space. It makes no sense for a kernel driver to depend on this. Signed-off-by: Jean Delvare Cc: Peter Feuerer Cc: Matthew Garrett Signed-off-by: Matthew Garrett --- drivers/platform/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 83c2411acb5..45e0191c35d 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -39,7 +39,7 @@ config ACER_WMI config ACERHDF tristate "Acer Aspire One temperature and fan driver" - depends on THERMAL && THERMAL_HWMON && ACPI + depends on THERMAL && ACPI ---help--- This is a driver for Acer Aspire One netbooks. It allows to access the temperature sensor and to control the fan. -- cgit v1.2.3-70-g09d2 From c4bae98c4f913d3220d185c47d8817b5e2bba007 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 24 Apr 2011 18:07:55 +0200 Subject: acerhdf: Clean up includes * The acerhdf driver isn't an ACPI driver, so it needs not include . All it uses is ec_read() and ec_write(), for which is sufficient. * I couldn't find any reason why and were included. This should avoid unneeded rebuilds of the acerhdf driver. Signed-off-by: Jean Delvare Cc: Peter Feuerer Cc: Matthew Garrett Signed-off-by: Matthew Garrett --- drivers/platform/x86/acerhdf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index 60f9cfcac93..fca3489218b 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -35,10 +35,8 @@ #include #include -#include #include -#include -#include +#include #include #include -- cgit v1.2.3-70-g09d2 From 020036678e810be10a352339faa9a1df2bd8f5a3 Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Mon, 2 May 2011 09:57:23 +0100 Subject: acer-wmi: Delete out-of-date documentation The documentation file for acer-wmi is long out of date, and there's not much point in keeping it around either. Signed-off-by: Carlos Corbacho Signed-off-by: Matthew Garrett --- Documentation/laptops/acer-wmi.txt | 184 ------------------------------------- 1 file changed, 184 deletions(-) delete mode 100644 Documentation/laptops/acer-wmi.txt diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt deleted file mode 100644 index 4beafa663dd..00000000000 --- a/Documentation/laptops/acer-wmi.txt +++ /dev/null @@ -1,184 +0,0 @@ -Acer Laptop WMI Extras Driver -http://code.google.com/p/aceracpi -Version 0.3 -4th April 2009 - -Copyright 2007-2009 Carlos Corbacho - -acer-wmi is a driver to allow you to control various parts of your Acer laptop -hardware under Linux which are exposed via ACPI-WMI. - -This driver completely replaces the old out-of-tree acer_acpi, which I am -currently maintaining for bug fixes only on pre-2.6.25 kernels. All development -work is now focused solely on acer-wmi. - -Disclaimer -********** - -Acer and Wistron have provided nothing towards the development acer_acpi or -acer-wmi. All information we have has been through the efforts of the developers -and the users to discover as much as possible about the hardware. - -As such, I do warn that this could break your hardware - this is extremely -unlikely of course, but please bear this in mind. - -Background -********** - -acer-wmi is derived from acer_acpi, originally developed by Mark -Smith in 2005, then taken over by Carlos Corbacho in 2007, in order to activate -the wireless LAN card under a 64-bit version of Linux, as acerhk[1] (the -previous solution to the problem) relied on making 32 bit BIOS calls which are -not possible in kernel space from a 64 bit OS. - -[1] acerhk: http://www.cakey.de/acerhk/ - -Supported Hardware -****************** - -NOTE: The Acer Aspire One is not supported hardware. It cannot work with -acer-wmi until Acer fix their ACPI-WMI implementation on them, so has been -blacklisted until that happens. - -Please see the website for the current list of known working hardware: - -http://code.google.com/p/aceracpi/wiki/SupportedHardware - -If your laptop is not listed, or listed as unknown, and works with acer-wmi, -please contact me with a copy of the DSDT. - -If your Acer laptop doesn't work with acer-wmi, I would also like to see the -DSDT. - -To send me the DSDT, as root/sudo: - -cat /sys/firmware/acpi/tables/DSDT > dsdt - -And send me the resulting 'dsdt' file. - -Usage -***** - -On Acer laptops, acer-wmi should already be autoloaded based on DMI matching. -For non-Acer laptops, until WMI based autoloading support is added, you will -need to manually load acer-wmi. - -acer-wmi creates /sys/devices/platform/acer-wmi, and fills it with various -files whose usage is detailed below, which enables you to control some of the -following (varies between models): - -* the wireless LAN card radio -* inbuilt Bluetooth adapter -* inbuilt 3G card -* mail LED of your laptop -* brightness of the LCD panel - -Wireless -******** - -With regards to wireless, all acer-wmi does is enable the radio on the card. It -is not responsible for the wireless LED - once the radio is enabled, this is -down to the wireless driver for your card. So the behaviour of the wireless LED, -once you enable the radio, will depend on your hardware and driver combination. - -e.g. With the BCM4318 on the Acer Aspire 5020 series: - -ndiswrapper: Light blinks on when transmitting -b43: Solid light, blinks off when transmitting - -Wireless radio control is unconditionally enabled - all Acer laptops that support -acer-wmi come with built-in wireless. However, should you feel so inclined to -ever wish to remove the card, or swap it out at some point, please get in touch -with me, as we may well be able to gain some data on wireless card detection. - -The wireless radio is exposed through rfkill. - -Bluetooth -********* - -For bluetooth, this is an internal USB dongle, so once enabled, you will get -a USB device connection event, and a new USB device appears. When you disable -bluetooth, you get the reverse - a USB device disconnect event, followed by the -device disappearing again. - -Bluetooth is autodetected by acer-wmi, so if you do not have a bluetooth module -installed in your laptop, this file won't exist (please be aware that it is -quite common for Acer not to fit bluetooth to their laptops - so just because -you have a bluetooth button on the laptop, doesn't mean that bluetooth is -installed). - -For the adventurously minded - if you want to buy an internal bluetooth -module off the internet that is compatible with your laptop and fit it, then -it will work just fine with acer-wmi. - -Bluetooth is exposed through rfkill. - -3G -** - -3G is currently not autodetected, so the 'threeg' file is always created under -sysfs. So far, no-one in possession of an Acer laptop with 3G built-in appears to -have tried Linux, or reported back, so we don't have any information on this. - -If you have an Acer laptop that does have a 3G card in, please contact me so we -can properly detect these, and find out a bit more about them. - -To read the status of the 3G card (0=off, 1=on): -cat /sys/devices/platform/acer-wmi/threeg - -To enable the 3G card: -echo 1 > /sys/devices/platform/acer-wmi/threeg - -To disable the 3G card: -echo 0 > /sys/devices/platform/acer-wmi/threeg - -To set the state of the 3G card when loading acer-wmi, pass: -threeg=X (where X is 0 or 1) - -Mail LED -******** - -This can be found in most older Acer laptops supported by acer-wmi, and many -newer ones - it is built into the 'mail' button, and blinks when active. - -On newer (WMID) laptops though, we have no way of detecting the mail LED. If -your laptop identifies itself in dmesg as a WMID model, then please try loading -acer_acpi with: - -force_series=2490 - -This will use a known alternative method of reading/ writing the mail LED. If -it works, please report back to me with the DMI data from your laptop so this -can be added to acer-wmi. - -The LED is exposed through the LED subsystem, and can be found in: - -/sys/devices/platform/acer-wmi/leds/acer-wmi::mail/ - -The mail LED is autodetected, so if you don't have one, the LED device won't -be registered. - -Backlight -********* - -The backlight brightness control is available on all acer-wmi supported -hardware. The maximum brightness level is usually 15, but on some newer laptops -it's 10 (this is again autodetected). - -The backlight is exposed through the backlight subsystem, and can be found in: - -/sys/devices/platform/acer-wmi/backlight/acer-wmi/ - -Credits -******* - -Olaf Tauber, who did the real hard work when he developed acerhk -http://www.cakey.de/acerhk/ -All the authors of laptop ACPI modules in the kernel, whose work -was an inspiration in the early days of acer_acpi -Mathieu Segaud, who solved the problem with having to modprobe the driver -twice in acer_acpi 0.2. -Jim Ramsay, who added support for the WMID interface -Mark Smith, who started the original acer_acpi - -And the many people who have used both acer_acpi and acer-wmi. -- cgit v1.2.3-70-g09d2 From b9e066942981c6de3897de94953bea295ae18ec8 Mon Sep 17 00:00:00 2001 From: Ameya Palande <2ameya@gmail.com> Date: Wed, 6 Apr 2011 17:44:37 +0300 Subject: platform/x86: Simplify intel_mid_powerbtn This patch: 1. Removes unnecessay #defines 2. Removes 'mfld_pb_priv' data structure which results in simpler error handling and less memory allocations. Signed-off-by: Ameya Palande <2ameya@gmail.com> Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_mid_powerbtn.c | 72 +++++++++++++------------------ 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index 213e79ba68d..f1ae5078b7e 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -23,58 +23,48 @@ #include #include #include + #include #define DRIVER_NAME "msic_power_btn" -#define MSIC_IRQ_STAT 0x02 - #define MSIC_IRQ_PB (1 << 0) -#define MSIC_PB_CONFIG 0x3e #define MSIC_PB_STATUS 0x3f - #define MSIC_PB_LEVEL (1 << 3) /* 1 - release, 0 - press */ - -struct mfld_pb_priv { - struct input_dev *input; - unsigned int irq; -}; +#define MSIC_PB_LEVEL (1 << 3) /* 1 - release, 0 - press */ static irqreturn_t mfld_pb_isr(int irq, void *dev_id) { - struct mfld_pb_priv *priv = dev_id; + struct input_dev *input = dev_id; int ret; u8 pbstat; ret = intel_scu_ipc_ioread8(MSIC_PB_STATUS, &pbstat); - if (ret < 0) - return IRQ_HANDLED; - - input_event(priv->input, EV_KEY, KEY_POWER, !(pbstat & MSIC_PB_LEVEL)); - input_sync(priv->input); + if (ret < 0) { + dev_err(input->dev.parent, "Read error %d while reading" + " MSIC_PB_STATUS\n", ret); + } else { + input_event(input, EV_KEY, KEY_POWER, + !(pbstat & MSIC_PB_LEVEL)); + input_sync(input); + } return IRQ_HANDLED; } static int __devinit mfld_pb_probe(struct platform_device *pdev) { - struct mfld_pb_priv *priv; struct input_dev *input; - int irq; + int irq = platform_get_irq(pdev, 0); int error; - irq = platform_get_irq(pdev, 0); if (irq < 0) return -EINVAL; - priv = kzalloc(sizeof(struct mfld_pb_priv), GFP_KERNEL); input = input_allocate_device(); - if (!priv || !input) { - error = -ENOMEM; - goto err_free_mem; + if (!input) { + dev_err(&pdev->dev, "Input device allocation error\n"); + return -ENOMEM; } - priv->input = input; - priv->irq = irq; - input->name = pdev->name; input->phys = "power-button/input0"; input->id.bustype = BUS_HOST; @@ -82,42 +72,40 @@ static int __devinit mfld_pb_probe(struct platform_device *pdev) input_set_capability(input, EV_KEY, KEY_POWER); - error = request_threaded_irq(priv->irq, NULL, mfld_pb_isr, - 0, DRIVER_NAME, priv); + error = request_threaded_irq(irq, NULL, mfld_pb_isr, 0, + DRIVER_NAME, input); if (error) { - dev_err(&pdev->dev, - "unable to request irq %d for mfld power button\n", - irq); - goto err_free_mem; + dev_err(&pdev->dev, "Unable to request irq %d for mfld power" + "button\n", irq); + goto err_free_input; } error = input_register_device(input); if (error) { - dev_err(&pdev->dev, - "unable to register input dev, error %d\n", error); + dev_err(&pdev->dev, "Unable to register input dev, error " + "%d\n", error); goto err_free_irq; } - platform_set_drvdata(pdev, priv); + platform_set_drvdata(pdev, input); return 0; err_free_irq: - free_irq(priv->irq, priv); -err_free_mem: + free_irq(irq, input); +err_free_input: input_free_device(input); - kfree(priv); return error; } static int __devexit mfld_pb_remove(struct platform_device *pdev) { - struct mfld_pb_priv *priv = platform_get_drvdata(pdev); - - free_irq(priv->irq, priv); - input_unregister_device(priv->input); - kfree(priv); + struct input_dev *input = platform_get_drvdata(pdev); + int irq = platform_get_irq(pdev, 0); + free_irq(irq, input); + input_unregister_device(input); platform_set_drvdata(pdev, NULL); + return 0; } -- cgit v1.2.3-70-g09d2 From cb8b646d8b27053d311595b4fc7dbe161d92683e Mon Sep 17 00:00:00 2001 From: Ameya Palande <2ameya@gmail.com> Date: Wed, 6 Apr 2011 17:45:03 +0300 Subject: platform/x86: Fix Makefile for intel_mid_powerbtn Signed-off-by: Ameya Palande <2ameya@gmail.com> Signed-off-by: Matthew Garrett --- drivers/platform/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index af683fb5a80..afc1f832aa6 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -41,6 +41,6 @@ obj-$(CONFIG_XO1_RFKILL) += xo1-rfkill.o obj-$(CONFIG_XO15_EBOOK) += xo15-ebook.o obj-$(CONFIG_IBM_RTL) += ibm_rtl.o obj-$(CONFIG_SAMSUNG_LAPTOP) += samsung-laptop.o -obj-$(CONFIG_INTEL_MFLD_THERMAL) += intel_mid_thermal.o obj-$(CONFIG_MXM_WMI) += mxm-wmi.o +obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o obj-$(CONFIG_INTEL_OAKTRAIL) += intel_oaktrail.o -- cgit v1.2.3-70-g09d2 From 239dca9e9aff6630c22fef696978ba3a878f7d96 Mon Sep 17 00:00:00 2001 From: Ameya Palande <2ameya@gmail.com> Date: Thu, 7 Apr 2011 16:30:02 +0300 Subject: platform-x86: intel_mid_thermal: Fix memory leak Signed-off-by: Ameya Palande <2ameya@gmail.com> Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_mid_thermal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c index f0776504521..3a578323122 100644 --- a/drivers/platform/x86/intel_mid_thermal.c +++ b/drivers/platform/x86/intel_mid_thermal.c @@ -527,6 +527,7 @@ static int mid_thermal_remove(struct platform_device *pdev) for (i = 0; i < MSIC_THERMAL_SENSORS; i++) thermal_zone_device_unregister(pinfo->tzd[i]); + kfree(pinfo); platform_set_drvdata(pdev, NULL); /* Stop the ADC */ -- cgit v1.2.3-70-g09d2 From 8ae68de15d5ddaa8d41e197a730e19223adab2e1 Mon Sep 17 00:00:00 2001 From: Melchior FRANZ Date: Tue, 24 May 2011 10:35:55 +0200 Subject: support wlan hotkey on Acer Travelmate 5735Z On an Acer Travelmate 5735Z-452G32Mnss the WLAN-enable/disable key doesn't send 0x1 as acpi event key code, but 0x3. This patch also makes the module ignore hotkey acpi events for functions that are already handled without. This avoids warning message "keyboard: can't emulate rawmode for keycode 240". Signed-off-by: Melchior FRANZ Signed-off-by: Matthew Garrett --- drivers/platform/x86/acer-wmi.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 0682ecb3074..57c6c8a4db2 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -98,13 +98,26 @@ enum acer_wmi_event_ids { static const struct key_entry acer_wmi_keymap[] = { {KE_KEY, 0x01, {KEY_WLAN} }, /* WiFi */ + {KE_KEY, 0x03, {KEY_WLAN} }, /* WiFi */ {KE_KEY, 0x12, {KEY_BLUETOOTH} }, /* BT */ {KE_KEY, 0x21, {KEY_PROG1} }, /* Backup */ {KE_KEY, 0x22, {KEY_PROG2} }, /* Arcade */ {KE_KEY, 0x23, {KEY_PROG3} }, /* P_Key */ {KE_KEY, 0x24, {KEY_PROG4} }, /* Social networking_Key */ + {KE_IGNORE, 0x41, {KEY_MUTE} }, + {KE_IGNORE, 0x42, {KEY_PREVIOUSSONG} }, + {KE_IGNORE, 0x43, {KEY_NEXTSONG} }, + {KE_IGNORE, 0x44, {KEY_PLAYPAUSE} }, + {KE_IGNORE, 0x45, {KEY_STOP} }, + {KE_IGNORE, 0x48, {KEY_VOLUMEUP} }, + {KE_IGNORE, 0x49, {KEY_VOLUMEDOWN} }, + {KE_IGNORE, 0x61, {KEY_SWITCHVIDEOMODE} }, + {KE_IGNORE, 0x62, {KEY_BRIGHTNESSUP} }, + {KE_IGNORE, 0x63, {KEY_BRIGHTNESSDOWN} }, {KE_KEY, 0x64, {KEY_SWITCHVIDEOMODE} }, /* Display Switch */ + {KE_IGNORE, 0x81, {KEY_SLEEP} }, {KE_KEY, 0x82, {KEY_TOUCHPAD_TOGGLE} }, /* Touch Pad On/Off */ + {KE_IGNORE, 0x83, {KEY_TOUCHPAD_TOGGLE} }, {KE_END, 0} }; @@ -1345,7 +1358,7 @@ static void acer_wmi_notify(u32 value, void *context) case WMID_HOTKEY_EVENT: if (return_value.device_state) { u16 device_state = return_value.device_state; - pr_debug("deivces states: 0x%x\n", device_state); + pr_debug("device state: 0x%x\n", device_state); if (has_cap(ACER_CAP_WIRELESS)) rfkill_set_sw_state(wireless_rfkill, !(device_state & ACER_WMID3_GDS_WIRELESS)); -- cgit v1.2.3-70-g09d2 From 5ddf9c5fa5a78036790ed72aaed3b84ee7dd4532 Mon Sep 17 00:00:00 2001 From: Weiping Pan Date: Mon, 16 May 2011 14:49:47 +0800 Subject: platform/x86:delete two unused variables variable handle is not used in these two functions, just delete them. Signed-off-by: Weiping Pan Signed-off-by: Matthew Garrett --- drivers/platform/x86/sony-laptop.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 12dd0a62f30..bbd182e178c 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -962,7 +962,6 @@ static int sony_backlight_get_brightness(struct backlight_device *bd) static int sony_nc_get_brightness_ng(struct backlight_device *bd) { int result; - int *handle = (int *)bl_get_data(bd); struct sony_backlight_props *sdev = (struct sony_backlight_props *)bl_get_data(bd); @@ -974,7 +973,6 @@ static int sony_nc_get_brightness_ng(struct backlight_device *bd) static int sony_nc_update_status_ng(struct backlight_device *bd) { int value, result; - int *handle = (int *)bl_get_data(bd); struct sony_backlight_props *sdev = (struct sony_backlight_props *)bl_get_data(bd); -- cgit v1.2.3-70-g09d2 From a8d1a266eee5f8b822449fe19d1735189377ef47 Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Sun, 22 May 2011 07:33:52 +0800 Subject: acer-wmi: check the existence of internal 3G device when set capability That will be better to check the existence of internal 3G device when we set threeg capability and generate killswitch for threeg. It can avoid userland access 3G rfkill but the machine doesn't have internal 3G device. Reference: bko#32862 https://bugzilla.kernel.org/show_bug.cgi?id=32862 Tested on Acer Aspire 8930G, Acer Travelmate 8572 Tested-by: Hector Martin Cc: Carlos Corbacho Cc: Matthew Garrett Cc: Dmitry Torokhov Cc: Corentin Chary Cc: Thomas Renninger Signed-off-by: Lee, Chun-Yi Signed-off-by: Matthew Garrett --- drivers/platform/x86/acer-wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 57c6c8a4db2..92ee4d63727 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -889,7 +889,8 @@ static acpi_status WMID_set_capabilities(void) dmi_walk(type_aa_dmi_decode, NULL); if (!has_type_aa) { interface->capability |= ACER_CAP_WIRELESS; - interface->capability |= ACER_CAP_THREEG; + if (devices & 0x40) + interface->capability |= ACER_CAP_THREEG; if (devices & 0x10) interface->capability |= ACER_CAP_BLUETOOTH; } -- cgit v1.2.3-70-g09d2 From ab6a931620cfa5c565b351d1982306c3c8b97f96 Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Sun, 22 May 2011 07:33:53 +0800 Subject: acer-wmi: allow 64-bits return buffer from WMI methods Acer WMID_GUID1/2 method's return buffer was declared to 64-bits on some Acer notebook, but WMI method only use 32-bits in return buffer. So, add this patch for allow 64-bits return buffer. Reference: bko#34142 https://bugzilla.kernel.org/show_bug.cgi?id=34142 Tested on Acer Travelmate 5735Z-452G32Mnss Tested-by: Melchior FRANZ Cc: Carlos Corbacho Cc: Matthew Garrett Cc: Dmitry Torokhov Cc: Corentin Chary Cc: Thomas Renninger Signed-off-by: Lee, Chun-Yi Signed-off-by: Matthew Garrett --- drivers/platform/x86/acer-wmi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 92ee4d63727..48219117823 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -750,7 +750,8 @@ WMI_execute_u32(u32 method_id, u32 in, u32 *out) obj = (union acpi_object *) result.pointer; if (obj && obj->type == ACPI_TYPE_BUFFER && - obj->buffer.length == sizeof(u32)) { + (obj->buffer.length == sizeof(u32) || + obj->buffer.length == sizeof(u64))) { tmp = *((u32 *) obj->buffer.pointer); } else { tmp = 0; @@ -879,7 +880,8 @@ static acpi_status WMID_set_capabilities(void) obj = (union acpi_object *) out.pointer; if (obj && obj->type == ACPI_TYPE_BUFFER && - obj->buffer.length == sizeof(u32)) { + (obj->buffer.length == sizeof(u32) || + obj->buffer.length == sizeof(u64))) { devices = *((u32 *) obj->buffer.pointer); } else { kfree(out.pointer); @@ -1522,7 +1524,8 @@ static u32 get_wmid_devices(void) obj = (union acpi_object *) out.pointer; if (obj && obj->type == ACPI_TYPE_BUFFER && - obj->buffer.length == sizeof(u32)) { + (obj->buffer.length == sizeof(u32) || + obj->buffer.length == sizeof(u64))) { devices = *((u32 *) obj->buffer.pointer); } -- cgit v1.2.3-70-g09d2 From 6d88ff0f8ef3529f68233d3963b2a7e07f8e4f69 Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Sun, 22 May 2011 07:33:54 +0800 Subject: acer-wmi: support to set communication device state by new wmid method Have many Acer notebooks' BIOS already support new WMID_GUID3 method. On those machines, that will be better set communication device by evaluate WMID_GUID3 method. Tested on Acer Travelmate 8572 Cc: Carlos Corbacho Cc: Matthew Garrett Cc: Dmitry Torokhov Cc: Corentin Chary Cc: Thomas Renninger Signed-off-by: Lee, Chun-Yi Signed-off-by: Matthew Garrett --- drivers/platform/x86/acer-wmi.c | 111 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 48219117823..5dc843bfc6c 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -135,6 +135,7 @@ struct event_return_value { */ #define ACER_WMID3_GDS_WIRELESS (1<<0) /* WiFi */ #define ACER_WMID3_GDS_THREEG (1<<6) /* 3G */ +#define ACER_WMID3_GDS_WIMAX (1<<7) /* WiMAX */ #define ACER_WMID3_GDS_BLUETOOTH (1<<11) /* BT */ struct lm_input_params { @@ -1142,6 +1143,114 @@ static acpi_status get_device_status(u32 *value, u32 cap) } } +static acpi_status wmid3_set_device_status(u32 value, u16 device) +{ + struct wmid3_gds_return_value return_value; + acpi_status status; + union acpi_object *obj; + u16 devices; + struct wmid3_gds_input_param params = { + .function_num = 0x1, + .hotkey_number = 0x01, + .devices = ACER_WMID3_GDS_WIRELESS & + ACER_WMID3_GDS_THREEG & + ACER_WMID3_GDS_WIMAX & + ACER_WMID3_GDS_BLUETOOTH, + }; + struct acpi_buffer input = { + sizeof(struct wmid3_gds_input_param), + ¶ms + }; + struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer output2 = { ACPI_ALLOCATE_BUFFER, NULL }; + + status = wmi_evaluate_method(WMID_GUID3, 0, 0x2, &input, &output); + if (ACPI_FAILURE(status)) + return status; + + obj = output.pointer; + + if (!obj) + return AE_ERROR; + else if (obj->type != ACPI_TYPE_BUFFER) { + kfree(obj); + return AE_ERROR; + } + if (obj->buffer.length != 8) { + pr_warning("Unknown buffer length %d\n", obj->buffer.length); + kfree(obj); + return AE_ERROR; + } + + return_value = *((struct wmid3_gds_return_value *)obj->buffer.pointer); + kfree(obj); + + if (return_value.error_code || return_value.ec_return_value) { + pr_warning("Get Current Device Status failed: " + "0x%x - 0x%x\n", return_value.error_code, + return_value.ec_return_value); + return status; + } + + devices = return_value.devices; + params.function_num = 0x2; + params.hotkey_number = 0x01; + params.devices = (value) ? (devices | device) : (devices & ~device); + + status = wmi_evaluate_method(WMID_GUID3, 0, 0x1, &input, &output2); + if (ACPI_FAILURE(status)) + return status; + + obj = output2.pointer; + + if (!obj) + return AE_ERROR; + else if (obj->type != ACPI_TYPE_BUFFER) { + kfree(obj); + return AE_ERROR; + } + if (obj->buffer.length != 4) { + pr_warning("Unknown buffer length %d\n", obj->buffer.length); + kfree(obj); + return AE_ERROR; + } + + return_value = *((struct wmid3_gds_return_value *)obj->buffer.pointer); + kfree(obj); + + if (return_value.error_code || return_value.ec_return_value) + pr_warning("Set Device Status failed: " + "0x%x - 0x%x\n", return_value.error_code, + return_value.ec_return_value); + + return status; +} + +static acpi_status set_device_status(u32 value, u32 cap) +{ + if (wmi_has_guid(WMID_GUID3)) { + u16 device; + + switch (cap) { + case ACER_CAP_WIRELESS: + device = ACER_WMID3_GDS_WIRELESS; + break; + case ACER_CAP_BLUETOOTH: + device = ACER_WMID3_GDS_BLUETOOTH; + break; + case ACER_CAP_THREEG: + device = ACER_WMID3_GDS_THREEG; + break; + default: + return AE_ERROR; + } + return wmid3_set_device_status(value, device); + + } else { + return set_u32(value, cap); + } +} + /* * Rfkill devices */ @@ -1178,7 +1287,7 @@ static int acer_rfkill_set(void *data, bool blocked) u32 cap = (unsigned long)data; if (rfkill_inited) { - status = set_u32(!blocked, cap); + status = set_device_status(!blocked, cap); if (ACPI_FAILURE(status)) return -ENODEV; } -- cgit v1.2.3-70-g09d2 From d436514e21b827ab602d1714028c34179c42d09f Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Thu, 26 May 2011 11:09:19 +0800 Subject: msi-laptop: fix section mismatch in reference from the function load_scm_model_init There have section mismatch warning message shows up when building the kernel with make CONFIG_DEBUG_SECTION_MISMATCH=y. The problem is the load_scm_model_init() calls msi_laptop_input_setup() which is an __init function, but load_scm_model_init() lacks a __init annotation. This patch add __init on load_scm_model_init() to avoid warning message. Cc: Matthew Garrett Cc: Dmitry Torokhov Cc: Corentin Chary Cc: Thomas Renninger Signed-off-by: Lee, Chun-Yi Signed-off-by: Matthew Garrett --- drivers/platform/x86/msi-laptop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index f89c0b6a9ae..d7213e40686 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -800,7 +800,7 @@ static void msi_laptop_input_destroy(void) input_unregister_device(msi_laptop_input_dev); } -static int load_scm_model_init(struct platform_device *sdev) +static int __init load_scm_model_init(struct platform_device *sdev) { u8 data; int result; -- cgit v1.2.3-70-g09d2 From 987dfbaa65b2c3568b85e29d2598da08a011ee09 Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Fri, 27 May 2011 14:52:14 +0800 Subject: acer-wmi: support integer return type from WMI methods Acer WMID_GUID1/2 method's return value was declared to integer type on Gateway notebook. So, add this patch for support integer return type. Reference: bko#33032 https://bugzilla.kernel.org/show_bug.cgi?id=33032 Tested on Gateway NV5909H laptop Tested-by: Filipus Klutiero Cc: Carlos Corbacho Cc: Matthew Garrett Cc: Dmitry Torokhov Cc: Corentin Chary Cc: Thomas Renninger Signed-off-by: Lee, Chun-Yi Signed-off-by: Matthew Garrett --- drivers/platform/x86/acer-wmi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 5dc843bfc6c..005417bd429 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -754,6 +754,8 @@ WMI_execute_u32(u32 method_id, u32 in, u32 *out) (obj->buffer.length == sizeof(u32) || obj->buffer.length == sizeof(u64))) { tmp = *((u32 *) obj->buffer.pointer); + } else if (obj->type == ACPI_TYPE_INTEGER) { + tmp = (u32) obj->integer.value; } else { tmp = 0; } @@ -884,6 +886,8 @@ static acpi_status WMID_set_capabilities(void) (obj->buffer.length == sizeof(u32) || obj->buffer.length == sizeof(u64))) { devices = *((u32 *) obj->buffer.pointer); + } else if (obj->type == ACPI_TYPE_INTEGER) { + devices = (u32) obj->integer.value; } else { kfree(out.pointer); return AE_ERROR; @@ -1636,6 +1640,8 @@ static u32 get_wmid_devices(void) (obj->buffer.length == sizeof(u32) || obj->buffer.length == sizeof(u64))) { devices = *((u32 *) obj->buffer.pointer); + } else if (obj->type == ACPI_TYPE_INTEGER) { + devices = (u32) obj->integer.value; } kfree(out.pointer); -- cgit v1.2.3-70-g09d2 From aac11c1b351413aa3412e258e2b2dcba31777209 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 24 May 2011 16:28:55 +0200 Subject: iwl4965: fix 5GHz operation rx_status.band is used uninitialized, what disallow to work on 5GHz . Cc: stable@kernel.org # 2.6.39+ Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/iwlegacy/iwl-4965-lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlegacy/iwl-4965-lib.c b/drivers/net/wireless/iwlegacy/iwl-4965-lib.c index 7e5e85a017b..a7a4739880d 100644 --- a/drivers/net/wireless/iwlegacy/iwl-4965-lib.c +++ b/drivers/net/wireless/iwlegacy/iwl-4965-lib.c @@ -628,11 +628,11 @@ void iwl4965_rx_reply_rx(struct iwl_priv *priv, /* rx_status carries information about the packet to mac80211 */ rx_status.mactime = le64_to_cpu(phy_res->timestamp); + rx_status.band = (phy_res->phy_flags & RX_RES_PHY_FLAGS_BAND_24_MSK) ? + IEEE80211_BAND_2GHZ : IEEE80211_BAND_5GHZ; rx_status.freq = ieee80211_channel_to_frequency(le16_to_cpu(phy_res->channel), rx_status.band); - rx_status.band = (phy_res->phy_flags & RX_RES_PHY_FLAGS_BAND_24_MSK) ? - IEEE80211_BAND_2GHZ : IEEE80211_BAND_5GHZ; rx_status.rate_idx = iwl4965_hwrate_to_mac80211_idx(rate_n_flags, rx_status.band); rx_status.flag = 0; -- cgit v1.2.3-70-g09d2 From 64bd0821a3b66c3307d7a4ee5523e3e35ec2df0e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 25 May 2011 09:44:05 +0800 Subject: wireless: Default to 'n' for 2 new added devices in Kconfig. We make oldconfig every time when a new kernel arrives, but if we don't have such a device(I guess this is the most common case for a new device), the default value should be 'n' so that the kernel size we build doesn't grow up too much quickly. For anyone who has the device, it is OK for them to turn it on by themselves. Cc: "John W. Linville" Cc: Johannes Berg Signed-off-by: Tao Ma Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/Kconfig | 1 - drivers/net/wireless/rt2x00/Kconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig index d9ff8413ab9..d9c08c619a3 100644 --- a/drivers/net/wireless/ath/ath9k/Kconfig +++ b/drivers/net/wireless/ath/ath9k/Kconfig @@ -26,7 +26,6 @@ config ATH9K config ATH9K_PCI bool "Atheros ath9k PCI/PCIe bus support" depends on ATH9K && PCI - default PCI ---help--- This option enables the PCI bus support in ath9k. diff --git a/drivers/net/wireless/rt2x00/Kconfig b/drivers/net/wireless/rt2x00/Kconfig index 9def1e5369a..b2f8b8fd4d2 100644 --- a/drivers/net/wireless/rt2x00/Kconfig +++ b/drivers/net/wireless/rt2x00/Kconfig @@ -166,7 +166,6 @@ config RT2800USB_RT35XX config RT2800USB_RT53XX bool "rt2800usb - Include support for rt53xx devices (EXPERIMENTAL)" depends on EXPERIMENTAL - default y ---help--- This adds support for rt53xx wireless chipset family to the rt2800pci driver. -- cgit v1.2.3-70-g09d2 From a331400bf01231253a0d9ab211c83212d2ac4edb Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Thu, 26 May 2011 11:46:37 +0300 Subject: mac80211: clear local->ps_data on disassoc local->ps_data wasn't cleared on disassociation, which (in some corner cases) caused reconnections to enter psm before association completed. Signed-off-by: Eliad Peller Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 746595597b6..456cccf26b5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1095,6 +1095,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, local->hw.conf.flags &= ~IEEE80211_CONF_PS; config_changed |= IEEE80211_CONF_CHANGE_PS; } + local->ps_sdata = NULL; ieee80211_hw_config(local, config_changed); -- cgit v1.2.3-70-g09d2 From 64c754ed3b0009e4fa248f739000dc234eb0d2c9 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Thu, 26 May 2011 10:53:17 +0200 Subject: mac80211: Remove duplicate linux/slab.h include from net/mac80211/scan.c Commit 79f460ca49d8d5700756ab7071c951311c7f29cc add a duplicate linux/slab.h include to net/mac80211/scan.c - remove it. Signed-off-by: Jesper Juhl Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- net/mac80211/scan.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 27af6723cb5..58ffa7d069c 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3-70-g09d2 From 1df85ecec36ad5da3f0165760704310d6c03f65f Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 27 May 2011 01:08:04 +0800 Subject: ath9k: Fix AR9287 calibration The AR9287 calibration code was not being called because of an incorrect MAC revision check. This forced the AR9287 to use the AR9285 initial calibration code and bypass the AR9287 code entirely. Signed-off-by: Adrian Chadd Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9002_calib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9002_calib.c b/drivers/net/wireless/ath/ath9k/ar9002_calib.c index 015d9743993..2d4c0910295 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_calib.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_calib.c @@ -829,7 +829,7 @@ static bool ar9002_hw_init_cal(struct ath_hw *ah, struct ath9k_channel *chan) if (AR_SREV_9271(ah)) { if (!ar9285_hw_cl_cal(ah, chan)) return false; - } else if (AR_SREV_9285_12_OR_LATER(ah)) { + } else if (AR_SREV_9285(ah) && AR_SREV_9285_12_OR_LATER(ah)) { if (!ar9285_hw_clc(ah, chan)) return false; } else { -- cgit v1.2.3-70-g09d2 From 646aaea615704010b5fd2c8c8891ff1a3a4b4f1a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 May 2011 11:00:41 -0300 Subject: perf tools: Make sure kptr_restrict warnings fit 80 col terms Suggested-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Link: http://lkml.kernel.org/n/tip-i1p8vrhq7xveyui6t1sc914e@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 19 ++++++++----------- tools/perf/builtin-report.c | 17 +++++++---------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2ca107f3efd..8e2c8579818 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -824,17 +824,14 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) symbol__init(); if (symbol_conf.kptr_restrict) - pr_warning("WARNING: Kernel address maps " - "(/proc/{kallsyms,modules}) are restricted, " - "check /proc/sys/kernel/kptr_restrict.\n\n" - "Samples in kernel functions may not be resolved " - "if a suitable vmlinux file is not found in the " - "buildid cache or in the vmlinux path.\n\n" - "Samples in kernel modules won't be resolved " - "at all.\n\n" - "If some relocation was applied (e.g. kexec) " - "symbols may be misresolved even with a suitable " - "vmlinux or kallsyms file.\n\n"); + pr_warning( +"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" +"check /proc/sys/kernel/kptr_restrict.\n\n" +"Samples in kernel functions may not be resolved if a suitable vmlinux\n" +"file is not found in the buildid cache or in the vmlinux path.\n\n" +"Samples in kernel modules won't be resolved at all.\n\n" +"If some relocation was applied (e.g. kexec) symbols may be misresolved\n" +"even with a suitable vmlinux or kallsyms file.\n\n"); if (no_buildid_cache || no_buildid) disable_buildid_cache(); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 99156c35bc6..287a173523a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -281,17 +281,14 @@ static int __cmd_report(void) kernel_kmap->ref_reloc_sym->addr == 0))) { const struct dso *kdso = kernel_map->dso; - ui__warning("Kernel address maps " - "(/proc/{kallsyms,modules}) were restricted, " - "check /proc/sys/kernel/kptr_restrict before " - "running 'perf record'.\n\n%s\n\n" - "Samples in kernel modules can't be resolved " - "as well.\n\n", + ui__warning( +"Kernel address maps (/proc/{kallsyms,modules}) were restricted.\n\n" +"Check /proc/sys/kernel/kptr_restrict before running 'perf record'.\n\n%s\n\n" +"Samples in kernel modules can't be resolved as well.\n\n", RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION]) ? - "As no suitable kallsyms nor vmlinux was found, " - "kernel samples can't be resolved." : - "If some relocation was applied (e.g. kexec) " - "symbols may be misresolved."); +"As no suitable kallsyms nor vmlinux was found, kernel samples\n" +"can't be resolved." : +"If some relocation was applied (e.g. kexec) symbols may be misresolved."); } if (dump_trace) { -- cgit v1.2.3-70-g09d2 From 4af4c9550ccaaf0b53013ff730bc15068ffe6abc Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 27 May 2011 09:58:34 -0600 Subject: perf events: initialize fd array to -1 instead of 0 perf_evsel__alloc_fd allocates an array of file descriptors with the memory initialized to 0. The array has dimensions for cpus and threads. Later, __perf_evsel__open calls sys_perf_event_open for each cpu and thread dimensions. If the open fails for any of the cpus or threads then the fd's for this event are closed and the fd entry in the array is set to -1. Now, if the first attempt fails for the event (e.g., the event is not supported) the remaining dimensions (cpu > 0 and thread > 0) are not touched and left at the initialized value of 0. builtin-stat catches ENOENT and ENOSYS failures and allows the command to continue. The end result is that stat attempts to read from an fd of 0 which of course is stdin and so the command hangs until you type ctrl-D. Resolve by initializing the array to -1 since an fd < 0 is already handled. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1306511914-8016-1-git-send-email-dsahern@gmail.com Signed-off-by: David Ahern Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ee0fe0dffa7..cca29ededb5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -35,7 +35,17 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { + int cpu, thread; evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); + + if (evsel->fd) { + for (cpu = 0; cpu < ncpus; cpu++) { + for (thread = 0; thread < nthreads; thread++) { + FD(evsel, cpu, thread) = -1; + } + } + } + return evsel->fd != NULL ? 0 : -ENOMEM; } -- cgit v1.2.3-70-g09d2 From 59fb1ee95e74e8e0777289c44300cbe812aca836 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 May 2011 11:14:00 -0300 Subject: perf top: Remove unused macro Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Link: http://lkml.kernel.org/n/tip-weqbs0tkk2u0qp1xxdxxosfg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2d7934e9de3..375ed160d93 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -62,8 +62,6 @@ #include #include -#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) - static struct perf_top top = { .count_filter = 5, .delay_secs = 2, -- cgit v1.2.3-70-g09d2 From 5f6f55809758e106eca72c6e01402c8080a88ee8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 May 2011 11:53:28 -0300 Subject: perf top: Handle kptr_restrict Reported-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Link: http://lkml.kernel.org/n/tip-cyl5zmi1nu35vyu7l5im2pyv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 375ed160d93..472f6279002 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -80,6 +80,7 @@ static bool use_tui, use_stdio; static int default_interval = 0; +static bool kptr_restrict_warned; static bool inherit = false; static int realtime_prio = 0; static bool group = false; @@ -738,6 +739,20 @@ static void perf_event__process_sample(const union perf_event *event, al.filtered) return; + if (!kptr_restrict_warned && + symbol_conf.kptr_restrict && + al.cpumode == PERF_RECORD_MISC_KERNEL) { + ui__warning( +"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" +"Check /proc/sys/kernel/kptr_restrict.\n\n" +"Kernel%s samples will not be resolved.\n", + !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? + " modules" : ""); + if (use_browser <= 0) + sleep(5); + kptr_restrict_warned = true; + } + if (al.sym == NULL) { /* * As we do lazy loading of symtabs we only will know if the -- cgit v1.2.3-70-g09d2 From e4a338d05df93ab1ebf291aca1e753064319d301 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 May 2011 13:42:16 -0300 Subject: perf top: Don't stop if no kernel symtab is found We now just warn the user about the fact and go on providing just userspace samples. This fixes a problem when no vmlinux is explicetely passed by the user, thus symbol_conf.vmlinux_name is NULL, no suitable vmlinux is found, and then we get: aldebaran:~> perf top -p 7557 [kernel.kallsyms] with build id 44d9a989eabbd79e486bc079d6b743d397c204e0 not found, continuing without symbols The (null) file can't be used Reported-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Link: http://lkml.kernel.org/n/tip-cj2g81hn64wv2bipmqk4fy2m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 472f6279002..f2f3f4937aa 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -81,6 +81,7 @@ static bool use_tui, use_stdio; static int default_interval = 0; static bool kptr_restrict_warned; +static bool vmlinux_warned; static bool inherit = false; static int realtime_prio = 0; static bool group = false; @@ -754,6 +755,7 @@ static void perf_event__process_sample(const union perf_event *event, } if (al.sym == NULL) { + const char *msg = "Kernel samples will not be resolved.\n"; /* * As we do lazy loading of symtabs we only will know if the * specified vmlinux file is invalid when we actually have a @@ -765,12 +767,20 @@ static void perf_event__process_sample(const union perf_event *event, * --hide-kernel-symbols, even if the user specifies an * invalid --vmlinux ;-) */ - if (al.map == machine->vmlinux_maps[MAP__FUNCTION] && + if (!kptr_restrict_warned && !vmlinux_warned && + al.map == machine->vmlinux_maps[MAP__FUNCTION] && RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { - ui__warning("The %s file can't be used\n", - symbol_conf.vmlinux_name); - exit_browser(0); - exit(1); + if (symbol_conf.vmlinux_name) { + ui__warning("The %s file can't be used.\n%s", + symbol_conf.vmlinux_name, msg); + } else { + ui__warning("A vmlinux file was not found.\n%s", + msg); + } + + if (use_browser <= 0) + sleep(5); + vmlinux_warned = true; } return; -- cgit v1.2.3-70-g09d2 From 69e848c2090aebba5698a1620604c7dccb448684 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Tue, 22 Feb 2011 09:57:15 -0800 Subject: Intel xhci: Support EHCI/xHCI port switching. The Intel Panther Point chipsets contain an EHCI and xHCI host controller that shares some number of skew-dependent ports. These ports can be switched from the EHCI to the xHCI host (and vice versa) by a hardware MUX that is controlled by registers in the xHCI PCI configuration space. The USB 3.0 SuperSpeed terminations on the xHCI ports can be controlled separately from the USB 2.0 data wires. This switchover mechanism is there to support users who do a custom install of certain non-Linux operating systems that don't have official USB 3.0 support. By default, the ports are under EHCI, SuperSpeed terminations are off, and USB 3.0 devices will show up under the EHCI controller at reduced speeds. (This was more palatable for the marketing folks than having completely dead USB 3.0 ports if no xHCI drivers are available.) Users should be able to turn on xHCI by default through a BIOS option, but users are happiest when they don't have to change random BIOS settings. This patch introduces a driver method to switchover the ports from EHCI to xHCI before the EHCI driver finishes PCI enumeration. We want to switch the ports over before the USB core has the chance to enumerate devices under EHCI, or boot from USB mass storage will fail if the boot device connects under EHCI first, and then gets disconnected when the port switches over to xHCI. Add code to the xHCI PCI quirk to switch the ports from EHCI to xHCI. The PCI quirks code will run before any other PCI probe function is called, so this avoids the issue with boot devices. Another issue is with BIOS behavior during system resume from hibernate. If the BIOS doesn't support xHCI, it may switch the devices under EHCI to allow use of the USB keyboard, mice, and mass storage devices. It's supposed to remember the value of the port routing registers and switch them back when the OS attempts to take control of the xHCI host controller, but we all know not to trust BIOS writers. Make both the xHCI driver and the EHCI driver attempt to switchover the ports in their PCI resume functions. We can't guarantee which PCI device will be resumed first, so this avoids any race conditions. Writing a '1' to an already set port switchover bit or a '0' to a cleared port switchover bit should have no effect. The xHCI PCI configuration registers will be documented in the EDS-level chipset spec, which is not public yet. I have permission from legal and the Intel chipset group to release this patch early to allow good Linux support at product launch. I've tried to document the registers as much as possible, so please let me know if anything is unclear. Signed-off-by: Sarah Sharp --- drivers/usb/host/ehci-pci.c | 39 +++++++++++++++++++++++++++ drivers/usb/host/pci-quirks.c | 63 +++++++++++++++++++++++++++++++++++++++++++ drivers/usb/host/pci-quirks.h | 2 ++ drivers/usb/host/xhci-pci.c | 20 ++++++++++++++ 4 files changed, 124 insertions(+) diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 660b80a75ca..1102ce65a3a 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -348,11 +348,50 @@ static int ehci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) return rc; } +static bool usb_is_intel_switchable_ehci(struct pci_dev *pdev) +{ + return pdev->class == PCI_CLASS_SERIAL_USB_EHCI && + pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == 0x1E26; +} + +static void ehci_enable_xhci_companion(void) +{ + struct pci_dev *companion = NULL; + + /* The xHCI and EHCI controllers are not on the same PCI slot */ + for_each_pci_dev(companion) { + if (!usb_is_intel_switchable_xhci(companion)) + continue; + usb_enable_xhci_ports(companion); + return; + } +} + static int ehci_pci_resume(struct usb_hcd *hcd, bool hibernated) { struct ehci_hcd *ehci = hcd_to_ehci(hcd); struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + /* The BIOS on systems with the Intel Panther Point chipset may or may + * not support xHCI natively. That means that during system resume, it + * may switch the ports back to EHCI so that users can use their + * keyboard to select a kernel from GRUB after resume from hibernate. + * + * The BIOS is supposed to remember whether the OS had xHCI ports + * enabled before resume, and switch the ports back to xHCI when the + * BIOS/OS semaphore is written, but we all know we can't trust BIOS + * writers. + * + * Unconditionally switch the ports back to xHCI after a system resume. + * We can't tell whether the EHCI or xHCI controller will be resumed + * first, so we have to do the port switchover in both drivers. Writing + * a '1' to the port switchover registers should have no effect if the + * port was already switched over. + */ + if (usb_is_intel_switchable_ehci(pdev)) + ehci_enable_xhci_companion(); + // maybe restore FLADJ if (time_before(jiffies, ehci->next_statechange)) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index f16c59d5f48..fd930618c28 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -69,6 +69,9 @@ #define NB_PIF0_PWRDOWN_0 0x01100012 #define NB_PIF0_PWRDOWN_1 0x01100013 +#define USB_INTEL_XUSB2PR 0xD0 +#define USB_INTEL_USB3_PSSEN 0xD8 + static struct amd_chipset_info { struct pci_dev *nb_dev; struct pci_dev *smbus_dev; @@ -673,6 +676,64 @@ static int handshake(void __iomem *ptr, u32 mask, u32 done, return -ETIMEDOUT; } +bool usb_is_intel_switchable_xhci(struct pci_dev *pdev) +{ + return pdev->class == PCI_CLASS_SERIAL_USB_XHCI && + pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI; +} +EXPORT_SYMBOL_GPL(usb_is_intel_switchable_xhci); + +/* + * Intel's Panther Point chipset has two host controllers (EHCI and xHCI) that + * share some number of ports. These ports can be switched between either + * controller. Not all of the ports under the EHCI host controller may be + * switchable. + * + * The ports should be switched over to xHCI before PCI probes for any device + * start. This avoids active devices under EHCI being disconnected during the + * port switchover, which could cause loss of data on USB storage devices, or + * failed boot when the root file system is on a USB mass storage device and is + * enumerated under EHCI first. + * + * We write into the xHC's PCI configuration space in some Intel-specific + * registers to switch the ports over. The USB 3.0 terminations and the USB + * 2.0 data wires are switched separately. We want to enable the SuperSpeed + * terminations before switching the USB 2.0 wires over, so that USB 3.0 + * devices connect at SuperSpeed, rather than at USB 2.0 speeds. + */ +void usb_enable_xhci_ports(struct pci_dev *xhci_pdev) +{ + u32 ports_available; + + ports_available = 0xffffffff; + /* Write USB3_PSSEN, the USB 3.0 Port SuperSpeed Enable + * Register, to turn on SuperSpeed terminations for all + * available ports. + */ + pci_write_config_dword(xhci_pdev, USB_INTEL_USB3_PSSEN, + cpu_to_le32(ports_available)); + + pci_read_config_dword(xhci_pdev, USB_INTEL_USB3_PSSEN, + &ports_available); + dev_dbg(&xhci_pdev->dev, "USB 3.0 ports that are now enabled " + "under xHCI: 0x%x\n", ports_available); + + ports_available = 0xffffffff; + /* Write XUSB2PR, the xHC USB 2.0 Port Routing Register, to + * switch the USB 2.0 power and data lines over to the xHCI + * host. + */ + pci_write_config_dword(xhci_pdev, USB_INTEL_XUSB2PR, + cpu_to_le32(ports_available)); + + pci_read_config_dword(xhci_pdev, USB_INTEL_XUSB2PR, + &ports_available); + dev_dbg(&xhci_pdev->dev, "USB 2.0 ports that are now switched over " + "to xHCI: 0x%x\n", ports_available); +} +EXPORT_SYMBOL_GPL(usb_enable_xhci_ports); + /** * PCI Quirks for xHCI. * @@ -732,6 +793,8 @@ static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev) writel(XHCI_LEGACY_DISABLE_SMI, base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET); + if (usb_is_intel_switchable_xhci(pdev)) + usb_enable_xhci_ports(pdev); hc_init: op_reg_base = base + XHCI_HC_LENGTH(readl(base)); diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h index 6ae9f78e993..b1002a8ef96 100644 --- a/drivers/usb/host/pci-quirks.h +++ b/drivers/usb/host/pci-quirks.h @@ -8,6 +8,8 @@ int usb_amd_find_chipset_info(void); void usb_amd_dev_put(void); void usb_amd_quirk_pll_disable(void); void usb_amd_quirk_pll_enable(void); +bool usb_is_intel_switchable_xhci(struct pci_dev *pdev); +void usb_enable_xhci_ports(struct pci_dev *xhci_pdev); #else static inline void usb_amd_quirk_pll_disable(void) {} static inline void usb_amd_quirk_pll_enable(void) {} diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index cbc4d491e62..faf039ac657 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -242,8 +242,28 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); int retval = 0; + /* The BIOS on systems with the Intel Panther Point chipset may or may + * not support xHCI natively. That means that during system resume, it + * may switch the ports back to EHCI so that users can use their + * keyboard to select a kernel from GRUB after resume from hibernate. + * + * The BIOS is supposed to remember whether the OS had xHCI ports + * enabled before resume, and switch the ports back to xHCI when the + * BIOS/OS semaphore is written, but we all know we can't trust BIOS + * writers. + * + * Unconditionally switch the ports back to xHCI after a system resume. + * We can't tell whether the EHCI or xHCI controller will be resumed + * first, so we have to do the port switchover in both drivers. Writing + * a '1' to the port switchover registers should have no effect if the + * port was already switched over. + */ + if (usb_is_intel_switchable_xhci(pdev)) + usb_enable_xhci_ports(pdev); + retval = xhci_resume(xhci, hibernated); return retval; } -- cgit v1.2.3-70-g09d2 From ad808333d8201d53075a11bc8dd83b81f3d68f0b Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Wed, 25 May 2011 10:43:56 -0700 Subject: Intel xhci: Ignore spurious successful event. The xHCI host controller in the Panther Point chipset sometimes produces spurious events on the event ring. If it receives a short packet, it first puts a Transfer Event with a short transfer completion code on the event ring. Then it puts a Transfer Event with a successful completion code on the ring for the same TD. The xHCI driver correctly processes the short transfer completion code, gives the URB back to the driver, and then prints a warning in dmesg about the spurious event. These warning messages really fill up dmesg when an HD webcam is plugged into xHCI. This spurious successful event behavior isn't technically disallowed by the xHCI specification, so make the xHCI driver just ignore the spurious completion event. Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-pci.c | 4 ++++ drivers/usb/host/xhci-ring.c | 14 ++++++++++++++ drivers/usb/host/xhci.h | 2 ++ 3 files changed, 20 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index faf039ac657..eafd17fae94 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -118,6 +118,10 @@ static int xhci_pci_setup(struct usb_hcd *hcd) /* AMD PLL quirk */ if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info()) xhci->quirks |= XHCI_AMD_PLL_FIX; + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) { + xhci->quirks |= XHCI_SPURIOUS_SUCCESS; + } /* Make sure the HC is halted. */ retval = xhci_halt(xhci); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index d1498c03c4c..56f6c584c65 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2061,6 +2061,16 @@ static int handle_tx_event(struct xhci_hcd *xhci, if (!event_seg) { if (!ep->skip || !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + /* Some host controllers give a spurious + * successful event after a short transfer. + * Ignore it. + */ + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + ep_ring->last_td_was_short) { + ep_ring->last_td_was_short = false; + ret = 0; + goto cleanup; + } /* HC is busted, give up! */ xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " @@ -2071,6 +2081,10 @@ static int handle_tx_event(struct xhci_hcd *xhci, ret = skip_isoc_td(xhci, td, event, ep, &status); goto cleanup; } + if (trb_comp_code == COMP_SHORT_TX) + ep_ring->last_td_was_short = true; + else + ep_ring->last_td_was_short = false; if (ep->skip) { xhci_dbg(xhci, "Found td. Clear skip flag.\n"); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 0772a8cfea2..5cfeb8614b8 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1123,6 +1123,7 @@ struct xhci_ring { */ u32 cycle_state; unsigned int stream_id; + bool last_td_was_short; }; struct xhci_erst_entry { @@ -1290,6 +1291,7 @@ struct xhci_hcd { #define XHCI_RESET_EP_QUIRK (1 << 1) #define XHCI_NEC_HOST (1 << 2) #define XHCI_AMD_PLL_FIX (1 << 3) +#define XHCI_SPURIOUS_SUCCESS (1 << 4) /* There are two roothubs to keep track of bus suspend info for */ struct xhci_bus_state bus_state[2]; /* Is each xHCI roothub port a USB 3.0, USB 2.0, or USB 1.1 port? */ -- cgit v1.2.3-70-g09d2 From 2cf95c18d5069e13c02a8667d91e064df8e17e09 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Wed, 11 May 2011 16:14:58 -0700 Subject: Intel xhci: Limit number of active endpoints to 64. The Panther Point chipset has an xHCI host controller that has a limit to the number of active endpoints it can handle. Ideally, it would signal that it can't handle anymore endpoints by returning a Resource Error for the Configure Endpoint command, but they don't. Instead it needs software to keep track of the number of active endpoints, across configure endpoint commands, reset device commands, disable slot commands, and address device commands. Add a new endpoint context counter, xhci_hcd->num_active_eps, and use it to track the number of endpoints the xHC has active. This gets a little tricky, because commands to change the number of active endpoints can fail. This patch adds a new xHCI quirk for these Intel hosts, and the new code should not have any effect on other xHCI host controllers. Fail a new device allocation if we don't have room for the new default control endpoint. Use the endpoint ring pointers to determine what endpoints were active before a Reset Device command or a Disable Slot command, and drop those once the command completes. Fail a configure endpoint command if it would add too many new endpoints. We have to be a bit over zealous here, and only count the number of new endpoints to be added, without subtracting the number of dropped endpoints. That's because a second configure endpoint command for a different device could sneak in before we know if the first command is completed. If the first command dropped resources, the host controller fails the command for some reason, and we're nearing the limit of endpoints, we could end up oversubscribing the host. To fix this race condition, when evaluating whether a configure endpoint command will fix in our bandwidth budget, only add the new endpoints to xhci->num_active_eps, and don't subtract the dropped endpoints. Ignore changed endpoints (ones that are dropped and then re-added), as that shouldn't effect the host's endpoint resources. When the configure endpoint command completes, subtract off the dropped endpoints. This may mean some configuration changes may temporarily fail, but it's always better to under-subscribe than over-subscribe resources. (Originally my plan had been to push the resource allocation down into the ring allocation functions. However, that would cause us to allocate unnecessary resources when endpoints were changed, because the xHCI driver allocates a new ring for the changed endpoint, and only deletes the old ring once the Configure Endpoint command succeeds. A further complication would have been dealing with the per-device endpoint ring cache.) Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-pci.c | 2 + drivers/usb/host/xhci-ring.c | 7 +- drivers/usb/host/xhci.c | 232 +++++++++++++++++++++++++++++++++++++++++-- drivers/usb/host/xhci.h | 14 +++ 4 files changed, 244 insertions(+), 11 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index eafd17fae94..c408e9f6a70 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -121,6 +121,8 @@ static int xhci_pci_setup(struct usb_hcd *hcd) if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) { xhci->quirks |= XHCI_SPURIOUS_SUCCESS; + xhci->quirks |= XHCI_EP_LIMIT_QUIRK; + xhci->limit_active_eps = 64; } /* Make sure the HC is halted. */ diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 56f6c584c65..cc1485bfed3 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1081,8 +1081,13 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, complete(&xhci->addr_dev); break; case TRB_TYPE(TRB_DISABLE_SLOT): - if (xhci->devs[slot_id]) + if (xhci->devs[slot_id]) { + if (xhci->quirks & XHCI_EP_LIMIT_QUIRK) + /* Delete default control endpoint resources */ + xhci_free_device_endpoint_resources(xhci, + xhci->devs[slot_id], true); xhci_free_virt_device(xhci, slot_id); + } break; case TRB_TYPE(TRB_CONFIG_EP): virt_dev = xhci->devs[slot_id]; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 58183d2a808..d9660eb97eb 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1582,6 +1582,113 @@ static int xhci_evaluate_context_result(struct xhci_hcd *xhci, return ret; } +static u32 xhci_count_num_new_endpoints(struct xhci_hcd *xhci, + struct xhci_container_ctx *in_ctx) +{ + struct xhci_input_control_ctx *ctrl_ctx; + u32 valid_add_flags; + u32 valid_drop_flags; + + ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx); + /* Ignore the slot flag (bit 0), and the default control endpoint flag + * (bit 1). The default control endpoint is added during the Address + * Device command and is never removed until the slot is disabled. + */ + valid_add_flags = ctrl_ctx->add_flags >> 2; + valid_drop_flags = ctrl_ctx->drop_flags >> 2; + + /* Use hweight32 to count the number of ones in the add flags, or + * number of endpoints added. Don't count endpoints that are changed + * (both added and dropped). + */ + return hweight32(valid_add_flags) - + hweight32(valid_add_flags & valid_drop_flags); +} + +static unsigned int xhci_count_num_dropped_endpoints(struct xhci_hcd *xhci, + struct xhci_container_ctx *in_ctx) +{ + struct xhci_input_control_ctx *ctrl_ctx; + u32 valid_add_flags; + u32 valid_drop_flags; + + ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx); + valid_add_flags = ctrl_ctx->add_flags >> 2; + valid_drop_flags = ctrl_ctx->drop_flags >> 2; + + return hweight32(valid_drop_flags) - + hweight32(valid_add_flags & valid_drop_flags); +} + +/* + * We need to reserve the new number of endpoints before the configure endpoint + * command completes. We can't subtract the dropped endpoints from the number + * of active endpoints until the command completes because we can oversubscribe + * the host in this case: + * + * - the first configure endpoint command drops more endpoints than it adds + * - a second configure endpoint command that adds more endpoints is queued + * - the first configure endpoint command fails, so the config is unchanged + * - the second command may succeed, even though there isn't enough resources + * + * Must be called with xhci->lock held. + */ +static int xhci_reserve_host_resources(struct xhci_hcd *xhci, + struct xhci_container_ctx *in_ctx) +{ + u32 added_eps; + + added_eps = xhci_count_num_new_endpoints(xhci, in_ctx); + if (xhci->num_active_eps + added_eps > xhci->limit_active_eps) { + xhci_dbg(xhci, "Not enough ep ctxs: " + "%u active, need to add %u, limit is %u.\n", + xhci->num_active_eps, added_eps, + xhci->limit_active_eps); + return -ENOMEM; + } + xhci->num_active_eps += added_eps; + xhci_dbg(xhci, "Adding %u ep ctxs, %u now active.\n", added_eps, + xhci->num_active_eps); + return 0; +} + +/* + * The configure endpoint was failed by the xHC for some other reason, so we + * need to revert the resources that failed configuration would have used. + * + * Must be called with xhci->lock held. + */ +static void xhci_free_host_resources(struct xhci_hcd *xhci, + struct xhci_container_ctx *in_ctx) +{ + u32 num_failed_eps; + + num_failed_eps = xhci_count_num_new_endpoints(xhci, in_ctx); + xhci->num_active_eps -= num_failed_eps; + xhci_dbg(xhci, "Removing %u failed ep ctxs, %u now active.\n", + num_failed_eps, + xhci->num_active_eps); +} + +/* + * Now that the command has completed, clean up the active endpoint count by + * subtracting out the endpoints that were dropped (but not changed). + * + * Must be called with xhci->lock held. + */ +static void xhci_finish_resource_reservation(struct xhci_hcd *xhci, + struct xhci_container_ctx *in_ctx) +{ + u32 num_dropped_eps; + + num_dropped_eps = xhci_count_num_dropped_endpoints(xhci, in_ctx); + xhci->num_active_eps -= num_dropped_eps; + if (num_dropped_eps) + xhci_dbg(xhci, "Removing %u dropped ep ctxs, %u now active.\n", + num_dropped_eps, + xhci->num_active_eps); +} + /* Issue a configure endpoint command or evaluate context command * and wait for it to finish. */ @@ -1602,6 +1709,15 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, virt_dev = xhci->devs[udev->slot_id]; if (command) { in_ctx = command->in_ctx; + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK) && + xhci_reserve_host_resources(xhci, in_ctx)) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_warn(xhci, "Not enough host resources, " + "active endpoint contexts = %u\n", + xhci->num_active_eps); + return -ENOMEM; + } + cmd_completion = command->completion; cmd_status = &command->status; command->command_trb = xhci->cmd_ring->enqueue; @@ -1617,6 +1733,14 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, list_add_tail(&command->cmd_list, &virt_dev->cmd_list); } else { in_ctx = virt_dev->in_ctx; + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK) && + xhci_reserve_host_resources(xhci, in_ctx)) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_warn(xhci, "Not enough host resources, " + "active endpoint contexts = %u\n", + xhci->num_active_eps); + return -ENOMEM; + } cmd_completion = &virt_dev->cmd_completion; cmd_status = &virt_dev->cmd_status; } @@ -1631,6 +1755,8 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, if (ret < 0) { if (command) list_del(&command->cmd_list); + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) + xhci_free_host_resources(xhci, in_ctx); spin_unlock_irqrestore(&xhci->lock, flags); xhci_dbg(xhci, "FIXME allocate a new ring segment\n"); return -ENOMEM; @@ -1653,8 +1779,22 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, } if (!ctx_change) - return xhci_configure_endpoint_result(xhci, udev, cmd_status); - return xhci_evaluate_context_result(xhci, udev, cmd_status); + ret = xhci_configure_endpoint_result(xhci, udev, cmd_status); + else + ret = xhci_evaluate_context_result(xhci, udev, cmd_status); + + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) { + spin_lock_irqsave(&xhci->lock, flags); + /* If the command failed, remove the reserved resources. + * Otherwise, clean up the estimate to include dropped eps. + */ + if (ret) + xhci_free_host_resources(xhci, in_ctx); + else + xhci_finish_resource_reservation(xhci, in_ctx); + spin_unlock_irqrestore(&xhci->lock, flags); + } + return ret; } /* Called after one or more calls to xhci_add_endpoint() or @@ -2271,6 +2411,34 @@ int xhci_free_streams(struct usb_hcd *hcd, struct usb_device *udev, return 0; } +/* + * Deletes endpoint resources for endpoints that were active before a Reset + * Device command, or a Disable Slot command. The Reset Device command leaves + * the control endpoint intact, whereas the Disable Slot command deletes it. + * + * Must be called with xhci->lock held. + */ +void xhci_free_device_endpoint_resources(struct xhci_hcd *xhci, + struct xhci_virt_device *virt_dev, bool drop_control_ep) +{ + int i; + unsigned int num_dropped_eps = 0; + unsigned int drop_flags = 0; + + for (i = (drop_control_ep ? 0 : 1); i < 31; i++) { + if (virt_dev->eps[i].ring) { + drop_flags |= 1 << i; + num_dropped_eps++; + } + } + xhci->num_active_eps -= num_dropped_eps; + if (num_dropped_eps) + xhci_dbg(xhci, "Dropped %u ep ctxs, flags = 0x%x, " + "%u now active.\n", + num_dropped_eps, drop_flags, + xhci->num_active_eps); +} + /* * This submits a Reset Device Command, which will set the device state to 0, * set the device address to 0, and disable all the endpoints except the default @@ -2412,6 +2580,14 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) goto command_cleanup; } + /* Free up host controller endpoint resources */ + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) { + spin_lock_irqsave(&xhci->lock, flags); + /* Don't delete the default control endpoint resources */ + xhci_free_device_endpoint_resources(xhci, virt_dev, false); + spin_unlock_irqrestore(&xhci->lock, flags); + } + /* Everything but endpoint 0 is disabled, so free or cache the rings. */ last_freed_endpoint = 1; for (i = 1; i < 31; ++i) { @@ -2484,6 +2660,27 @@ void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) */ } +/* + * Checks if we have enough host controller resources for the default control + * endpoint. + * + * Must be called with xhci->lock held. + */ +static int xhci_reserve_host_control_ep_resources(struct xhci_hcd *xhci) +{ + if (xhci->num_active_eps + 1 > xhci->limit_active_eps) { + xhci_dbg(xhci, "Not enough ep ctxs: " + "%u active, need to add 1, limit is %u.\n", + xhci->num_active_eps, xhci->limit_active_eps); + return -ENOMEM; + } + xhci->num_active_eps += 1; + xhci_dbg(xhci, "Adding 1 ep ctx, %u now active.\n", + xhci->num_active_eps); + return 0; +} + + /* * Returns 0 if the xHC ran out of device slots, the Enable Slot command * timed out, or allocating memory failed. Returns 1 on success. @@ -2519,24 +2716,39 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) xhci_err(xhci, "Error while assigning device slot ID\n"); return 0; } - /* xhci_alloc_virt_device() does not touch rings; no need to lock. - * Use GFP_NOIO, since this function can be called from + + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) { + spin_lock_irqsave(&xhci->lock, flags); + ret = xhci_reserve_host_control_ep_resources(xhci); + if (ret) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_warn(xhci, "Not enough host resources, " + "active endpoint contexts = %u\n", + xhci->num_active_eps); + goto disable_slot; + } + spin_unlock_irqrestore(&xhci->lock, flags); + } + /* Use GFP_NOIO, since this function can be called from * xhci_discover_or_reset_device(), which may be called as part of * mass storage driver error handling. */ if (!xhci_alloc_virt_device(xhci, xhci->slot_id, udev, GFP_NOIO)) { - /* Disable slot, if we can do it without mem alloc */ xhci_warn(xhci, "Could not allocate xHCI USB device data structures\n"); - spin_lock_irqsave(&xhci->lock, flags); - if (!xhci_queue_slot_control(xhci, TRB_DISABLE_SLOT, udev->slot_id)) - xhci_ring_cmd_db(xhci); - spin_unlock_irqrestore(&xhci->lock, flags); - return 0; + goto disable_slot; } udev->slot_id = xhci->slot_id; /* Is this a LS or FS device under a HS hub? */ /* Hub or peripherial? */ return 1; + +disable_slot: + /* Disable slot, if we can do it without mem alloc */ + spin_lock_irqsave(&xhci->lock, flags); + if (!xhci_queue_slot_control(xhci, TRB_DISABLE_SLOT, udev->slot_id)) + xhci_ring_cmd_db(xhci); + spin_unlock_irqrestore(&xhci->lock, flags); + return 0; } /* diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 5cfeb8614b8..ac0196e7fcf 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1292,6 +1292,18 @@ struct xhci_hcd { #define XHCI_NEC_HOST (1 << 2) #define XHCI_AMD_PLL_FIX (1 << 3) #define XHCI_SPURIOUS_SUCCESS (1 << 4) +/* + * Certain Intel host controllers have a limit to the number of endpoint + * contexts they can handle. Ideally, they would signal that they can't handle + * anymore endpoint contexts by returning a Resource Error for the Configure + * Endpoint command, but they don't. Instead they expect software to keep track + * of the number of active endpoints for them, across configure endpoint + * commands, reset device commands, disable slot commands, and address device + * commands. + */ +#define XHCI_EP_LIMIT_QUIRK (1 << 5) + unsigned int num_active_eps; + unsigned int limit_active_eps; /* There are two roothubs to keep track of bus suspend info for */ struct xhci_bus_state bus_state[2]; /* Is each xHCI roothub port a USB 3.0, USB 2.0, or USB 1.1 port? */ @@ -1435,6 +1447,8 @@ void xhci_setup_streams_ep_input_ctx(struct xhci_hcd *xhci, void xhci_setup_no_streams_ep_input_ctx(struct xhci_hcd *xhci, struct xhci_ep_ctx *ep_ctx, struct xhci_virt_ep *ep); +void xhci_free_device_endpoint_resources(struct xhci_hcd *xhci, + struct xhci_virt_device *virt_dev, bool drop_control_ep); struct xhci_ring *xhci_dma_to_transfer_ring( struct xhci_virt_ep *ep, u64 address); -- cgit v1.2.3-70-g09d2 From fe19a96b10032035a35779f42ad59e35d6dd8ffd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Mar 2011 20:21:23 -0400 Subject: SUNRPC: Deal with the lack of a SYN_SENT sk->sk_state_change callback... The TCP connection state code depends on the state_change() callback being called when the SYN_SENT state is set. However the networking layer doesn't actually call us back in that case. Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- net/sunrpc/xprtsock.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bf005d3c65e..f34f5ab341a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1344,7 +1344,6 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ xprt_force_disconnect(xprt); - case TCP_SYN_SENT: xprt->connect_cookie++; case TCP_CLOSING: /* @@ -1758,6 +1757,7 @@ static void xs_tcp_reuse_connection(struct sock_xprt *transport) static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret = -ENOTCONN; if (!transport->inet) { struct sock *sk = sock->sk; @@ -1789,12 +1789,22 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) } if (!xprt_bound(xprt)) - return -ENOTCONN; + goto out; /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; - return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + switch (ret) { + case 0: + case -EINPROGRESS: + /* SYN_SENT! */ + xprt->connect_cookie++; + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + } +out: + return ret; } /** -- cgit v1.2.3-70-g09d2 From 0ced63d1a245ac11241a5d37932e6d04d9c8040d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 May 2011 14:26:35 -0400 Subject: NFSv4: Handle expired stateids when the lease is still valid Currently, if the server returns NFS4ERR_EXPIRED in reply to a READ or WRITE, but the RENEW test determines that the lease is still active, we fail to recover and end up looping forever in a READ/WRITE + RENEW death spiral. Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/nfs4proc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf1b339c393..d0e15dba7a5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -267,9 +267,11 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc break; nfs4_schedule_stateid_recovery(server, state); goto wait_on_recovery; + case -NFS4ERR_EXPIRED: + if (state != NULL) + nfs4_schedule_stateid_recovery(server, state); case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_EXPIRED: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) @@ -3670,9 +3672,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, break; nfs4_schedule_stateid_recovery(server, state); goto wait_on_recovery; + case -NFS4ERR_EXPIRED: + if (state != NULL) + nfs4_schedule_stateid_recovery(server, state); case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_EXPIRED: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) @@ -4543,6 +4547,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) case -ESTALE: goto out; case -NFS4ERR_EXPIRED: + nfs4_schedule_stateid_recovery(server, state); case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: nfs4_schedule_lease_recovery(server->nfs_client); -- cgit v1.2.3-70-g09d2 From 444f72fe7e7b5f4db34cee933fa3546ebb8e9122 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 May 2011 14:26:35 -0400 Subject: NFSv4.1: Fix the handling of NFS4ERR_SEQ_MISORDERED errors Currently, the call to nfs4_schedule_session_recovery() will actually just result in a test of the lease when what we really want is to force a session reset. Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/nfs4state.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 036f5adc9e1..e97dd219f84 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1466,7 +1466,10 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) #ifdef CONFIG_NFS_V4_1 void nfs4_schedule_session_recovery(struct nfs4_session *session) { - nfs4_schedule_lease_recovery(session->clp); + struct nfs_client *clp = session->clp; + + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + nfs4_schedule_lease_recovery(clp); } EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); @@ -1549,6 +1552,7 @@ static int nfs4_reset_session(struct nfs_client *clp) status = nfs4_recovery_handle_error(clp, status); goto out; } + clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); /* create_session negotiated new slot table */ clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); -- cgit v1.2.3-70-g09d2 From 60c16ea877546e559988a8b1e4f4256afbbd83e6 Mon Sep 17 00:00:00 2001 From: Harshula Jayasuriya Date: Mon, 23 May 2011 22:52:11 +1000 Subject: NFS: nfs_update_inode: print current and new inode size in debug output Hi Trond, In nfs_update_inode debug output, print the current and new inode size when the file size changes on the NFS server. Signed-off-by: Harshula Jayasuriya Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 57bb31ad7a5..873c6fa8bc3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1298,8 +1298,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } - dprintk("NFS: isize change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); + dprintk("NFS: isize change on server for file %s/%ld " + "(%Ld to %Ld)\n", + inode->i_sb->s_id, + inode->i_ino, + (long long)cur_isize, + (long long)new_isize); } } else invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR -- cgit v1.2.3-70-g09d2 From 4b8ee2b82e8b0b6e17ee33feb74fcdb5c6d8dbdd Mon Sep 17 00:00:00 2001 From: Vitaliy Gusev Date: Fri, 20 May 2011 01:34:46 +0400 Subject: nfs41: Correct offset for LAYOUTCOMMIT A client sends offset to MDS as it was seen by DS. As result, file size after copy is only half of original file size in case of 2 DS. Signed-off-by: Vitaliy Gusev Cc: stable@kernel.org [2.6.39] Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f57f5281a52..101c85a3644 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1009,7 +1009,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata) { struct nfs_inode *nfsi = NFS_I(wdata->inode); - loff_t end_pos = wdata->args.offset + wdata->res.count; + loff_t end_pos = wdata->mds_offset + wdata->res.count; bool mark_as_dirty = false; spin_lock(&nfsi->vfs_inode.i_lock); -- cgit v1.2.3-70-g09d2 From 26f04dde681c6a48b2bacfc5fe01fef204419b0c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sun, 1 May 2011 06:21:54 -0700 Subject: nfs,rcu: convert call_rcu(nfs_free_delegation_callback) to kfree_rcu() The rcu callback nfs_free_delegation_callback() just calls a kfree(), so we use kfree_rcu() instead of the call_rcu(nfs_free_delegation_callback). Signed-off-by: Lai Jiangshan Signed-off-by: Paul E. McKenney Cc: Trond Myklebust Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index bbbc6bf5cb2..dd25c2aec37 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -21,25 +21,13 @@ #include "delegation.h" #include "internal.h" -static void nfs_do_free_delegation(struct nfs_delegation *delegation) -{ - kfree(delegation); -} - -static void nfs_free_delegation_callback(struct rcu_head *head) -{ - struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu); - - nfs_do_free_delegation(delegation); -} - static void nfs_free_delegation(struct nfs_delegation *delegation) { if (delegation->cred) { put_rpccred(delegation->cred); delegation->cred = NULL; } - call_rcu(&delegation->rcu, nfs_free_delegation_callback); + kfree_rcu(delegation, rcu); } /** -- cgit v1.2.3-70-g09d2 From 61677eeec29e87edc03a1061ae0a04b92507450d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 May 2011 15:22:34 -0400 Subject: SUNRPC: Rename xs_encode_tcp_fragment_header() Clean up: Use a more generic name for xs_encode_tcp_fragment_header(); it's appropriate to use for all stream transport types. We're about to add new stream transport. Also, move it to a place where it is more easily shared amongst the various send_request methods. And finally, replace the "htonl" macro invocation with its modern equivalent. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f34f5ab341a..079366974e1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -495,6 +495,16 @@ static int xs_nospace(struct rpc_task *task) return ret; } +/* + * Construct a stream transport record marker in @buf. + */ +static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) +{ + u32 reclen = buf->len - sizeof(rpc_fraghdr); + rpc_fraghdr *base = buf->head[0].iov_base; + *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen); +} + /** * xs_udp_send_request - write an RPC request to a UDP socket * @task: address of RPC task that manages the state of an RPC request @@ -574,13 +584,6 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) kernel_sock_shutdown(sock, SHUT_WR); } -static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) -{ - u32 reclen = buf->len - sizeof(rpc_fraghdr); - rpc_fraghdr *base = buf->head[0].iov_base; - *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); -} - /** * xs_tcp_send_request - write an RPC request to a TCP socket * @task: address of RPC task that manages the state of an RPC request @@ -603,7 +606,7 @@ static int xs_tcp_send_request(struct rpc_task *task) struct xdr_buf *xdr = &req->rq_snd_buf; int status; - xs_encode_tcp_record_marker(&req->rq_snd_buf); + xs_encode_stream_record_marker(&req->rq_snd_buf); xs_pktdump("packet data:", req->rq_svec->iov_base, @@ -2024,10 +2027,7 @@ static int bc_sendto(struct rpc_rqst *req) unsigned long headoff; unsigned long tailoff; - /* - * Set up the rpc header and record marker stuff - */ - xs_encode_tcp_record_marker(xbufp); + xs_encode_stream_record_marker(xbufp); tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; -- cgit v1.2.3-70-g09d2 From 4251c94833aa516c1fc7a0f8f504a26eadd4b91e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 May 2011 15:22:15 -0400 Subject: NFS: Revert NFSROOT default mount options Marek Belisko reports that recent attempts to fix regressions in NFSROOT have broken his configuration: > After update from 2.6.38-rc8 to 2.6.38 is mounting rootfs over nfs not possible. > Log: > VFS: Mounted root (nfs filesystem) on device 0:14. > Freeing init memory: 132K > nfs: server 10.146.1.21 not responding, still trying > nfs: server 10.146.1.21 not responding, still trying > > This is never ending. I make short bisect (not too much commits > between versions) > and bad commit was reported: 53d4737580535e073963b91ce87d4216e434fab5 > > NFS: NFSROOT should default to "proto=udp" > > I've tested on mini2440 board (DM9000, static IP). > Is there some missing option or something else to be checked? An examination of a network trace captured during the failure shows that the mount is actually succeeding, but that the client is not seeing READ replies larger than 16KB. This could be a local packet filtering issue on the client, but we didn't troubleshoot this further because of the reported "git bisect" result. Last fall we removed the ad hoc mount option parser in fs/nfs/nfsroot.c in favor of using the main parser in fs/nfs/super.c (see commit 56463e50 "NFS: Use super.c for NFSROOT mount option parsing"). That commit changed the default NFSROOT mount options to be the same as those employed by user space mounts. As it turns out, these new default mount options are not tolerated by many embedded systems. So far these problems have been due to specific behavior of certain embedded NICs. The NFS community does not have such hardware on hand for running tests. Commit 53d47375 recently introduced a clean way to specify default mount options for NFSROOT, so we can now easily restore the traditional defaults for NFSROOT: vers=2,udp,rsize=4096,wsize=4096 This should revert the new default NFSROOT mount options introduced with commit 56463e50. Tested-by: Marek Belisto Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfsroot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index c541093a5bf..c4744e1d513 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -87,7 +87,7 @@ #define NFS_ROOT "/tftpboot/%s" /* Default NFSROOT mount options. */ -#define NFS_DEF_OPTIONS "udp" +#define NFS_DEF_OPTIONS "vers=2,udp,rsize=4096,wsize=4096" /* Parameters passed from the kernel command line */ static char nfs_root_parms[256] __initdata = ""; -- cgit v1.2.3-70-g09d2 From da09eb93033e7204cb3e3f3140b46cf108c42c8f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 May 2011 15:22:25 -0400 Subject: SUNRPC: Clean up use of curly braces in switch cases Clean up. Preferred style is not to use curly braces around switch cases. I'm about to add another case that needs a third type cast. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8d83f9d4871..b91f8f943c5 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -298,22 +298,21 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * up a string representation of the passed-in address. */ if (args->servername == NULL) { + struct sockaddr_in *sin = + (struct sockaddr_in *)args->address; + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)args->address; + servername[0] = '\0'; switch (args->address->sa_family) { - case AF_INET: { - struct sockaddr_in *sin = - (struct sockaddr_in *)args->address; + case AF_INET: snprintf(servername, sizeof(servername), "%pI4", &sin->sin_addr.s_addr); break; - } - case AF_INET6: { - struct sockaddr_in6 *sin = - (struct sockaddr_in6 *)args->address; + case AF_INET6: snprintf(servername, sizeof(servername), "%pI6", - &sin->sin6_addr); + &sin6->sin6_addr); break; - } default: /* caller wants default server name, but * address family isn't recognized. */ -- cgit v1.2.3-70-g09d2 From 7402ab19cdd5943c7dd4f3399afe3abda8077ef5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 May 2011 15:22:55 -0400 Subject: SUNRPC: Use AF_LOCAL for rpcbind upcalls As libtirpc does in user space, have our registration API try using an AF_LOCAL transport first when registering and unregistering. This means we don't chew up privileged ports, and our registration is bound to an "owner" (the effective uid of the process on the sending end of the transport). Only that "owner" may unregister the service. The kernel could probe rpcbind via an rpcbind query to determine whether rpcbind has an AF_LOCAL service. For simplicity, we use the same technique that libtirpc uses: simply fail over to network loopback if creating an AF_LOCAL transport to the well-known rpcbind service socket fails. This means we open-code the pathname of the rpcbind socket in the kernel. For now we have to do that anyway because the kernel's RPC over AF_LOCAL implementation does not support autobind. That may be undesirable in the long term. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 97 ++++++++++++++++++++++++++++++++++++++++++-------- net/sunrpc/svc.c | 2 ++ 2 files changed, 85 insertions(+), 14 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index c652e4cc9fe..9a80a922c52 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -32,6 +33,8 @@ # define RPCDBG_FACILITY RPCDBG_BIND #endif +#define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock" + #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) @@ -158,20 +161,69 @@ static void rpcb_map_release(void *data) kfree(map); } -static const struct sockaddr_in rpcb_inaddr_loopback = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - .sin_port = htons(RPCBIND_PORT), -}; +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local_unix(void) +{ + static const struct sockaddr_un rpcb_localaddr_rpcbind = { + .sun_family = AF_LOCAL, + .sun_path = RPCBIND_SOCK_PATHNAME, + }; + struct rpc_create_args args = { + .net = &init_net, + .protocol = XPRT_TRANSPORT_LOCAL, + .address = (struct sockaddr *)&rpcb_localaddr_rpcbind, + .addrsize = sizeof(rpcb_localaddr_rpcbind), + .servername = "localhost", + .program = &rpcb_program, + .version = RPCBVERS_2, + .authflavor = RPC_AUTH_NULL, + }; + struct rpc_clnt *clnt, *clnt4; + int result = 0; + + /* + * Because we requested an RPC PING at transport creation time, + * this works only if the user space portmapper is rpcbind, and + * it's listening on AF_LOCAL on the named socket. + */ + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create AF_LOCAL rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); + result = -PTR_ERR(clnt); + goto out; + } + + clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); + if (IS_ERR(clnt4)) { + dprintk("RPC: failed to bind second program to " + "rpcbind v4 client (errno %ld).\n", + PTR_ERR(clnt4)); + clnt4 = NULL; + } + + /* Protected by rpcb_create_local_mutex */ + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; -static DEFINE_MUTEX(rpcb_create_local_mutex); +out: + return result; +} /* * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local(void) +static int rpcb_create_local_net(void) { + static const struct sockaddr_in rpcb_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(RPCBIND_PORT), + }; struct rpc_create_args args = { .net = &init_net, .protocol = XPRT_TRANSPORT_TCP, @@ -186,13 +238,6 @@ static int rpcb_create_local(void) struct rpc_clnt *clnt, *clnt4; int result = 0; - if (rpcb_local_clnt) - return result; - - mutex_lock(&rpcb_create_local_mutex); - if (rpcb_local_clnt) - goto out; - clnt = rpc_create(&args); if (IS_ERR(clnt)) { dprintk("RPC: failed to create local rpcbind " @@ -214,9 +259,33 @@ static int rpcb_create_local(void) clnt4 = NULL; } + /* Protected by rpcb_create_local_mutex */ rpcb_local_clnt = clnt; rpcb_local_clnt4 = clnt4; +out: + return result; +} + +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local(void) +{ + static DEFINE_MUTEX(rpcb_create_local_mutex); + int result = 0; + + if (rpcb_local_clnt) + return result; + + mutex_lock(&rpcb_create_local_mutex); + if (rpcb_local_clnt) + goto out; + + if (rpcb_create_local_unix() != 0) + result = rpcb_create_local_net(); + out: mutex_unlock(&rpcb_create_local_mutex); return result; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 08e05a8ce02..2b90292e950 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -942,6 +942,8 @@ static void svc_unregister(const struct svc_serv *serv) if (progp->pg_vers[i]->vs_hidden) continue; + dprintk("svc: attempting to unregister %sv%u\n", + progp->pg_name, i); __svc_unregister(progp->pg_prog, i, progp->pg_name); } } -- cgit v1.2.3-70-g09d2 From 559649efb9b0d248541933197bdf7b75529da457 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 May 2011 15:23:04 -0400 Subject: SUNRPC: Remove obsolete comment Clean up. The documenting comment at the top of net/sunrpc/clnt.c is out of date. We adopted BSD's RTO estimation mechanism years ago. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b91f8f943c5..08ed49629b8 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -13,10 +13,6 @@ * and need to be refreshed, or when a packet was damaged in transit. * This may be have to be moved to the VFS layer. * - * NB: BSD uses a more intelligent approach to guessing when a request - * or reply has been lost by keeping the RTO estimate for each procedure. - * We currently make do with a constant timeout value. - * * Copyright (C) 1992,1993 Rick Sladkey * Copyright (C) 1995,1996 Olaf Kirch */ -- cgit v1.2.3-70-g09d2 From 176e21ee2ec89cae8d45cf1a850ea45a45428fb8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 May 2011 15:22:44 -0400 Subject: SUNRPC: Support for RPC over AF_LOCAL transports TI-RPC introduces the capability of performing RPC over AF_LOCAL sockets. It uses this mainly for registering and unregistering local RPC services securely with the local rpcbind, but we could also conceivably use it as a generic upcall mechanism. This patch provides a client-side only implementation for the moment. We might also consider a server-side implementation to provide AF_LOCAL access to NLM (for statd downcalls, and such like). Autobinding is not supported on kernel AF_LOCAL transports at this time. Kernel ULPs must specify the pathname of the remote endpoint when an AF_LOCAL transport is created. rpcbind supports registering services available via AF_LOCAL, so the kernel could handle it with some adjustment to ->rpcbind and ->set_port. But we don't need this feature for doing upcalls via well-known named sockets. This has not been tested with ULPs that move a substantial amount of data. Thus, I can't attest to how robust the write_space and congestion management logic is. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/msg_prot.h | 1 + include/linux/sunrpc/xprt.h | 3 +- net/sunrpc/clnt.c | 8 + net/sunrpc/xprtsock.c | 395 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 403 insertions(+), 4 deletions(-) diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 77e62488339..c68a147939a 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -145,6 +145,7 @@ typedef __be32 rpc_fraghdr; #define RPCBIND_NETID_TCP "tcp" #define RPCBIND_NETID_UDP6 "udp6" #define RPCBIND_NETID_TCP6 "tcp6" +#define RPCBIND_NETID_LOCAL "local" /* * Note that RFC 1833 does not put any size restrictions on the diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a0f998c07c6..81cce3b3ee6 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -141,7 +141,8 @@ enum xprt_transports { XPRT_TRANSPORT_UDP = IPPROTO_UDP, XPRT_TRANSPORT_TCP = IPPROTO_TCP, XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, - XPRT_TRANSPORT_RDMA = 256 + XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_LOCAL = 257, }; struct rpc_xprt { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 08ed49629b8..b84d7395535 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -28,7 +28,9 @@ #include #include #include +#include #include +#include #include #include @@ -294,6 +296,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * up a string representation of the passed-in address. */ if (args->servername == NULL) { + struct sockaddr_un *sun = + (struct sockaddr_un *)args->address; struct sockaddr_in *sin = (struct sockaddr_in *)args->address; struct sockaddr_in6 *sin6 = @@ -301,6 +305,10 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) servername[0] = '\0'; switch (args->address->sa_family) { + case AF_LOCAL: + snprintf(servername, sizeof(servername), "%s", + sun->sun_path); + break; case AF_INET: snprintf(servername, sizeof(servername), "%pI4", &sin->sin_addr.s_addr); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 079366974e1..72abb735893 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +47,9 @@ #include #include "sunrpc.h" + +static void xs_close(struct rpc_xprt *xprt); + /* * xprtsock tunables */ @@ -261,6 +266,11 @@ static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) return (struct sockaddr *) &xprt->addr; } +static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt) +{ + return (struct sockaddr_un *) &xprt->addr; +} + static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) { return (struct sockaddr_in *) &xprt->addr; @@ -276,23 +286,34 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) struct sockaddr *sap = xs_addr(xprt); struct sockaddr_in6 *sin6; struct sockaddr_in *sin; + struct sockaddr_un *sun; char buf[128]; - (void)rpc_ntop(sap, buf, sizeof(buf)); - xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - switch (sap->sa_family) { + case AF_LOCAL: + sun = xs_addr_un(xprt); + strlcpy(buf, sun->sun_path, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); + break; case AF_INET: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin = xs_addr_in(xprt); snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); break; case AF_INET6: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin6 = xs_addr_in6(xprt); snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); break; default: BUG(); } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); } @@ -505,6 +526,60 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen); } +/** + * xs_local_send_request - write an RPC request to an AF_LOCAL socket + * @task: RPC task that manages the state of an RPC request + * + * Return values: + * 0: The request has been sent + * EAGAIN: The socket was blocked, please call again later to + * complete the request + * ENOTCONN: Caller needs to invoke connect logic then call again + * other: Some other error occured, the request was not sent + */ +static int xs_local_send_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct xdr_buf *xdr = &req->rq_snd_buf; + int status; + + xs_encode_stream_record_marker(&req->rq_snd_buf); + + xs_pktdump("packet data:", + req->rq_svec->iov_base, req->rq_svec->iov_len); + + status = xs_sendpages(transport->sock, NULL, 0, + xdr, req->rq_bytes_sent); + dprintk("RPC: %s(%u) = %d\n", + __func__, xdr->len - req->rq_bytes_sent, status); + if (likely(status >= 0)) { + req->rq_bytes_sent += status; + req->rq_xmit_bytes_sent += status; + if (likely(req->rq_bytes_sent >= req->rq_slen)) { + req->rq_bytes_sent = 0; + return 0; + } + status = -EAGAIN; + } + + switch (status) { + case -EAGAIN: + status = xs_nospace(task); + break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); + case -EPIPE: + xs_close(xprt); + status = -ENOTCONN; + } + + return status; +} + /** * xs_udp_send_request - write an RPC request to a UDP socket * @task: address of RPC task that manages the state of an RPC request @@ -788,6 +863,88 @@ static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) return (struct rpc_xprt *) sk->sk_user_data; } +static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) +{ + struct xdr_skb_reader desc = { + .skb = skb, + .offset = sizeof(rpc_fraghdr), + .count = skb->len - sizeof(rpc_fraghdr), + }; + + if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0) + return -1; + if (desc.count) + return -1; + return 0; +} + +/** + * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets + * @sk: socket with data to read + * @len: how much data to read + * + * Currently this assumes we can read the whole reply in a single gulp. + */ +static void xs_local_data_ready(struct sock *sk, int len) +{ + struct rpc_task *task; + struct rpc_xprt *xprt; + struct rpc_rqst *rovr; + struct sk_buff *skb; + int err, repsize, copied; + u32 _xid; + __be32 *xp; + + read_lock_bh(&sk->sk_callback_lock); + dprintk("RPC: %s...\n", __func__); + xprt = xprt_from_sock(sk); + if (xprt == NULL) + goto out; + + skb = skb_recv_datagram(sk, 0, 1, &err); + if (skb == NULL) + goto out; + + if (xprt->shutdown) + goto dropit; + + repsize = skb->len - sizeof(rpc_fraghdr); + if (repsize < 4) { + dprintk("RPC: impossible RPC reply size %d\n", repsize); + goto dropit; + } + + /* Copy the XID from the skb... */ + xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid); + if (xp == NULL) + goto dropit; + + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->transport_lock); + rovr = xprt_lookup_rqst(xprt, *xp); + if (!rovr) + goto out_unlock; + task = rovr->rq_task; + + copied = rovr->rq_private_buf.buflen; + if (copied > repsize) + copied = repsize; + + if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { + dprintk("RPC: sk_buff copy failed\n"); + goto out_unlock; + } + + xprt_complete_rqst(task, copied); + + out_unlock: + spin_unlock(&xprt->transport_lock); + dropit: + skb_free_datagram(sk, skb); + out: + read_unlock_bh(&sk->sk_callback_lock); +} + /** * xs_udp_data_ready - "data ready" callback for UDP sockets * @sk: socket with data to read @@ -1573,11 +1730,31 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) return err; } +/* + * We don't support autobind on AF_LOCAL sockets + */ +static void xs_local_rpcbind(struct rpc_task *task) +{ + xprt_set_bound(task->tk_xprt); +} + +static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) +{ +} #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; +static inline void xs_reclassify_socketu(struct socket *sock) +{ + struct sock *sk = sock->sk; + + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC", + &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]); +} + static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; @@ -1599,6 +1776,9 @@ static inline void xs_reclassify_socket6(struct socket *sock) static inline void xs_reclassify_socket(int family, struct socket *sock) { switch (family) { + case AF_LOCAL: + xs_reclassify_socketu(sock); + break; case AF_INET: xs_reclassify_socket4(sock); break; @@ -1608,6 +1788,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock) } } #else +static inline void xs_reclassify_socketu(struct socket *sock) +{ +} + static inline void xs_reclassify_socket4(struct socket *sock) { } @@ -1646,6 +1830,94 @@ out: return ERR_PTR(err); } +static int xs_local_finish_connecting(struct rpc_xprt *xprt, + struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + xs_save_old_callbacks(transport, sk); + + sk->sk_user_data = xprt; + sk->sk_data_ready = xs_local_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); +} + +/** + * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint + * @xprt: RPC transport to connect + * @transport: socket transport to connect + * @create_sock: function to create a socket of the correct type + * + * Invoked by a work queue tasklet. + */ +static void xs_local_setup_socket(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock; + int status = -EIO; + + if (xprt->shutdown) + goto out; + + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + status = __sock_create(xprt->xprt_net, AF_LOCAL, + SOCK_STREAM, 0, &sock, 1); + if (status < 0) { + dprintk("RPC: can't create AF_LOCAL " + "transport socket (%d).\n", -status); + goto out; + } + xs_reclassify_socketu(sock); + + dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + + status = xs_local_finish_connecting(xprt, sock); + switch (status) { + case 0: + dprintk("RPC: xprt %p connected to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + xprt_set_connected(xprt); + break; + case -ENOENT: + dprintk("RPC: xprt %p: socket %s does not exist\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + break; + default: + printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n", + __func__, -status, + xprt->address_strings[RPC_DISPLAY_ADDR]); + } + +out: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); +} + static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1929,6 +2201,32 @@ static void xs_connect(struct rpc_task *task) } } +/** + * xs_local_print_stats - display AF_LOCAL socket-specifc stats + * @xprt: rpc_xprt struct containing statistics + * @seq: output file + * + */ +static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) +{ + long idle_time = 0; + + if (xprt_connected(xprt)) + idle_time = (long)(jiffies - xprt->last_used) / HZ; + + seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " + "%llu %llu\n", + xprt->stat.bind_count, + xprt->stat.connect_count, + xprt->stat.connect_time, + idle_time, + xprt->stat.sends, + xprt->stat.recvs, + xprt->stat.bad_xids, + xprt->stat.req_u, + xprt->stat.bklog_u); +} + /** * xs_udp_print_stats - display UDP socket-specifc stats * @xprt: rpc_xprt struct containing statistics @@ -2099,6 +2397,21 @@ static void bc_destroy(struct rpc_xprt *xprt) { } +static struct rpc_xprt_ops xs_local_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xs_tcp_release_xprt, + .rpcbind = xs_local_rpcbind, + .set_port = xs_local_set_port, + .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, + .send_request = xs_local_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_def, + .close = xs_close, + .destroy = xs_destroy, + .print_stats = xs_local_print_stats, +}; + static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, @@ -2160,6 +2473,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap) }; switch (family) { + case AF_LOCAL: + break; case AF_INET: memcpy(sap, &sin, sizeof(sin)); break; @@ -2207,6 +2522,70 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return xprt; } +static const struct rpc_timeout xs_local_default_timeout = { + .to_initval = 10 * HZ, + .to_maxval = 10 * HZ, + .to_retries = 2, +}; + +/** + * xs_setup_local - Set up transport to use an AF_LOCAL socket + * @args: rpc transport creation arguments + * + * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP + */ +static struct rpc_xprt *xs_setup_local(struct xprt_create *args) +{ + struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr; + struct sock_xprt *transport; + struct rpc_xprt *xprt; + struct rpc_xprt *ret; + + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); + + xprt->prot = 0; + xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + + xprt->bind_timeout = XS_BIND_TO; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; + + xprt->ops = &xs_local_ops; + xprt->timeout = &xs_local_default_timeout; + + switch (sun->sun_family) { + case AF_LOCAL: + if (sun->sun_path[0] != '/') { + dprintk("RPC: bad AF_LOCAL address: %s\n", + sun->sun_path); + ret = ERR_PTR(-EINVAL); + goto out_err; + } + xprt_set_bound(xprt); + INIT_DELAYED_WORK(&transport->connect_worker, + xs_local_setup_socket); + xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); + break; + default: + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; + } + + dprintk("RPC: set up xprt to %s via AF_LOCAL\n", + xprt->address_strings[RPC_DISPLAY_ADDR]); + + if (try_module_get(THIS_MODULE)) + return xprt; + ret = ERR_PTR(-EINVAL); +out_err: + xprt_free(xprt); + return ret; +} + static const struct rpc_timeout xs_udp_default_timeout = { .to_initval = 5 * HZ, .to_maxval = 30 * HZ, @@ -2448,6 +2827,14 @@ out_err: return ret; } +static struct xprt_class xs_local_transport = { + .list = LIST_HEAD_INIT(xs_local_transport.list), + .name = "named UNIX socket", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_LOCAL, + .setup = xs_setup_local, +}; + static struct xprt_class xs_udp_transport = { .list = LIST_HEAD_INIT(xs_udp_transport.list), .name = "udp", @@ -2483,6 +2870,7 @@ int init_socket_xprt(void) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif + xprt_register_transport(&xs_local_transport); xprt_register_transport(&xs_udp_transport); xprt_register_transport(&xs_tcp_transport); xprt_register_transport(&xs_bc_tcp_transport); @@ -2503,6 +2891,7 @@ void cleanup_socket_xprt(void) } #endif + xprt_unregister_transport(&xs_local_transport); xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); xprt_unregister_transport(&xs_bc_tcp_transport); -- cgit v1.2.3-70-g09d2 From 89e1be50c68eb5e58b873dce87bbac627ee18d1f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 May 2011 23:11:24 -0400 Subject: x86: Put back -pg to tsc.o and add no GCOV to vread_tsc_64.o The commit 44259b1abfaa8bb819d25d41d71e8e33e25dd36a Author: Andy Lutomirski x86-64: Move vread_tsc into a new file with sensible options Removed the -pg from tsc.o which caused the function graph tracer to go into an infinite function call recursion as it uses the tsc internally outside its recursion protection, thus tracing the tsc breaks the function graph tracer. This commit also added the file vread_tsc_64.c that gets used by vdso but failed to prevent GCOV from monkeying with it, causing userspace to try to access kernel data when GCOV was enabled. Thanks to Thomas Gleixner for pointing out GCOV as the likely culprit that added strange kernel accesses into the vread_tsc() call. Cc: Author: Andy Lutomirski Cc: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f5abe3a245b..90b06d4daee 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -8,6 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities +CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_pvclock.o = -pg @@ -28,6 +29,7 @@ CFLAGS_paravirt.o := $(nostackp) GCOV_PROFILE_vsyscall_64.o := n GCOV_PROFILE_hpet.o := n GCOV_PROFILE_tsc.o := n +GCOV_PROFILE_vread_tsc_64.o := n GCOV_PROFILE_paravirt.o := n # vread_tsc_64 is hot and should be fully optimized: -- cgit v1.2.3-70-g09d2 From 76f0b8d2d2c8d417623142069cdfde1cf1e108d5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:54 -0700 Subject: bfs: remove unnecessary dentry_unhash on dir rename Bfs does not have problems with references to unlinked directories. CC: tigran@aivazian.fsnet.co.uk Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/bfs/dir.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index c7d1d06b048..b14cebfd904 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -224,9 +224,6 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct bfs_sb_info *info; int error = -ENOENT; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - old_bh = new_bh = NULL; old_inode = old_dentry->d_inode; if (S_ISDIR(old_inode->i_mode)) -- cgit v1.2.3-70-g09d2 From 873ae4d5a8b282c6e2bbc09c6d59eeb1bec97ef7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:55 -0700 Subject: sysv: remove unnecessary dentry_unhash from rmdir, dir rename sysv does not have problems with references to unlinked directories. CC: Christoph Hellwig Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/sysv/namei.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index e2cc6756f3b..e474fbcf8bd 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -196,8 +196,6 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry) struct inode *inode = dentry->d_inode; int err = -ENOTEMPTY; - dentry_unhash(dentry); - if (sysv_empty_dir(inode)) { err = sysv_unlink(dir, dentry); if (!err) { @@ -224,9 +222,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry, struct sysv_dir_entry * old_de; int err = -ENOENT; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - old_de = sysv_find_entry(old_dentry, &old_page); if (!old_de) goto out; -- cgit v1.2.3-70-g09d2 From cf0f0536fa65ca1353476c49feed34891f3f7134 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:56 -0700 Subject: jffs2: remove unnecessary dentry_unhash from rmdir, dir rename jffs2 does not have problems with references to unlinked directories. CC: David Woodhouse CC: linux-mtd@lists.infradead.org Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/jffs2/dir.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 05f73328b28..82faddd1f32 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -609,8 +609,6 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) int ret; uint32_t now = get_seconds(); - dentry_unhash(dentry); - for (fd = f->dents ; fd; fd = fd->next) { if (fd->ino) return -ENOTEMPTY; @@ -786,9 +784,6 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, uint8_t type; uint32_t now; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - /* The VFS will check for us and prevent trying to rename a * file over a directory and vice versa, but if it's a directory, * the VFS can't check whether the victim is empty. The filesystem -- cgit v1.2.3-70-g09d2 From 44a8e6364e48ab93a1d86385b5fc9efe81395fa9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:57 -0700 Subject: jfs: remove unnecessary dentry_unhash from rmdir, dir rename jfs does not have problems with references to unlinked directories. CC: Dave Kleikamp CC: jfs-discussion@lists.sourceforge.net Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/jfs/namei.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 865df16a6cf..eaaf2b511e8 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -360,8 +360,6 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); - dentry_unhash(dentry); - /* Init inode for quota operations. */ dquot_initialize(dip); dquot_initialize(ip); @@ -1097,9 +1095,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, new_dentry->d_name.name); - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - dquot_initialize(old_dir); dquot_initialize(new_dir); -- cgit v1.2.3-70-g09d2 From 64370e2f9df572977757d70b1ad293b743dd9e20 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:58 -0700 Subject: logfs: remove unnecessary dentry_unhash from rmdir, dir rename logfs does not have problems with references to unlinked directories. CC: Joern Engel CC: logfs@logfs.org Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/logfs/dir.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index f34c9cde9e9..9ed89d1663f 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -273,8 +273,6 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - dentry_unhash(dentry); - if (!logfs_empty_dir(inode)) return -ENOTEMPTY; @@ -624,9 +622,6 @@ static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry, loff_t pos; int err; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - /* 1. locate source dd */ err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); if (err) -- cgit v1.2.3-70-g09d2 From dfb55de89879a1c32a70d0a510b3701ed9a6b855 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:41:59 -0700 Subject: nilfs2: remove unnecessary dentry_unhash from rmdir, dir rename nilfs2 does not have problems with references to unlinked directories. CC: KONISHI Ryusuke CC: linux-nilfs@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/nilfs2/namei.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 1102a5fbb74..546849b3e88 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -334,8 +334,6 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) struct nilfs_transaction_info ti; int err; - dentry_unhash(dentry); - err = nilfs_transaction_begin(dir->i_sb, &ti, 0); if (err) return err; @@ -371,9 +369,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct nilfs_transaction_info ti; int err; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); if (unlikely(err)) return err; -- cgit v1.2.3-70-g09d2 From 0e54ec1c3a002a9d5e57b5ac73a934cc15a0fe06 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:00 -0700 Subject: ubifs: remove unnecessary dentry_unhash from rmdir, dir rename ubifs does not have problems with references to unlinked directories. CC: Artem Bityutskiy CC: Adrian Hunter CC: linux-mtd@lists.infradead.org Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/ubifs/dir.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index c2b80943560..ef5abd38f0b 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -656,8 +656,6 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; - dentry_unhash(dentry); - /* * Budget request settings: deletion direntry, deletion inode and * changing the parent inode. If budgeting fails, go ahead anyway @@ -978,9 +976,6 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct timespec time; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - /* * Budget request settings: deletion direntry, new direntry, removing * the old inode, and changing old and new parent directory inodes. -- cgit v1.2.3-70-g09d2 From 87161faae26503a8ebe1be5ba72073ae860dbfc7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:01 -0700 Subject: ufs: remove unnecessary dentry_unhash from rmdir, dir rename ufs does not have problems with references to unlinked directories. CC: Evgeniy Dushistov Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/ufs/namei.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 953ebdfc5bf..29309e25417 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -258,8 +258,6 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err= -ENOTEMPTY; - dentry_unhash(dentry); - lock_ufs(dir->i_sb); if (ufs_empty_dir (inode)) { err = ufs_unlink(dir, dentry); @@ -284,9 +282,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ufs_dir_entry *old_de; int err = -ENOENT; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; -- cgit v1.2.3-70-g09d2 From cc350c2764a657ee012efd5bd260a6cd5be2f877 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:02 -0700 Subject: reiserfs: remove unnecessary dentry_unhash from rmdir, dir rename Reiserfs does not have problems with references to unlinked directories. CC: reiserfs-devel@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/reiserfs/namei.c | 5 ----- fs/reiserfs/xattr.c | 1 - 2 files changed, 6 deletions(-) diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 76c8164d565..118662690cd 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -831,8 +831,6 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) INITIALIZE_PATH(path); struct reiserfs_dir_entry de; - dentry_unhash(dentry); - /* we will be doing 2 balancings and update 2 stat data, we change quotas * of the owner of the directory and of the owner of the parent directory. * The quota structure is possibly deleted only on last iput => outside @@ -1227,9 +1225,6 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, unsigned long savelink = 1; struct timespec ctime; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - /* three balancings: (1) old name removal, (2) new name insertion and (3) maybe "save" link insertion stat data updates: (1) old directory, diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 50f1abccd1c..e8a62f41b45 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -98,7 +98,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, I_MUTEX_CHILD, dir->i_sb); - dentry_unhash(dentry); error = dir->i_op->rmdir(dir, dentry); if (!error) dentry->d_inode->i_flags |= S_DEAD; -- cgit v1.2.3-70-g09d2 From 7020739df2fa0e2126fc9739987e016860f14323 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:03 -0700 Subject: udf: remove unnecessary dentry_unhash from rmdir, dir rename udf does not have problems with references to unlinked directories. CC: Jan Kara Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/udf/namei.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 4d76594c2a8..f1dce848ef9 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -783,8 +783,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) struct fileIdentDesc *fi, cfi; struct kernel_lb_addr tloc; - dentry_unhash(dentry); - retval = -ENOENT; fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); if (!fi) @@ -1083,9 +1081,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, struct kernel_lb_addr tloc; struct udf_inode_info *old_iinfo = UDF_I(old_inode); - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); if (ofi) { if (ofibh.sbh != ofibh.ebh) -- cgit v1.2.3-70-g09d2 From 8aaa0f5431d8d1181b3d1a1bcd8f3330c0ce275f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:04 -0700 Subject: omfs: remove unnecessary dentry_unhash on rmdir, dir rneame omfs does not have problems with references to unlinked directories. CC: Bob Copeland CC: linux-karma-devel@lists.sourceforge.net Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/omfs/dir.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index c368360c35a..3b8d3979e03 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -241,11 +241,9 @@ static int omfs_remove(struct inode *dir, struct dentry *dentry) int ret; - if (S_ISDIR(inode->i_mode)) { - dentry_unhash(dentry); - if (!omfs_dir_is_empty(inode)) - return -ENOTEMPTY; - } + if (S_ISDIR(inode->i_mode) && + !omfs_dir_is_empty(inode)) + return -ENOTEMPTY; ret = omfs_delete_entry(dentry); if (ret) @@ -382,9 +380,6 @@ static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, int err; if (new_inode) { - if (S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - /* overwriting existing file/dir */ err = omfs_remove(new_dir, new_dentry); if (err) -- cgit v1.2.3-70-g09d2 From 4e82d61b6ac4966b3b61c2d97ddf04928f037be1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:05 -0700 Subject: hfs: remove unnecessary dentry_unhash on rmdir, dir rename hfs does not have problems with references to unlinked directories. Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/hfs/dir.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 1cb70cdba2c..b4d70b13be9 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -253,9 +253,6 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int res; - if (S_ISDIR(inode->i_mode)) - dentry_unhash(dentry); - if (S_ISDIR(inode->i_mode) && inode->i_size != 2) return -ENOTEMPTY; res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); @@ -286,9 +283,6 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* Unlink destination if it already exists */ if (new_dentry->d_inode) { - if (S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - res = hfs_remove(new_dir, new_dentry); if (res) return res; -- cgit v1.2.3-70-g09d2 From e3911785b8ae6897b3dae2af4fa296aa5a0f2c56 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:06 -0700 Subject: hfsplus: remove unnecessary dentry_unhash on rmdir, dir rename hfsplus does not have problems with references to unlinked directories. Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/hfsplus/dir.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index b28835091dd..4df5059c25d 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -370,8 +370,6 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int res; - dentry_unhash(dentry); - if (inode->i_size != 2) return -ENOTEMPTY; @@ -469,12 +467,10 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, /* Unlink destination if it already exists */ if (new_dentry->d_inode) { - if (S_ISDIR(new_dentry->d_inode->i_mode)) { - dentry_unhash(new_dentry); + if (S_ISDIR(new_dentry->d_inode->i_mode)) res = hfsplus_rmdir(new_dir, new_dentry); - } else { + else res = hfsplus_unlink(new_dir, new_dentry); - } if (res) return res; } -- cgit v1.2.3-70-g09d2 From e41a59e0550b7bb40fe8c3438d690712e9fd511c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:07 -0700 Subject: hostfs: remove unnecessary dentry_unhash on rmdir, dir rename hostfs does not have problems with references to unlinked directories. CC: Jeff Dike CC: Richard Weinberger CC: user-mode-linux-devel@lists.sourceforge.net Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index e6816b9e690..2638c834ed2 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -683,8 +683,6 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) char *file; int err; - dentry_unhash(dentry); - if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; err = do_rmdir(file); @@ -738,9 +736,6 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, char *from_name, *to_name; int err; - if (to->d_inode && S_ISDIR(to->d_inode->i_mode)) - dentry_unhash(to); - if ((from_name = dentry_name(from)) == NULL) return -ENOMEM; if ((to_name = dentry_name(to)) == NULL) { -- cgit v1.2.3-70-g09d2 From 55e5b7e022eaaa805a44e3b6ecd5c8638d862050 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:08 -0700 Subject: ecryptfs: remove unnecessary dentry_unhash on rmdir, dir rename ecryptfs does not have problems with references to unlinked directories. CC: Tyler Hicks CC: Dustin Kirkland CC: ecryptfs-devel@lists.launchpad.net Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 227b409b840..4d4cc6a90cd 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -521,8 +521,6 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) struct dentry *lower_dir_dentry; int rc; - dentry_unhash(dentry); - lower_dentry = ecryptfs_dentry_to_lower(dentry); dget(dentry); lower_dir_dentry = lock_parent(lower_dentry); @@ -573,9 +571,6 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_new_dir_dentry; struct dentry *trap = NULL; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); dget(lower_old_dentry); -- cgit v1.2.3-70-g09d2 From 7ce605d93b775e8a960b0be244f7be565e73b3c1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:09 -0700 Subject: ncpfs: document dentry_unhash usage ncpfs returns EBUSY if there are any references to the directory. The dentry_unhash call only unhashes the dentry if there are no references. CC: Petr Vandrovec CC: linux-kernel@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/ncpfs/dir.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index e3e646b0640..81c287d105d 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1033,8 +1033,11 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) DPRINTK("ncp_rmdir: removing %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); + /* + * fail with EBUSY if there are still references to this + * directory. + */ dentry_unhash(dentry); - error = -EBUSY; if (!d_unhashed(dentry)) goto out; -- cgit v1.2.3-70-g09d2 From 76cc071a06afb4d2dd17bb1b855a233a419e7e02 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:10 -0700 Subject: ncpfs: fix rename over directory with dangling references ncpfs does not handle references to unlinked directories (or so it would seem given the ncp_rmdir check). Since it is also possible to rename over an empty directory, perform the same check here. CC: Petr Vandrovec CC: linux-kernel@vger.kernel.org Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/ncpfs/dir.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 81c287d105d..9c51f621e90 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1144,8 +1144,16 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name); - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) { + /* + * fail with EBUSY if there are still references to this + * directory. + */ dentry_unhash(new_dentry); + error = -EBUSY; + if (!d_unhashed(new_dentry)) + goto out; + } ncp_age_dentry(server, old_dentry); ncp_age_dentry(server, new_dentry); -- cgit v1.2.3-70-g09d2 From 86905d6d96f138a99326016e4f0ca933200e0729 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:11 -0700 Subject: 9p: remove unnecessary dentry_unhash on rmdir, dir rename 9p has no problems with references to unlinked directories. CC: Eric Van Hensbergen CC: Ron Minnich CC: Latchesar Ionkov CC: v9fs-developer@lists.sourceforge.net Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/9p/vfs_inode.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 8d7f3e69ae2..7f6c6770319 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -814,7 +814,6 @@ int v9fs_vfs_unlink(struct inode *i, struct dentry *d) int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) { - dentry_unhash(d); return v9fs_remove(i, d, 1); } @@ -840,9 +839,6 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct p9_fid *newdirfid; struct p9_wstat wstat; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - P9_DPRINTK(P9_DEBUG_VFS, "\n"); retval = 0; old_inode = old_dentry->d_inode; -- cgit v1.2.3-70-g09d2 From 94f9901b6c05d5e1e38d887179b8307efbc8e10c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:12 -0700 Subject: affs: remove unnecessary dentry_unhash on rmdir, dir rename affs has no problems with references to unlinked directories. Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/affs/namei.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 03330e2e390..e3e9efc1fdd 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -320,8 +320,6 @@ affs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name); - dentry_unhash(dentry); - return affs_remove_header(dentry); } @@ -419,9 +417,6 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, struct buffer_head *bh = NULL; int retval; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n", (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); -- cgit v1.2.3-70-g09d2 From 705da2de95ce79199a52f21d82dd9f36b89afd12 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:13 -0700 Subject: afs: remove unnecessary dentry_unhash on rmdir, dir rename afs has no problems with references to unlinked directories. CC: David Howells CC: linux-afs@lists.infradead.org Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/afs/dir.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2c4e0516004..20c106f2492 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -845,8 +845,6 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) _enter("{%x:%u},{%s}", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); - dentry_unhash(dentry); - ret = -ENAMETOOLONG; if (dentry->d_name.len >= AFSNAMEMAX) goto error; @@ -1148,9 +1146,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct key *key; int ret; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - vnode = AFS_FS_I(old_dentry->d_inode); orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); -- cgit v1.2.3-70-g09d2 From 42b850b2806f8f55f5c3a0e3c78af738d5054fdc Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:14 -0700 Subject: coda: remove unnecessary dentry_unhash on rmdir, dir rename Coda has no problems with references to unlinked directories. CC: Jan Harkes CC: coda@cs.cmu.edu CC: codalist@coda.cs.cmu.edu Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/coda/dir.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/coda/dir.c b/fs/coda/dir.c index a46126fd573..2b8dae4d121 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -336,8 +336,6 @@ static int coda_rmdir(struct inode *dir, struct dentry *de) int len = de->d_name.len; int error; - dentry_unhash(de); - error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); if (!error) { /* VFS may delete the child */ @@ -361,9 +359,6 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, int new_length = new_dentry->d_name.len; int error; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), coda_i2f(new_dir), old_length, new_length, (const char *) old_name, (const char *)new_name); -- cgit v1.2.3-70-g09d2 From 526e7ce5528c8addcf154acec41b615f71dce7e1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:15 -0700 Subject: fuse: remove unnecessary dentry_unhash on rmdir, dir rename Fuse has no problems with references to unlinked directories. CC: Miklos Szeredi CC: fuse-devel@lists.sourceforge.net Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/fuse/dir.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0d0e3faddcf..d5016071459 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -667,8 +667,6 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) if (IS_ERR(req)) return PTR_ERR(req); - dentry_unhash(entry); - req->in.h.opcode = FUSE_RMDIR; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 1; @@ -694,9 +692,6 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); struct fuse_req *req = fuse_get_req(fc); - if (newent->d_inode && S_ISDIR(newent->d_inode->i_mode)) - dentry_unhash(newent); - if (IS_ERR(req)) return PTR_ERR(req); -- cgit v1.2.3-70-g09d2 From b80d2c228d8206cadc32226750dbbf3b707bdd19 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:16 -0700 Subject: minix: remove unnecessary dentry_unhash on rmdir, dir rename Minix has no issues with references to unlinked directories. Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/minix/namei.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/minix/namei.c b/fs/minix/namei.c index f60aed8db9c..6e6777f1b4b 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -168,8 +168,6 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err = -ENOTEMPTY; - dentry_unhash(dentry); - if (minix_empty_dir(inode)) { err = minix_unlink(dir, dentry); if (!err) { @@ -192,9 +190,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, struct minix_dir_entry * old_de; int err = -ENOENT; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - old_de = minix_find_entry(old_dentry, &old_page); if (!old_de) goto out; -- cgit v1.2.3-70-g09d2 From 45adfef7d023004ff95bf63b5f2f0e2d88afac3f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:17 -0700 Subject: hpfs: remove unnecessary dentry_unhash on rmdir, dir rename Hpfs has no problems with references to unlinked directories. We leave one dentry_unhash call in place, in hpfs_unlink's strange path where it tries to truncate a file because the disk is full. I'm not sure what the full story is there. CC: Mikulas Patocka Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/hpfs/namei.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index ff0ce21c086..acf95dab2aa 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -439,8 +439,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) int err; int r; - dentry_unhash(dentry); - hpfs_adjust_length(name, &len); hpfs_lock(dir->i_sb); err = -ENOENT; @@ -535,9 +533,6 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct fnode *fnode; int err; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - if ((err = hpfs_chk_name(new_name, &new_len))) return err; err = 0; hpfs_adjust_length(old_name, &old_len); -- cgit v1.2.3-70-g09d2 From f4ff0e25c5093dd89e9cac4a8f71a57587ada787 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:18 -0700 Subject: fat: remove unnecessary dentry_unhash on rmdir, dir rename fat does not have problems with references to unlinked directories. CC: OGAWA Hirofumi Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/fat/namei_msdos.c | 5 ----- fs/fat/namei_vfat.c | 5 ----- 2 files changed, 10 deletions(-) diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index be15437c272..3b222dafd15 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -326,8 +326,6 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) struct fat_slot_info sinfo; int err; - dentry_unhash(dentry); - lock_super(sb); /* * Check whether the directory is not in use, then check @@ -459,9 +457,6 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - err = fat_scan(old_dir, old_name, &old_sinfo); if (err) { err = -EIO; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index c61a6789f36..20b4ea53fdc 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -824,8 +824,6 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) struct fat_slot_info sinfo; int err; - dentry_unhash(dentry); - lock_super(sb); err = fat_dir_empty(inode); @@ -933,9 +931,6 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, int err, is_dir, update_dotdot, corrupt = 0; struct super_block *sb = old_dir->i_sb; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; -- cgit v1.2.3-70-g09d2 From 98702467f829177b3993f17da9fe5c202d160e5e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 May 2011 13:42:19 -0700 Subject: configfs: remove unnecessary dentry_unhash on rmdir, dir rename configfs does not have problems with references to unlinked directories. CC: Joel Becker Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/configfs/dir.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 9d17d350abc..9a37a9b6de3 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1359,8 +1359,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; - dentry_unhash(dentry); - if (dentry->d_parent == configfs_sb->s_root) return -EPERM; -- cgit v1.2.3-70-g09d2 From 3d08bcc887a1c8d12be8d81f747ffa2e8a44b67b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 27 May 2011 12:23:34 -0700 Subject: mm: Wait for writeback when grabbing pages to begin a write When grabbing a page for a buffered IO write, the mm should wait for writeback on the page to complete so that the page does not become writable during the IO operation. This change is needed to provide page stability during writes for all filesystems. Signed-off-by: Darrick J. Wong Signed-off-by: Al Viro --- mm/filemap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index bcdc393b658..dac95a24dea 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2327,7 +2327,7 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, repeat: page = find_lock_page(mapping, index); if (page) - return page; + goto found; page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask); if (!page) @@ -2340,6 +2340,8 @@ repeat: goto repeat; return NULL; } +found: + wait_on_page_writeback(page); return page; } EXPORT_SYMBOL(grab_cache_page_write_begin); -- cgit v1.2.3-70-g09d2 From d76ee18a8551e33ad7dbd55cac38bc7b094f3abb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 27 May 2011 12:23:41 -0700 Subject: fs: block_page_mkwrite should wait for writeback to finish For filesystems such as nilfs2 and xfs that use block_page_mkwrite, modify that function to wait for pending writeback before allowing the page to become writable. This is needed to stabilize pages during writeback for those two filesystems. Signed-off-by: Darrick J. Wong Signed-off-by: Al Viro --- fs/buffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/buffer.c b/fs/buffer.c index 698c6b2cc46..49c9aada037 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2382,6 +2382,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, ret = -EAGAIN; goto out_unlock; } + wait_on_page_writeback(page); return 0; out_unlock: unlock_page(page); -- cgit v1.2.3-70-g09d2 From 1ed7891f8dcdba62041dfa265aea038afb49635c Mon Sep 17 00:00:00 2001 From: Mattias Wallin Date: Fri, 27 May 2011 11:49:43 +0200 Subject: mfd: Use mfd cell platform_data for db8500-prcmu cells platform bits With the addition of a device platform mfd_cell pointer, MFD drivers can go back to passing platform data back to their sub drivers. This allows for an mfd_cell->mfd_data removal and thus keep the sub drivers MFD agnostic. This is mostly needed for non MFD aware sub drivers. Signed-off-by: Mattias Wallin Signed-off-by: Samuel Ortiz --- drivers/mfd/db8500-prcmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index e63782107e2..02a15d7cb3b 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -2005,7 +2005,8 @@ static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = { static struct mfd_cell db8500_prcmu_devs[] = { { .name = "db8500-prcmu-regulators", - .mfd_data = &db8500_regulators, + .platform_data = &db8500_regulators, + .pdata_size = sizeof(db8500_regulators), }, { .name = "cpufreq-u8500", -- cgit v1.2.3-70-g09d2 From 66c500ff36d74c6669ad260deda4ce8125b55304 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Fri, 27 May 2011 22:13:08 +0100 Subject: mfd: Fix build breakage caused by tps65910 gpio directory move Signed-off-by: Liam Girdwood Acked-by: Randy Dunlap Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index b6c267724e1..0f09c057e79 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -721,7 +721,7 @@ config MFD_PM8XXX_IRQ config MFD_TPS65910 bool "TPS65910 Power Management chip" - depends on I2C=y + depends on I2C=y && GPIOLIB select MFD_CORE select GPIO_TPS65910 help -- cgit v1.2.3-70-g09d2 From 6df87e65dbe4528ef07b917af89913abb8caaaba Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 28 May 2011 14:44:46 +0200 Subject: block: improve the bio_add_page() and bio_add_pc_page() descriptions The descriptions of bio_add_page() and bio_add_pc_page() are slightly inconsistent; improve them. Signed-off-by: Andreas Gruenbacher Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- fs/bio.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/bio.c b/fs/bio.c index 840a0d75524..9bfade8a609 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -638,10 +638,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page * @offset: vec entry offset * * Attempt to add a page to the bio_vec maplist. This can fail for a - * number of reasons, such as the bio being full or target block - * device limitations. The target block device must allow bio's - * smaller than PAGE_SIZE, so it is always possible to add a single - * page to an empty bio. This should only be used by REQ_PC bios. + * number of reasons, such as the bio being full or target block device + * limitations. The target block device must allow bio's up to PAGE_SIZE, + * so it is always possible to add a single page to an empty bio. + * + * This should only be used by REQ_PC bios. */ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset) @@ -659,10 +660,9 @@ EXPORT_SYMBOL(bio_add_pc_page); * @offset: vec entry offset * * Attempt to add a page to the bio_vec maplist. This can fail for a - * number of reasons, such as the bio being full or target block - * device limitations. The target block device must allow bio's - * smaller than PAGE_SIZE, so it is always possible to add a single - * page to an empty bio. + * number of reasons, such as the bio being full or target block device + * limitations. The target block device must allow bio's up to PAGE_SIZE, + * so it is always possible to add a single page to an empty bio. */ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) -- cgit v1.2.3-70-g09d2 From 35fbf5bcf497d6ddbe7b6478141e7526d1474ff5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 28 May 2011 14:44:46 +0200 Subject: nbd: pass MSG_* flags to kernel_recvmsg() Unlike kernel_sendmsg(), kernel_recvmsg() requires passing flags explicitly via last parameter instead of struct msghdr.msg_flags. Therefore calls to sock_xmit(lo, 0, ..., MSG_WAITALL) have not been processed properly by tcp layer wrt. the flag. Fix it. Signed-off-by: Namhyung Kim Cc: Paul Clements Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index e6fc716aca4..1df3bfe5225 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -192,7 +192,8 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, if (lo->xmit_timeout) del_timer_sync(&ti); } else - result = kernel_recvmsg(sock, &msg, &iov, 1, size, 0); + result = kernel_recvmsg(sock, &msg, &iov, 1, size, + msg.msg_flags); if (signal_pending(current)) { siginfo_t info; -- cgit v1.2.3-70-g09d2 From 3b2710824e00d238554c13b5add347e6c701ab1a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 28 May 2011 14:44:46 +0200 Subject: nbd: limit module parameters to a sane value The 'max_part' parameter controls the number of maximum partition a nbd device can have. However if a user specifies very large value it would exceed the limitation of device minor number and can cause a kernel oops (or, at least, produce invalid device nodes in some cases). In addition, specifying large 'nbds_max' value causes same problem for the same reason. On my desktop, following command results to the kernel bug: $ sudo modprobe nbd max_part=100000 kernel BUG at /media/Linux_Data/project/linux/fs/sysfs/group.c:65! invalid opcode: 0000 [#1] SMP last sysfs file: /sys/devices/virtual/block/nbd4/range CPU 1 Modules linked in: nbd(+) bridge stp llc kvm_intel kvm asus_atk0110 sg sr_mod cdrom Pid: 2522, comm: modprobe Tainted: G W 2.6.39-leonard+ #159 System manufacturer System Product Name/P5G41TD-M PRO RIP: 0010:[] [] internal_create_group+0x2f/0x166 RSP: 0018:ffff8801009f1de8 EFLAGS: 00010246 RAX: 00000000ffffffef RBX: ffff880103920478 RCX: 00000000000a7bd3 RDX: ffffffff81a2dbe0 RSI: 0000000000000000 RDI: ffff880103920478 RBP: ffff8801009f1e38 R08: ffff880103920468 R09: ffff880103920478 R10: ffff8801009f1de8 R11: ffff88011eccbb68 R12: ffffffff81a2dbe0 R13: ffff880103920468 R14: 0000000000000000 R15: ffff880103920400 FS: 00007f3c49de9700(0000) GS:ffff88011f800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f3b7fe7c000 CR3: 00000000cd58d000 CR4: 00000000000406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process modprobe (pid: 2522, threadinfo ffff8801009f0000, task ffff8801009a93a0) Stack: ffff8801009f1e58 ffffffff812e8f6e ffff8801009f1e58 ffffffff812e7a80 ffff880000000010 ffff880103920400 ffff8801002fd0c0 ffff880103920468 0000000000000011 ffff880103920400 ffff8801009f1e48 ffffffff8115ab6a Call Trace: [] ? device_add+0x4f1/0x5e4 [] ? dev_set_name+0x41/0x43 [] sysfs_create_group+0x13/0x15 [] blk_trace_init_sysfs+0x14/0x16 [] blk_register_queue+0x4c/0xfd [] add_disk+0xe4/0x29c [] nbd_init+0x2ab/0x30d [nbd] [] ? 0xffffffffa007dfff [] do_one_initcall+0x7f/0x13e [] sys_init_module+0xa1/0x1e3 [] system_call_fastpath+0x16/0x1b Code: 41 57 41 56 41 55 41 54 53 48 83 ec 28 0f 1f 44 00 00 48 89 fb 41 89 f6 49 89 d4 48 85 ff 74 0b 85 f6 75 0b 48 83 7f 30 00 75 14 <0f> 0b eb fe b9 ea ff ff ff 48 83 7f 30 00 0f 84 09 01 00 00 49 RIP [] internal_create_group+0x2f/0x166 RSP ---[ end trace 753285ffbf72c57c ]--- Signed-off-by: Namhyung Kim Cc: Laurent Vivier Cc: Paul Clements Cc: stable@kernel.org Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1df3bfe5225..fdee7567fd1 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -757,6 +757,12 @@ static int __init nbd_init(void) if (max_part > 0) part_shift = fls(max_part); + if ((1UL << part_shift) > DISK_MAX_PARTS) + return -EINVAL; + + if (nbds_max > 1UL << (MINORBITS - part_shift)) + return -EINVAL; + for (i = 0; i < nbds_max; i++) { struct gendisk *disk = alloc_disk(1 << part_shift); if (!disk) -- cgit v1.2.3-70-g09d2 From 5988ce239682854d4e632fb58bff000700830394 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 28 May 2011 14:44:46 +0200 Subject: nbd: adjust 'max_part' according to part_shift The 'max_part' parameter determines how many partitions are supported on each nbd device. However the actual number can be changed to the power of 2 minus 1 form during the module initialization as alloc_disk() is called with (1 << part_shift) for some reason. So adjust 'max_part' also at least for consistency with loop and brd. It is exported via sysfs already, and a user should check this value after module loading if [s]he wants to use that number correctly (i.e. fdisk or something). Signed-off-by: Namhyung Kim Cc: Laurent Vivier Cc: Paul Clements Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index fdee7567fd1..f533f3375e2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -754,9 +754,20 @@ static int __init nbd_init(void) return -ENOMEM; part_shift = 0; - if (max_part > 0) + if (max_part > 0) { part_shift = fls(max_part); + /* + * Adjust max_part according to part_shift as it is exported + * to user space so that user can know the max number of + * partition kernel should be able to manage. + * + * Note that -1 is required because partition 0 is reserved + * for the whole disk. + */ + max_part = (1UL << part_shift) - 1; + } + if ((1UL << part_shift) > DISK_MAX_PARTS) return -EINVAL; -- cgit v1.2.3-70-g09d2 From cd4ae6adf8b1c21d88e83ed56afeeef97b28f356 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Fri, 22 Apr 2011 18:53:54 +0800 Subject: sched: More sched_domain iterations fixes sched_domain iterations needs to be protected by rcu_read_lock() now, this patch adds another two places which needs the rcu lock, which is spotted by following suspicious rcu_dereference_check() usage warnings. kernel/sched_rt.c:1244 invoked rcu_dereference_check() without protection! kernel/sched_stats.h:41 invoked rcu_dereference_check() without protection! Signed-off-by: Xiaotian Feng Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1303469634-11678-1-git-send-email-dfeng@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 10 ++++++++-- kernel/sched_stats.h | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 64b2a37c07d..88725c939e0 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1263,6 +1263,7 @@ static int find_lowest_rq(struct task_struct *task) if (!cpumask_test_cpu(this_cpu, lowest_mask)) this_cpu = -1; /* Skip this_cpu opt if not among lowest */ + rcu_read_lock(); for_each_domain(cpu, sd) { if (sd->flags & SD_WAKE_AFFINE) { int best_cpu; @@ -1272,15 +1273,20 @@ static int find_lowest_rq(struct task_struct *task) * remote processor. */ if (this_cpu != -1 && - cpumask_test_cpu(this_cpu, sched_domain_span(sd))) + cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { + rcu_read_unlock(); return this_cpu; + } best_cpu = cpumask_first_and(lowest_mask, sched_domain_span(sd)); - if (best_cpu < nr_cpu_ids) + if (best_cpu < nr_cpu_ids) { + rcu_read_unlock(); return best_cpu; + } } } + rcu_read_unlock(); /* * And finally, if there were no matches within the domains diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 48ddf431db0..331e01bcd02 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -37,7 +37,7 @@ static int show_schedstat(struct seq_file *seq, void *v) #ifdef CONFIG_SMP /* domain-specific stats */ - preempt_disable(); + rcu_read_lock(); for_each_domain(cpu, sd) { enum cpu_idle_type itype; @@ -64,7 +64,7 @@ static int show_schedstat(struct seq_file *seq, void *v) sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); } - preempt_enable(); + rcu_read_unlock(); #endif } kfree(mask_str); -- cgit v1.2.3-70-g09d2 From d6aa8f85f16379d42c147b22b59e33b67f9ff466 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 May 2011 14:21:33 +0200 Subject: sched: Fix ttwu() for __ARCH_WANT_INTERRUPTS_ON_CTXSW Marc reported that e4a52bcb9 (sched: Remove rq->lock from the first half of ttwu()) broke his ARM-SMP machine. Now ARM is one of the few __ARCH_WANT_INTERRUPTS_ON_CTXSW users, so that exception in the ttwu() code was suspect. Yong found that the interrupt could hit after context_switch() changes current but before it clears p->on_cpu, if that interrupt were to attempt a wake-up of p we would indeed find ourselves spinning in IRQ context. Fix this by reverting to the old behaviour for this situation and perform a full remote wake-up. Cc: Frank Rowand Cc: Yong Zhang Cc: Oleg Nesterov Reported-by: Marc Zyngier Tested-by: Marc Zyngier Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 5e43e9dc65d..a80ee911900 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2573,7 +2573,26 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu) if (!next) smp_send_reschedule(cpu); } -#endif + +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW +static int ttwu_activate_remote(struct task_struct *p, int wake_flags) +{ + struct rq *rq; + int ret = 0; + + rq = __task_rq_lock(p); + if (p->on_cpu) { + ttwu_activate(rq, p, ENQUEUE_WAKEUP); + ttwu_do_wakeup(rq, p, wake_flags); + ret = 1; + } + __task_rq_unlock(rq); + + return ret; + +} +#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ +#endif /* CONFIG_SMP */ static void ttwu_queue(struct task_struct *p, int cpu) { @@ -2631,17 +2650,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) while (p->on_cpu) { #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW /* - * If called from interrupt context we could have landed in the - * middle of schedule(), in this case we should take care not - * to spin on ->on_cpu if p is current, since that would - * deadlock. + * In case the architecture enables interrupts in + * context_switch(), we cannot busy wait, since that + * would lead to deadlocks when an interrupt hits and + * tries to wake up @prev. So bail and do a complete + * remote wakeup. */ - if (p == current) { - ttwu_queue(p, cpu); + if (ttwu_activate_remote(p, wake_flags)) goto stat; - } -#endif +#else cpu_relax(); +#endif } /* * Pairs with the smp_wmb() in finish_lock_switch(). -- cgit v1.2.3-70-g09d2 From 1e876231785d82443a5ac8b6c660e9f51bc5dede Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 May 2011 16:21:10 -0700 Subject: sched: Fix ->min_vruntime calculation in dequeue_entity() Dima Zavin reported: "After pulling the thread off the run-queue during a cgroup change, the cfs_rq.min_vruntime gets recalculated. The dequeued thread's vruntime then gets normalized to this new value. This can then lead to the thread getting an unfair boost in the new group if the vruntime of the next task in the old run-queue was way further ahead." Reported-by: Dima Zavin Signed-off-by: John Stultz Recalls-having-tested-once-upon-a-time-by: Mike Galbraith Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1305674470-23727-1-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e32a9b70ee9..433491c2dc8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1076,8 +1076,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) se->on_rq = 0; update_cfs_load(cfs_rq, 0); account_entity_dequeue(cfs_rq, se); - update_min_vruntime(cfs_rq); - update_cfs_shares(cfs_rq); /* * Normalize the entity after updating the min_vruntime because the @@ -1086,6 +1084,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) */ if (!(flags & DEQUEUE_SLEEP)) se->vruntime -= cfs_rq->min_vruntime; + + update_min_vruntime(cfs_rq); + update_cfs_shares(cfs_rq); } /* -- cgit v1.2.3-70-g09d2 From 1e1b6c511d1b23cb7c3b619d82fc7bd9f620565d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 19 May 2011 15:08:58 +0900 Subject: cpuset: Fix cpuset_cpus_allowed_fallback(), don't update tsk->rt.nr_cpus_allowed The rule is, we have to update tsk->rt.nr_cpus_allowed if we change tsk->cpus_allowed. Otherwise RT scheduler may confuse. Signed-off-by: KOSAKI Motohiro Cc: Oleg Nesterov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/4DD4B3FA.5060901@jp.fujitsu.com Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 2 +- include/linux/sched.h | 7 +++++++ kernel/cpuset.c | 4 ++-- kernel/kthread.c | 4 ++-- kernel/sched.c | 19 ++++++++++++------- 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index f20eb8f1602..e9eaec52265 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -146,7 +146,7 @@ static inline void cpuset_cpus_allowed(struct task_struct *p, static inline int cpuset_cpus_allowed_fallback(struct task_struct *p) { - cpumask_copy(&p->cpus_allowed, cpu_possible_mask); + do_set_cpus_allowed(p, cpu_possible_mask); return cpumask_any(cpu_active_mask); } diff --git a/include/linux/sched.h b/include/linux/sched.h index dc8871295a5..8da84b7bc1b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1841,9 +1841,16 @@ static inline void rcu_copy_process(struct task_struct *p) #endif #ifdef CONFIG_SMP +extern void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask); + extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); #else +static inline void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask) +{ +} static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1ceeb049c82..9c9b7545c81 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2190,7 +2190,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) rcu_read_lock(); cs = task_cs(tsk); if (cs) - cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed); + do_set_cpus_allowed(tsk, cs->cpus_allowed); rcu_read_unlock(); /* @@ -2217,7 +2217,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) * Like above we can temporary set any mask and rely on * set_cpus_allowed_ptr() as synchronization point. */ - cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask); + do_set_cpus_allowed(tsk, cpu_possible_mask); cpu = cpumask_any(cpu_active_mask); } diff --git a/kernel/kthread.c b/kernel/kthread.c index 3b34d2732bc..4ba7cccb499 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -202,8 +202,8 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) return; } - p->cpus_allowed = cpumask_of_cpu(cpu); - p->rt.nr_cpus_allowed = 1; + /* It's safe because the task is inactive. */ + do_set_cpus_allowed(p, cpumask_of(cpu)); p->flags |= PF_THREAD_BOUND; } EXPORT_SYMBOL(kthread_bind); diff --git a/kernel/sched.c b/kernel/sched.c index a80ee911900..cbb3a0eee58 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5860,7 +5860,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); - cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); + do_set_cpus_allowed(idle, cpumask_of(cpu)); /* * We're having a chicken and egg problem, even though we are * holding rq->lock, the cpu isn't yet set to this cpu so the @@ -5948,6 +5948,16 @@ static inline void sched_init_granularity(void) } #ifdef CONFIG_SMP +void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) +{ + if (p->sched_class && p->sched_class->set_cpus_allowed) + p->sched_class->set_cpus_allowed(p, new_mask); + else { + cpumask_copy(&p->cpus_allowed, new_mask); + p->rt.nr_cpus_allowed = cpumask_weight(new_mask); + } +} + /* * This is how migration works: * @@ -5993,12 +6003,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) goto out; } - if (p->sched_class->set_cpus_allowed) - p->sched_class->set_cpus_allowed(p, new_mask); - else { - cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); - } + do_set_cpus_allowed(p, new_mask); /* Can the task run on the task's current CPU? If so, we're done */ if (cpumask_test_cpu(task_cpu(p), new_mask)) -- cgit v1.2.3-70-g09d2 From f62508f68d04adefc4cf9b0177ba02c8818b3eec Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Thu, 19 May 2011 12:53:53 +0200 Subject: Documentation: Add statistics about nested locks Explain what the trailing "/1" on some lock class names of lock_stat output means. Reviewed-by: Yong Zhang Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/4DD4F6C1.5090701@gmail.com Signed-off-by: Ingo Molnar --- Documentation/lockstat.txt | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index 9c0a80d17a2..cef00d42ed5 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -12,8 +12,9 @@ Because things like lock contention can severely impact performance. - HOW Lockdep already has hooks in the lock functions and maps lock instances to -lock classes. We build on that. The graph below shows the relation between -the lock functions and the various hooks therein. +lock classes. We build on that (see Documentation/lockdep-design.txt). +The graph below shows the relation between the lock functions and the various +hooks therein. __acquire | @@ -128,6 +129,37 @@ points are the points we're contending with. The integer part of the time values is in us. +Dealing with nested locks, subclasses may appear: + +32............................................................................................................................................................................................... +33 +34 &rq->lock: 13128 13128 0.43 190.53 103881.26 97454 3453404 0.00 401.11 13224683.11 +35 --------- +36 &rq->lock 645 [] task_rq_lock+0x43/0x75 +37 &rq->lock 297 [] try_to_wake_up+0x127/0x25a +38 &rq->lock 360 [] select_task_rq_fair+0x1f0/0x74a +39 &rq->lock 428 [] scheduler_tick+0x46/0x1fb +40 --------- +41 &rq->lock 77 [] task_rq_lock+0x43/0x75 +42 &rq->lock 174 [] try_to_wake_up+0x127/0x25a +43 &rq->lock 4715 [] double_rq_lock+0x42/0x54 +44 &rq->lock 893 [] schedule+0x157/0x7b8 +45 +46............................................................................................................................................................................................... +47 +48 &rq->lock/1: 11526 11488 0.33 388.73 136294.31 21461 38404 0.00 37.93 109388.53 +49 ----------- +50 &rq->lock/1 11526 [] double_rq_lock+0x4f/0x54 +51 ----------- +52 &rq->lock/1 5645 [] double_rq_lock+0x42/0x54 +53 &rq->lock/1 1224 [] schedule+0x157/0x7b8 +54 &rq->lock/1 4336 [] double_rq_lock+0x4f/0x54 +55 &rq->lock/1 181 [] try_to_wake_up+0x127/0x25a + +Line 48 shows statistics for the second subclass (/1) of &rq->lock class +(subclass starts from 0), since in this case, as line 50 suggests, +double_rq_lock actually acquires a nested lock of two spinlocks. + View the top contending locks: # grep : /proc/lock_stat | head -- cgit v1.2.3-70-g09d2 From f506b3dc0ec454a16d40cab9ee5d75435b39dc50 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 May 2011 17:02:53 +0200 Subject: perf: Fix SIGIO handling Vince noticed that unless we mmap() a buffer, SIGIO gets lost. So explicitly push the wakeup (including signals) when requested. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: Link: http://lkml.kernel.org/n/tip-2euus3f3x3dyvdk52cjxw8zu@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index c09767f7db3..d863b3c057b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5028,6 +5028,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, else perf_event_output(event, nmi, data, regs); + if (event->fasync && event->pending_kill) { + if (nmi) { + event->pending_wakeup = 1; + irq_work_queue(&event->pending); + } else + perf_event_wakeup(event); + } + return ret; } -- cgit v1.2.3-70-g09d2 From 55c2945aa9d4d907ec5ca4f6a4e30ae908d8d30d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 11 May 2011 05:33:33 -0700 Subject: atomic: Add atomic_or() An atomic_or() function is needed by TREE_RCU to avoid deadlock, so add a generic version. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 96c038e43d6..ee456c79b0e 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -34,4 +34,17 @@ static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint) } #endif +#ifndef CONFIG_ARCH_HAS_ATOMIC_OR +static inline void atomic_or(int i, atomic_t *v) +{ + int old; + int new; + + do { + old = atomic_read(v); + new = old | i; + } while (atomic_cmpxchg(v, old, new) != old); +} +#endif /* #ifndef CONFIG_ARCH_HAS_ATOMIC_OR */ + #endif /* _LINUX_ATOMIC_H */ -- cgit v1.2.3-70-g09d2 From 8826f3b0397562eee6f8785d548be9dfdb169100 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 11 May 2011 05:41:41 -0700 Subject: rcu: Avoid acquiring rcu_node locks in timer functions This commit switches manipulations of the rcu_node ->wakemask field to atomic operations, which allows rcu_cpu_kthread_timer() to avoid acquiring the rcu_node lock. This should avoid the following lockdep splat reported by Valdis Kletnieks: [ 12.872150] usb 1-4: new high speed USB device number 3 using ehci_hcd [ 12.986667] usb 1-4: New USB device found, idVendor=413c, idProduct=2513 [ 12.986679] usb 1-4: New USB device strings: Mfr=0, Product=0, SerialNumber=0 [ 12.987691] hub 1-4:1.0: USB hub found [ 12.987877] hub 1-4:1.0: 3 ports detected [ 12.996372] input: PS/2 Generic Mouse as /devices/platform/i8042/serio1/input/input10 [ 13.071471] udevadm used greatest stack depth: 3984 bytes left [ 13.172129] [ 13.172130] ======================================================= [ 13.172425] [ INFO: possible circular locking dependency detected ] [ 13.172650] 2.6.39-rc6-mmotm0506 #1 [ 13.172773] ------------------------------------------------------- [ 13.172997] blkid/267 is trying to acquire lock: [ 13.173009] (&p->pi_lock){-.-.-.}, at: [] try_to_wake_up+0x29/0x1aa [ 13.173009] [ 13.173009] but task is already holding lock: [ 13.173009] (rcu_node_level_0){..-...}, at: [] rcu_cpu_kthread_timer+0x27/0x58 [ 13.173009] [ 13.173009] which lock already depends on the new lock. [ 13.173009] [ 13.173009] [ 13.173009] the existing dependency chain (in reverse order) is: [ 13.173009] [ 13.173009] -> #2 (rcu_node_level_0){..-...}: [ 13.173009] [] check_prevs_add+0x8b/0x104 [ 13.173009] [] validate_chain+0x36f/0x3ab [ 13.173009] [] __lock_acquire+0x369/0x3e2 [ 13.173009] [] lock_acquire+0xfc/0x14c [ 13.173009] [] _raw_spin_lock+0x36/0x45 [ 13.173009] [] rcu_read_unlock_special+0x8c/0x1d5 [ 13.173009] [] __rcu_read_unlock+0x4f/0xd7 [ 13.173009] [] rcu_read_unlock+0x21/0x23 [ 13.173009] [] cpuacct_charge+0x6c/0x75 [ 13.173009] [] update_curr+0x101/0x12e [ 13.173009] [] check_preempt_wakeup+0xf7/0x23b [ 13.173009] [] check_preempt_curr+0x2b/0x68 [ 13.173009] [] ttwu_do_wakeup+0x76/0x128 [ 13.173009] [] ttwu_do_activate.constprop.63+0x57/0x5c [ 13.173009] [] scheduler_ipi+0x48/0x5d [ 13.173009] [] smp_reschedule_interrupt+0x16/0x18 [ 13.173009] [] reschedule_interrupt+0x13/0x20 [ 13.173009] [] rcu_read_unlock+0x21/0x23 [ 13.173009] [] find_get_page+0xa9/0xb9 [ 13.173009] [] filemap_fault+0x6a/0x34d [ 13.173009] [] __do_fault+0x54/0x3e6 [ 13.173009] [] handle_pte_fault+0x12c/0x1ed [ 13.173009] [] handle_mm_fault+0x1cd/0x1e0 [ 13.173009] [] do_page_fault+0x42d/0x5de [ 13.173009] [] page_fault+0x1f/0x30 [ 13.173009] [ 13.173009] -> #1 (&rq->lock){-.-.-.}: [ 13.173009] [] check_prevs_add+0x8b/0x104 [ 13.173009] [] validate_chain+0x36f/0x3ab [ 13.173009] [] __lock_acquire+0x369/0x3e2 [ 13.173009] [] lock_acquire+0xfc/0x14c [ 13.173009] [] _raw_spin_lock+0x36/0x45 [ 13.173009] [] __task_rq_lock+0x8b/0xd3 [ 13.173009] [] wake_up_new_task+0x41/0x108 [ 13.173009] [] do_fork+0x265/0x33f [ 13.173009] [] kernel_thread+0x6b/0x6d [ 13.173009] [] rest_init+0x21/0xd2 [ 13.173009] [] start_kernel+0x3bb/0x3c6 [ 13.173009] [] x86_64_start_reservations+0xaf/0xb3 [ 13.173009] [] x86_64_start_kernel+0xf0/0xf7 [ 13.173009] [ 13.173009] -> #0 (&p->pi_lock){-.-.-.}: [ 13.173009] [] check_prev_add+0x68/0x20e [ 13.173009] [] check_prevs_add+0x8b/0x104 [ 13.173009] [] validate_chain+0x36f/0x3ab [ 13.173009] [] __lock_acquire+0x369/0x3e2 [ 13.173009] [] lock_acquire+0xfc/0x14c [ 13.173009] [] _raw_spin_lock_irqsave+0x44/0x57 [ 13.173009] [] try_to_wake_up+0x29/0x1aa [ 13.173009] [] wake_up_process+0x10/0x12 [ 13.173009] [] rcu_cpu_kthread_timer+0x44/0x58 [ 13.173009] [] call_timer_fn+0xac/0x1e9 [ 13.173009] [] run_timer_softirq+0x1aa/0x1f2 [ 13.173009] [] __do_softirq+0x109/0x26a [ 13.173009] [] call_softirq+0x1c/0x30 [ 13.173009] [] do_softirq+0x44/0xf1 [ 13.173009] [] irq_exit+0x58/0xc8 [ 13.173009] [] smp_apic_timer_interrupt+0x79/0x87 [ 13.173009] [] apic_timer_interrupt+0x13/0x20 [ 13.173009] [] get_page_from_freelist+0x2aa/0x310 [ 13.173009] [] __alloc_pages_nodemask+0x178/0x243 [ 13.173009] [] pte_alloc_one+0x1e/0x3a [ 13.173009] [] __pte_alloc+0x22/0x14b [ 13.173009] [] handle_mm_fault+0x17e/0x1e0 [ 13.173009] [] do_page_fault+0x42d/0x5de [ 13.173009] [] page_fault+0x1f/0x30 [ 13.173009] [ 13.173009] other info that might help us debug this: [ 13.173009] [ 13.173009] Chain exists of: [ 13.173009] &p->pi_lock --> &rq->lock --> rcu_node_level_0 [ 13.173009] [ 13.173009] Possible unsafe locking scenario: [ 13.173009] [ 13.173009] CPU0 CPU1 [ 13.173009] ---- ---- [ 13.173009] lock(rcu_node_level_0); [ 13.173009] lock(&rq->lock); [ 13.173009] lock(rcu_node_level_0); [ 13.173009] lock(&p->pi_lock); [ 13.173009] [ 13.173009] *** DEADLOCK *** [ 13.173009] [ 13.173009] 3 locks held by blkid/267: [ 13.173009] #0: (&mm->mmap_sem){++++++}, at: [] do_page_fault+0x1f3/0x5de [ 13.173009] #1: (&yield_timer){+.-...}, at: [] call_timer_fn+0x0/0x1e9 [ 13.173009] #2: (rcu_node_level_0){..-...}, at: [] rcu_cpu_kthread_timer+0x27/0x58 [ 13.173009] [ 13.173009] stack backtrace: [ 13.173009] Pid: 267, comm: blkid Not tainted 2.6.39-rc6-mmotm0506 #1 [ 13.173009] Call Trace: [ 13.173009] [] print_circular_bug+0xc8/0xd9 [ 13.173009] [] check_prev_add+0x68/0x20e [ 13.173009] [] ? save_stack_trace+0x28/0x46 [ 13.173009] [] check_prevs_add+0x8b/0x104 [ 13.173009] [] validate_chain+0x36f/0x3ab [ 13.173009] [] __lock_acquire+0x369/0x3e2 [ 13.173009] [] ? try_to_wake_up+0x29/0x1aa [ 13.173009] [] lock_acquire+0xfc/0x14c [ 13.173009] [] ? try_to_wake_up+0x29/0x1aa [ 13.173009] [] ? rcu_check_quiescent_state+0x82/0x82 [ 13.173009] [] _raw_spin_lock_irqsave+0x44/0x57 [ 13.173009] [] ? try_to_wake_up+0x29/0x1aa [ 13.173009] [] try_to_wake_up+0x29/0x1aa [ 13.173009] [] ? rcu_check_quiescent_state+0x82/0x82 [ 13.173009] [] wake_up_process+0x10/0x12 [ 13.173009] [] rcu_cpu_kthread_timer+0x44/0x58 [ 13.173009] [] ? rcu_check_quiescent_state+0x82/0x82 [ 13.173009] [] call_timer_fn+0xac/0x1e9 [ 13.173009] [] ? del_timer+0x75/0x75 [ 13.173009] [] ? rcu_check_quiescent_state+0x82/0x82 [ 13.173009] [] run_timer_softirq+0x1aa/0x1f2 [ 13.173009] [] __do_softirq+0x109/0x26a [ 13.173009] [] ? tick_dev_program_event+0x37/0xf6 [ 13.173009] [] ? time_hardirqs_off+0x1b/0x2f [ 13.173009] [] call_softirq+0x1c/0x30 [ 13.173009] [] do_softirq+0x44/0xf1 [ 13.173009] [] irq_exit+0x58/0xc8 [ 13.173009] [] smp_apic_timer_interrupt+0x79/0x87 [ 13.173009] [] apic_timer_interrupt+0x13/0x20 [ 13.173009] [] ? get_page_from_freelist+0x114/0x310 [ 13.173009] [] ? get_page_from_freelist+0x2aa/0x310 [ 13.173009] [] ? clear_page_c+0x7/0x10 [ 13.173009] [] ? prep_new_page+0x14c/0x1cd [ 13.173009] [] get_page_from_freelist+0x2aa/0x310 [ 13.173009] [] __alloc_pages_nodemask+0x178/0x243 [ 13.173009] [] ? __pmd_alloc+0x87/0x99 [ 13.173009] [] pte_alloc_one+0x1e/0x3a [ 13.173009] [] ? __pmd_alloc+0x87/0x99 [ 13.173009] [] __pte_alloc+0x22/0x14b [ 13.173009] [] handle_mm_fault+0x17e/0x1e0 [ 13.173009] [] do_page_fault+0x42d/0x5de [ 13.173009] [] ? sys_brk+0x32/0x10c [ 13.173009] [] ? time_hardirqs_off+0x1b/0x2f [ 13.173009] [] ? trace_hardirqs_off_caller+0x3f/0x9c [ 13.173009] [] ? trace_hardirqs_off_thunk+0x3a/0x3c [ 13.173009] [] page_fault+0x1f/0x30 [ 14.010075] usb 5-1: new full speed USB device number 2 using uhci_hcd Reported-by: Valdis Kletnieks Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 13 +++++-------- kernel/rcutree.h | 4 +++- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 8154a4a3491..5d96d68d20f 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include @@ -1526,13 +1526,10 @@ static void rcu_cpu_kthread_setrt(int cpu, int to_rt) */ static void rcu_cpu_kthread_timer(unsigned long arg) { - unsigned long flags; struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); struct rcu_node *rnp = rdp->mynode; - raw_spin_lock_irqsave(&rnp->lock, flags); - rnp->wakemask |= rdp->grpmask; - raw_spin_unlock_irqrestore(&rnp->lock, flags); + atomic_or(rdp->grpmask, &rnp->wakemask); invoke_rcu_node_kthread(rnp); } @@ -1680,11 +1677,11 @@ static int rcu_node_kthread(void *arg) for (;;) { rnp->node_kthread_status = RCU_KTHREAD_WAITING; - wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0); + wait_event_interruptible(rnp->node_wq, + atomic_read(&rnp->wakemask) != 0); rnp->node_kthread_status = RCU_KTHREAD_RUNNING; raw_spin_lock_irqsave(&rnp->lock, flags); - mask = rnp->wakemask; - rnp->wakemask = 0; + mask = atomic_xchg(&rnp->wakemask, 0); rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { if ((mask & 0x1) == 0) diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 93d4a1c2e88..561dcb9a8d2 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -119,7 +119,9 @@ struct rcu_node { /* elements that need to drain to allow the */ /* current expedited grace period to */ /* complete (only for TREE_PREEMPT_RCU). */ - unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */ + atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */ + /* Since this has meaning only for leaf */ + /* rcu_node structures, 32 bits suffices. */ unsigned long qsmaskinit; /* Per-GP initial value for qsmask & expmask. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ -- cgit v1.2.3-70-g09d2 From 08bca60a6912ad225254250c0a9c3a05b4152cfa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 May 2011 16:06:29 -0700 Subject: rcu: Remove waitqueue usage for cpu, node, and boost kthreads It is not necessary to use waitqueues for the RCU kthreads because we always know exactly which thread is to be awakened. In addition, wake_up() only issues an actual wakeup when there is a thread waiting on the queue, which was why there was an extra explicit wake_up_process() to get the RCU kthreads started. Eliminating the waitqueues (and wake_up()) in favor of wake_up_process() eliminates the need for the initial wake_up_process() and also shrinks the data structure size a bit. The wakeup logic is placed in a new rcu_wait() macro. Signed-off-by: Peter Zijlstra Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 23 ++++++----------------- kernel/rcutree.h | 17 ++++++++++------- kernel/rcutree_plugin.h | 16 +--------------- 3 files changed, 17 insertions(+), 39 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 5d96d68d20f..05e254e930e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -95,7 +95,6 @@ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); -static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq); DEFINE_PER_CPU(char, rcu_cpu_has_work); static char rcu_kthreads_spawnable; @@ -1476,7 +1475,7 @@ static void invoke_rcu_cpu_kthread(void) local_irq_restore(flags); return; } - wake_up(&__get_cpu_var(rcu_cpu_wq)); + wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); local_irq_restore(flags); } @@ -1596,14 +1595,12 @@ static int rcu_cpu_kthread(void *arg) unsigned long flags; int spincnt = 0; unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); - wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu); char work; char *workp = &per_cpu(rcu_cpu_has_work, cpu); for (;;) { *statusp = RCU_KTHREAD_WAITING; - wait_event_interruptible(*wqp, - *workp != 0 || kthread_should_stop()); + rcu_wait(*workp != 0 || kthread_should_stop()); local_bh_disable(); if (rcu_cpu_kthread_should_stop(cpu)) { local_bh_enable(); @@ -1654,7 +1651,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); per_cpu(rcu_cpu_kthread_task, cpu) = t; - wake_up_process(t); sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); return 0; @@ -1677,8 +1673,7 @@ static int rcu_node_kthread(void *arg) for (;;) { rnp->node_kthread_status = RCU_KTHREAD_WAITING; - wait_event_interruptible(rnp->node_wq, - atomic_read(&rnp->wakemask) != 0); + rcu_wait(atomic_read(&rnp->wakemask) != 0); rnp->node_kthread_status = RCU_KTHREAD_RUNNING; raw_spin_lock_irqsave(&rnp->lock, flags); mask = atomic_xchg(&rnp->wakemask, 0); @@ -1762,7 +1757,6 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, raw_spin_lock_irqsave(&rnp->lock, flags); rnp->node_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); - wake_up_process(t); sp.sched_priority = 99; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); } @@ -1779,21 +1773,16 @@ static int __init rcu_spawn_kthreads(void) rcu_kthreads_spawnable = 1; for_each_possible_cpu(cpu) { - init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu)); per_cpu(rcu_cpu_has_work, cpu) = 0; if (cpu_online(cpu)) (void)rcu_spawn_one_cpu_kthread(cpu); } rnp = rcu_get_root(rcu_state); - init_waitqueue_head(&rnp->node_wq); - rcu_init_boost_waitqueue(rnp); (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - if (NUM_RCU_NODES > 1) - rcu_for_each_leaf_node(rcu_state, rnp) { - init_waitqueue_head(&rnp->node_wq); - rcu_init_boost_waitqueue(rnp); + if (NUM_RCU_NODES > 1) { + rcu_for_each_leaf_node(rcu_state, rnp) (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - } + } return 0; } early_initcall(rcu_spawn_kthreads); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 561dcb9a8d2..7b9a08b4aae 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -159,9 +159,6 @@ struct rcu_node { struct task_struct *boost_kthread_task; /* kthread that takes care of priority */ /* boosting for this rcu_node structure. */ - wait_queue_head_t boost_wq; - /* Wait queue on which to park the boost */ - /* kthread. */ unsigned int boost_kthread_status; /* State of boost_kthread_task for tracing. */ unsigned long n_tasks_boosted; @@ -188,9 +185,6 @@ struct rcu_node { /* kthread that takes care of this rcu_node */ /* structure, for example, awakening the */ /* per-CPU kthreads as needed. */ - wait_queue_head_t node_wq; - /* Wait queue on which to park the per-node */ - /* kthread. */ unsigned int node_kthread_status; /* State of node_kthread_task for tracing. */ } ____cacheline_internodealigned_in_smp; @@ -336,6 +330,16 @@ struct rcu_data { /* scheduling clock irq */ /* before ratting on them. */ +#define rcu_wait(cond) \ +do { \ + for (;;) { \ + set_current_state(TASK_INTERRUPTIBLE); \ + if (cond) \ + break; \ + schedule(); \ + } \ + __set_current_state(TASK_RUNNING); \ +} while (0) /* * RCU global state, including node hierarchy. This hierarchy is @@ -445,7 +449,6 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu); static void rcu_preempt_send_cbs_to_online(void); static void __init __rcu_init_preempt(void); static void rcu_needs_cpu_flush(void); -static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, cpumask_var_t cm); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ed339702481..049f2787a98 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1196,8 +1196,7 @@ static int rcu_boost_kthread(void *arg) for (;;) { rnp->boost_kthread_status = RCU_KTHREAD_WAITING; - wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks || - rnp->exp_tasks); + rcu_wait(rnp->boost_tasks || rnp->exp_tasks); rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; more2boost = rcu_boost(rnp); if (more2boost) @@ -1274,14 +1273,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; } -/* - * Initialize the RCU-boost waitqueue. - */ -static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) -{ - init_waitqueue_head(&rnp->boost_wq); -} - /* * Create an RCU-boost kthread for the specified node if one does not * already exist. We only create this kthread for preemptible RCU. @@ -1306,7 +1297,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, raw_spin_lock_irqsave(&rnp->lock, flags); rnp->boost_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); - wake_up_process(t); sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); return 0; @@ -1328,10 +1318,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) { } -static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) -{ -} - static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp, int rnp_index) -- cgit v1.2.3-70-g09d2 From cc3ce5176d83cd8ae1134f86e208ea758d6cb78e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 25 May 2011 13:42:06 -0700 Subject: rcu: Start RCU kthreads in TASK_INTERRUPTIBLE state Upon creation, kthreads are in TASK_UNINTERRUPTIBLE state, which can result in softlockup warnings. Because some of RCU's kthreads can legitimately be idle indefinitely, start them in TASK_INTERRUPTIBLE state in order to avoid those warnings. Suggested-by: Peter Zijlstra Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Yinghai Lu Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 2 ++ kernel/rcutree_plugin.h | 1 + 2 files changed, 3 insertions(+) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 05e254e930e..77a7671dd14 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1648,6 +1648,7 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) if (IS_ERR(t)) return PTR_ERR(t); kthread_bind(t, cpu); + set_task_state(t, TASK_INTERRUPTIBLE); per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); per_cpu(rcu_cpu_kthread_task, cpu) = t; @@ -1755,6 +1756,7 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, if (IS_ERR(t)) return PTR_ERR(t); raw_spin_lock_irqsave(&rnp->lock, flags); + set_task_state(t, TASK_INTERRUPTIBLE); rnp->node_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = 99; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 049f2787a98..a767b7dac36 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1295,6 +1295,7 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, if (IS_ERR(t)) return PTR_ERR(t); raw_spin_lock_irqsave(&rnp->lock, flags); + set_task_state(t, TASK_INTERRUPTIBLE); rnp->boost_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = RCU_KTHREAD_PRIO; -- cgit v1.2.3-70-g09d2 From 69b4573296469fd3f70cf7044693074980517067 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 28 May 2011 08:25:51 -0700 Subject: Cache xattr security drop check for write v2 Some recent benchmarking on btrfs showed that a major scaling bottleneck on large systems on btrfs is currently the xattr lookup on every write. Why xattr lookup on every write I hear you ask? write wants to drop suid and security related xattrs that could set o capabilities for executables. To do that it currently looks up security.capability on EVERY write (even for non executables) to decide whether to drop it or not. In btrfs this causes an additional tree walk, hitting some per file system locks and quite bad scalability. In a simple read workload on a 8S system I saw over 90% CPU time in spinlocks related to that. Chris Mason tells me this is also a problem in ext4, where it hits the global mbcache lock. This patch adds a simple per inode to avoid this problem. We only do the lookup once per file and then if there is no xattr cache the decision. All xattr changes clear the flag. I also used the same flag to avoid the suid check, although that one is pretty cheap. A file system can also set this flag when it creates the inode, if it has a cheap way to do so. This is done for some common file systems in followon patches. With this patch a major part of the lock contention disappears for btrfs. Some testing on smaller systems didn't show significant performance changes, but at least it helps the larger systems and is generally more efficient. v2: Rename is_sgid. add file system helper. Cc: chris.mason@oracle.com Cc: josef@redhat.com Cc: viro@zeniv.linux.org.uk Cc: agruen@linbit.com Cc: Serge E. Hallyn Signed-off-by: Andi Kleen Signed-off-by: Al Viro --- fs/attr.c | 7 +++++++ fs/xattr.c | 7 +++++-- include/linux/fs.h | 13 +++++++++++++ mm/filemap.c | 14 ++++++++++++-- 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index 91dbe2a107f..caf2aa521e2 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -175,6 +175,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr) return -EPERM; } + if ((ia_valid & ATTR_MODE)) { + mode_t amode = attr->ia_mode; + /* Flag setting protected by i_mutex */ + if (is_sxid(amode)) + inode->i_flags &= ~S_NOSEC; + } + now = current_fs_time(inode->i_sb); attr->ia_ctime = now; diff --git a/fs/xattr.c b/fs/xattr.c index 4be2e7666d0..f060663ab70 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -91,7 +91,11 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, { struct inode *inode = dentry->d_inode; int error = -EOPNOTSUPP; + int issec = !strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN); + if (issec) + inode->i_flags &= ~S_NOSEC; if (inode->i_op->setxattr) { error = inode->i_op->setxattr(dentry, name, value, size, flags); if (!error) { @@ -99,8 +103,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, security_inode_post_setxattr(dentry, name, value, size, flags); } - } else if (!strncmp(name, XATTR_SECURITY_PREFIX, - XATTR_SECURITY_PREFIX_LEN)) { + } else if (issec) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; error = security_inode_setsecurity(inode, suffix, value, size, flags); diff --git a/include/linux/fs.h b/include/linux/fs.h index 573028df050..c55d6b7cd5d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -237,6 +237,7 @@ struct inodes_stat_t { #define S_PRIVATE 512 /* Inode is fs-internal */ #define S_IMA 1024 /* Inode has an associated IMA struct */ #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ +#define S_NOSEC 4096 /* no suid or xattr security attributes */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -273,6 +274,7 @@ struct inodes_stat_t { #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) +#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ @@ -2582,5 +2584,16 @@ int __init get_filesystem_list(char *buf); #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ (flag & __FMODE_NONOTIFY))) +static inline int is_sxid(mode_t mode) +{ + return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); +} + +static inline void inode_has_no_xattr(struct inode *inode) +{ + if (!is_sxid(inode->i_mode)) + inode->i_flags |= S_NOSEC; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ diff --git a/mm/filemap.c b/mm/filemap.c index dac95a24dea..d7b10578a64 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1982,16 +1982,26 @@ static int __remove_suid(struct dentry *dentry, int kill) int file_remove_suid(struct file *file) { struct dentry *dentry = file->f_path.dentry; - int killsuid = should_remove_suid(dentry); - int killpriv = security_inode_need_killpriv(dentry); + struct inode *inode = dentry->d_inode; + int killsuid; + int killpriv; int error = 0; + /* Fast path for nothing security related */ + if (IS_NOSEC(inode)) + return 0; + + killsuid = should_remove_suid(dentry); + killpriv = security_inode_need_killpriv(dentry); + if (killpriv < 0) return killpriv; if (killpriv) error = security_inode_killpriv(dentry); if (!error && killsuid) error = __remove_suid(dentry, killsuid); + if (!error) + inode->i_flags |= S_NOSEC; return error; } -- cgit v1.2.3-70-g09d2 From 091c75985e2f3d1b60eb25d577f04923c1b8e022 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 27 Oct 2010 03:41:03 -0400 Subject: Blackfin: bfin_serial.h: turn default port wrappers into stubs Any consumer that needs to access the MMRs has to provide these helpers, so make the default into useless stubs. Signed-off-by: Mike Frysinger --- arch/blackfin/include/asm/bfin_serial.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/blackfin/include/asm/bfin_serial.h b/arch/blackfin/include/asm/bfin_serial.h index 7dbc664eab1..7fd0ec7b5b0 100644 --- a/arch/blackfin/include/asm/bfin_serial.h +++ b/arch/blackfin/include/asm/bfin_serial.h @@ -184,7 +184,7 @@ struct bfin_uart_regs { #undef __BFP #ifndef port_membase -# define port_membase(p) (((struct bfin_serial_port *)(p))->port.membase) +# define port_membase(p) 0 #endif #define UART_GET_CHAR(p) bfin_read16(port_membase(p) + OFFSET_RBR) @@ -235,10 +235,10 @@ struct bfin_uart_regs { #define UART_SET_DLAB(p) do { UART_PUT_LCR(p, UART_GET_LCR(p) | DLAB); SSYNC(); } while (0) #ifndef put_lsr_cache -# define put_lsr_cache(p, v) (((struct bfin_serial_port *)(p))->lsr = (v)) +# define put_lsr_cache(p, v) #endif #ifndef get_lsr_cache -# define get_lsr_cache(p) (((struct bfin_serial_port *)(p))->lsr) +# define get_lsr_cache(p) 0 #endif /* The hardware clears the LSR bits upon read, so we need to cache -- cgit v1.2.3-70-g09d2 From 63917efc4feb856e134eedc9ad7a9810fec12968 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 28 Sep 2009 03:16:01 +0000 Subject: Blackfin: mach/bfin_serial_5xx.h: punt now-unused header Now that the serial code has been unified in bfin_serial.h, and the Blackfin UART driver pushed its resources to the boards files, we don't need these headers anymore. Signed-off-by: Mike Frysinger --- .../mach-bf518/include/mach/bfin_serial_5xx.h | 79 ------------------ .../mach-bf527/include/mach/bfin_serial_5xx.h | 79 ------------------ .../mach-bf533/include/mach/bfin_serial_5xx.h | 52 ------------ .../mach-bf537/include/mach/bfin_serial_5xx.h | 79 ------------------ .../mach-bf538/include/mach/bfin_serial_5xx.h | 93 --------------------- .../mach-bf548/include/mach/bfin_serial_5xx.h | 94 ---------------------- .../mach-bf561/include/mach/bfin_serial_5xx.h | 52 ------------ 7 files changed, 528 deletions(-) delete mode 100644 arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h delete mode 100644 arch/blackfin/mach-bf527/include/mach/bfin_serial_5xx.h delete mode 100644 arch/blackfin/mach-bf533/include/mach/bfin_serial_5xx.h delete mode 100644 arch/blackfin/mach-bf537/include/mach/bfin_serial_5xx.h delete mode 100644 arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h delete mode 100644 arch/blackfin/mach-bf548/include/mach/bfin_serial_5xx.h delete mode 100644 arch/blackfin/mach-bf561/include/mach/bfin_serial_5xx.h diff --git a/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h deleted file mode 100644 index f6d924ac0c4..00000000000 --- a/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2008-2009 Analog Devices Inc. - * - * Licensed under the GPL-2 or later - */ - -#include -#include - -#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) -# define CONFIG_SERIAL_BFIN_CTSRTS - -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_CTS_PIN -# define CONFIG_UART1_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_RTS_PIN -# define CONFIG_UART1_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { -#ifdef CONFIG_SERIAL_BFIN_UART0 - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART1 - { - 0xFFC02000, - IRQ_UART1_RX, - IRQ_UART1_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART1_TX, - CH_UART1_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART1_CTS_PIN, - CONFIG_UART1_RTS_PIN, -#endif - }, -#endif -}; - -#define DRIVER_NAME "bfin-uart" - -#include diff --git a/arch/blackfin/mach-bf527/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf527/include/mach/bfin_serial_5xx.h deleted file mode 100644 index 960e08919de..00000000000 --- a/arch/blackfin/mach-bf527/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2007-2009 Analog Devices Inc. - * - * Licensed under the GPL-2 or later - */ - -#include -#include - -#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) -# define CONFIG_SERIAL_BFIN_CTSRTS - -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_CTS_PIN -# define CONFIG_UART1_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_RTS_PIN -# define CONFIG_UART1_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { -#ifdef CONFIG_SERIAL_BFIN_UART0 - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART1 - { - 0xFFC02000, - IRQ_UART1_RX, - IRQ_UART1_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART1_TX, - CH_UART1_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART1_CTS_PIN, - CONFIG_UART1_RTS_PIN, -#endif - }, -#endif -}; - -#define DRIVER_NAME "bfin-uart" - -#include diff --git a/arch/blackfin/mach-bf533/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf533/include/mach/bfin_serial_5xx.h deleted file mode 100644 index 45dcaa4f3e4..00000000000 --- a/arch/blackfin/mach-bf533/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2006-2009 Analog Devices Inc. - * - * Licensed under the GPL-2 or later - */ - -#include -#include - -#ifdef CONFIG_BFIN_UART0_CTSRTS -# define CONFIG_SERIAL_BFIN_CTSRTS -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - } -}; - -#define DRIVER_NAME "bfin-uart" - -#include diff --git a/arch/blackfin/mach-bf537/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf537/include/mach/bfin_serial_5xx.h deleted file mode 100644 index 3e955dba895..00000000000 --- a/arch/blackfin/mach-bf537/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2006-2009 Analog Devices Inc. - * - * Licensed under the GPL-2 or later - */ - -#include -#include - -#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) -# define CONFIG_SERIAL_BFIN_CTSRTS - -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_CTS_PIN -# define CONFIG_UART1_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_RTS_PIN -# define CONFIG_UART1_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { -#ifdef CONFIG_SERIAL_BFIN_UART0 - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART1 - { - 0xFFC02000, - IRQ_UART1_RX, - IRQ_UART1_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART1_TX, - CH_UART1_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART1_CTS_PIN, - CONFIG_UART1_RTS_PIN, -#endif - }, -#endif -}; - -#define DRIVER_NAME "bfin-uart" - -#include diff --git a/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h deleted file mode 100644 index beb502e9cb3..00000000000 --- a/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2008-2009 Analog Devices Inc. - * - * Licensed under the GPL-2 or later. - */ - -#include -#include - -#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) -# define CONFIG_SERIAL_BFIN_CTSRTS - -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_CTS_PIN -# define CONFIG_UART1_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_RTS_PIN -# define CONFIG_UART1_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { -#ifdef CONFIG_SERIAL_BFIN_UART0 - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART1 - { - 0xFFC02000, - IRQ_UART1_RX, - IRQ_UART1_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART1_TX, - CH_UART1_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART1_CTS_PIN, - CONFIG_UART1_RTS_PIN, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART2 - { - 0xFFC02100, - IRQ_UART2_RX, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART2_TX, - CH_UART2_RX, -#endif -#ifdef CONFIG_BFIN_UART2_CTSRTS - CONFIG_UART2_CTS_PIN, - CONFIG_UART2_RTS_PIN, -#endif - }, -#endif -}; - -#define DRIVER_NAME "bfin-uart" - -#include diff --git a/arch/blackfin/mach-bf548/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf548/include/mach/bfin_serial_5xx.h deleted file mode 100644 index 0d94edaaaa2..00000000000 --- a/arch/blackfin/mach-bf548/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2007-2009 Analog Devices Inc. - * - * Licensed under the GPL-2 or later. - */ - -#include -#include - -#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) || \ - defined(CONFIG_BFIN_UART2_CTSRTS) || defined(CONFIG_BFIN_UART3_CTSRTS) -# define CONFIG_SERIAL_BFIN_HARD_CTSRTS -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { -#ifdef CONFIG_SERIAL_BFIN_UART0 - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS - 0, - 0, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART1 - { - 0xFFC02000, - IRQ_UART1_RX, - IRQ_UART1_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART1_TX, - CH_UART1_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS - GPIO_PE10, - GPIO_PE9, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART2 - { - 0xFFC02100, - IRQ_UART2_RX, - IRQ_UART2_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART2_TX, - CH_UART2_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS - 0, - 0, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART3 - { - 0xFFC03100, - IRQ_UART3_RX, - IRQ_UART3_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART3_TX, - CH_UART3_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS - GPIO_PB3, - GPIO_PB2, -#endif - }, -#endif -}; - -#define DRIVER_NAME "bfin-uart" - -#include diff --git a/arch/blackfin/mach-bf561/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf561/include/mach/bfin_serial_5xx.h deleted file mode 100644 index 3a6947456cf..00000000000 --- a/arch/blackfin/mach-bf561/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2006-2009 Analog Devices Inc. - * - * Licensed under the GPL-2 or later. - */ - -#include -#include - -#ifdef CONFIG_BFIN_UART0_CTSRTS -# define CONFIG_SERIAL_BFIN_CTSRTS -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { - { - 0xFFC00400, - IRQ_UART_RX, - IRQ_UART_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART_TX, - CH_UART_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - } -}; - -#define DRIVER_NAME "bfin-uart" - -#include -- cgit v1.2.3-70-g09d2 From 427472c967977256db42ffbf16f65f2770278b7f Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 23 May 2011 21:45:42 -0400 Subject: Blackfin: wire up new sendmmsg syscall Signed-off-by: Mike Frysinger --- arch/blackfin/include/asm/unistd.h | 3 ++- arch/blackfin/mach-common/entry.S | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index 6ff9c411b14..0ccba60b9cc 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -398,8 +398,9 @@ #define __NR_clock_adjtime 377 #define __NR_syncfs 378 #define __NR_setns 379 +#define __NR_sendmmsg 380 -#define __NR_syscall 380 +#define __NR_syscall 381 #define NR_syscalls __NR_syscall /* Old optional stuff no one actually uses */ diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index dda11ef06be..225d311c970 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1754,6 +1754,7 @@ ENTRY(_sys_call_table) .long _sys_clock_adjtime .long _sys_syncfs .long _sys_setns + .long _sys_sendmmsg /* 380 */ .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall -- cgit v1.2.3-70-g09d2 From a4ffd956924e265865a4425bd888927059fd46a9 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 26 May 2011 17:26:58 -0400 Subject: Blackfin: gptimers: add structure for hardware register layout Signed-off-by: Mike Frysinger --- arch/blackfin/include/asm/gptimers.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/blackfin/include/asm/gptimers.h b/arch/blackfin/include/asm/gptimers.h index c722acdda0d..38657dac123 100644 --- a/arch/blackfin/include/asm/gptimers.h +++ b/arch/blackfin/include/asm/gptimers.h @@ -193,4 +193,22 @@ uint16_t get_enabled_gptimers(void); uint32_t get_gptimer_status(unsigned int group); void set_gptimer_status(unsigned int group, uint32_t value); +/* + * All Blackfin system MMRs are padded to 32bits even if the register + * itself is only 16bits. So use a helper macro to streamline this. + */ +#define __BFP(m) u16 m; u16 __pad_##m + +/* + * bfin timer registers layout + */ +struct bfin_gptimer_regs { + __BFP(config); + u32 counter; + u32 period; + u32 width; +}; + +#undef __BFP + #endif -- cgit v1.2.3-70-g09d2 From d09fb602030e3247c21452ee2b3229baf115f71f Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 26 May 2011 17:27:36 -0400 Subject: Blackfin: debug-mmrs: fix typos with gptimers/mdma/ppi This code was mostly developed against a BF54x, so some BF537-specific issues were missed. The PPI block starts at PPI_CONTROL, not PPI_STATUS (which is the reverse of the EPPI block). The MDMA block starts at MDMA_NEXT_DESC_PTR, not MDMA_CONFIG. Seems the sim does not catch misreads here so that'll need to get fixed. The gptimer block is mostly 32bit regs, not 16bit. Use the gptimer struct to figure that out rather than hardcoding it locally. Signed-off-by: Mike Frysinger --- arch/blackfin/kernel/debug-mmrs.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/blackfin/kernel/debug-mmrs.c b/arch/blackfin/kernel/debug-mmrs.c index 94b1d8a0256..aa201be49a1 100644 --- a/arch/blackfin/kernel/debug-mmrs.c +++ b/arch/blackfin/kernel/debug-mmrs.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -230,8 +231,8 @@ bfin_debug_mmrs_dma(struct dentry *parent, unsigned long base, int num, char mdm #define DMA(num) _DMA(num, DMA##num##_NEXT_DESC_PTR, 0, "") #define _MDMA(num, x) \ do { \ - _DMA(num, x##DMA_D##num##_CONFIG, 'D', #x); \ - _DMA(num, x##DMA_S##num##_CONFIG, 'S', #x); \ + _DMA(num, x##DMA_D##num##_NEXT_DESC_PTR, 'D', #x); \ + _DMA(num, x##DMA_S##num##_NEXT_DESC_PTR, 'S', #x); \ } while (0) #define MDMA(num) _MDMA(num, M) #define IMDMA(num) _MDMA(num, IM) @@ -264,20 +265,15 @@ bfin_debug_mmrs_eppi(struct dentry *parent, unsigned long base, int num) /* * General Purpose Timers */ -#define GPTIMER_OFF(mmr) (TIMER0_##mmr - TIMER0_CONFIG) -#define __GPTIMER(name) \ - do { \ - strcpy(_buf, #name); \ - debugfs_create_x16(buf, S_IRUSR|S_IWUSR, parent, (u16 *)(base + GPTIMER_OFF(name))); \ - } while (0) +#define __GPTIMER(uname, lname) __REGS(gptimer, #uname, lname) static void __init __maybe_unused bfin_debug_mmrs_gptimer(struct dentry *parent, unsigned long base, int num) { char buf[32], *_buf = REGS_STR_PFX(buf, TIMER, num); - __GPTIMER(CONFIG); - __GPTIMER(COUNTER); - __GPTIMER(PERIOD); - __GPTIMER(WIDTH); + __GPTIMER(CONFIG, config); + __GPTIMER(COUNTER, counter); + __GPTIMER(PERIOD, period); + __GPTIMER(WIDTH, width); } #define GPTIMER(num) bfin_debug_mmrs_gptimer(parent, TIMER##num##_CONFIG, num) @@ -355,7 +351,7 @@ bfin_debug_mmrs_ppi(struct dentry *parent, unsigned long base, int num) __PPI(DELAY, delay); __PPI(FRAME, frame); } -#define PPI(num) bfin_debug_mmrs_ppi(parent, PPI##num##_STATUS, num) +#define PPI(num) bfin_debug_mmrs_ppi(parent, PPI##num##_CONTROL, num) /* * SPI @@ -1288,15 +1284,15 @@ static int __init bfin_debug_mmrs_init(void) D16(VR_CTL); D32(CHIPID); /* it's part of this hardware block */ -#if defined(PPI_STATUS) || defined(PPI0_STATUS) || defined(PPI1_STATUS) +#if defined(PPI_CONTROL) || defined(PPI0_CONTROL) || defined(PPI1_CONTROL) parent = debugfs_create_dir("ppi", top); -# ifdef PPI_STATUS - bfin_debug_mmrs_ppi(parent, PPI_STATUS, -1); +# ifdef PPI_CONTROL + bfin_debug_mmrs_ppi(parent, PPI_CONTROL, -1); # endif -# ifdef PPI0_STATUS +# ifdef PPI0_CONTROL PPI(0); # endif -# ifdef PPI1_STATUS +# ifdef PPI1_CONTROL PPI(1); # endif #endif -- cgit v1.2.3-70-g09d2 From 61aa818f7bfdb5dd0aef4687513566c75c0e4c21 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 26 May 2011 17:39:17 -0400 Subject: Blackfin: bf52x/bf54x: fix up usb MMR defines The bf52x/bf54x have the incorrect addresses for USB_EP_NI7_RXINTERVAL and USB_EP_NI7_TXCOUNT, so adjust those. Further, the bf54x header puts the USB defines in the wrong place, so shuffle them back to the right grouping. Signed-off-by: Mike Frysinger --- arch/blackfin/mach-bf527/include/mach/defBF525.h | 4 ++-- arch/blackfin/mach-bf548/include/mach/defBF547.h | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/blackfin/mach-bf527/include/mach/defBF525.h b/arch/blackfin/mach-bf527/include/mach/defBF525.h index cc383adfdff..aab80bb1a68 100644 --- a/arch/blackfin/mach-bf527/include/mach/defBF525.h +++ b/arch/blackfin/mach-bf527/include/mach/defBF525.h @@ -185,8 +185,8 @@ #define USB_EP_NI7_TXTYPE 0xffc03bd4 /* Sets the transaction protocol and peripheral endpoint number for the Host Tx endpoint7 */ #define USB_EP_NI7_TXINTERVAL 0xffc03bd8 /* Sets the NAK response timeout on Endpoint7 */ #define USB_EP_NI7_RXTYPE 0xffc03bdc /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint7 */ -#define USB_EP_NI7_RXINTERVAL 0xffc03bf0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */ -#define USB_EP_NI7_TXCOUNT 0xffc03bf8 /* Number of bytes to be written to the endpoint7 Tx FIFO */ +#define USB_EP_NI7_RXINTERVAL 0xffc03be0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */ +#define USB_EP_NI7_TXCOUNT 0xffc03be8 /* Number of bytes to be written to the endpoint7 Tx FIFO */ #define USB_DMA_INTERRUPT 0xffc03c00 /* Indicates pending interrupts for the DMA channels */ diff --git a/arch/blackfin/mach-bf548/include/mach/defBF547.h b/arch/blackfin/mach-bf548/include/mach/defBF547.h index 1cbba115f96..1fa41ec03f3 100644 --- a/arch/blackfin/mach-bf548/include/mach/defBF547.h +++ b/arch/blackfin/mach-bf548/include/mach/defBF547.h @@ -271,10 +271,10 @@ #define USB_EP_NI0_TXINTERVAL 0xffc03e18 /* Sets the NAK response timeout on Endpoint 0 */ #define USB_EP_NI0_RXTYPE 0xffc03e1c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint0 */ #define USB_EP_NI0_RXINTERVAL 0xffc03e20 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint0 */ +#define USB_EP_NI0_TXCOUNT 0xffc03e28 /* Number of bytes to be written to the endpoint0 Tx FIFO */ /* USB Endpoint 1 Control Registers */ -#define USB_EP_NI0_TXCOUNT 0xffc03e28 /* Number of bytes to be written to the endpoint0 Tx FIFO */ #define USB_EP_NI1_TXMAXP 0xffc03e40 /* Maximum packet size for Host Tx endpoint1 */ #define USB_EP_NI1_TXCSR 0xffc03e44 /* Control Status register for endpoint1 */ #define USB_EP_NI1_RXMAXP 0xffc03e48 /* Maximum packet size for Host Rx endpoint1 */ @@ -284,10 +284,10 @@ #define USB_EP_NI1_TXINTERVAL 0xffc03e58 /* Sets the NAK response timeout on Endpoint1 */ #define USB_EP_NI1_RXTYPE 0xffc03e5c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint1 */ #define USB_EP_NI1_RXINTERVAL 0xffc03e60 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint1 */ +#define USB_EP_NI1_TXCOUNT 0xffc03e68 /* Number of bytes to be written to the+H102 endpoint1 Tx FIFO */ /* USB Endpoint 2 Control Registers */ -#define USB_EP_NI1_TXCOUNT 0xffc03e68 /* Number of bytes to be written to the+H102 endpoint1 Tx FIFO */ #define USB_EP_NI2_TXMAXP 0xffc03e80 /* Maximum packet size for Host Tx endpoint2 */ #define USB_EP_NI2_TXCSR 0xffc03e84 /* Control Status register for endpoint2 */ #define USB_EP_NI2_RXMAXP 0xffc03e88 /* Maximum packet size for Host Rx endpoint2 */ @@ -297,10 +297,10 @@ #define USB_EP_NI2_TXINTERVAL 0xffc03e98 /* Sets the NAK response timeout on Endpoint2 */ #define USB_EP_NI2_RXTYPE 0xffc03e9c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint2 */ #define USB_EP_NI2_RXINTERVAL 0xffc03ea0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint2 */ +#define USB_EP_NI2_TXCOUNT 0xffc03ea8 /* Number of bytes to be written to the endpoint2 Tx FIFO */ /* USB Endpoint 3 Control Registers */ -#define USB_EP_NI2_TXCOUNT 0xffc03ea8 /* Number of bytes to be written to the endpoint2 Tx FIFO */ #define USB_EP_NI3_TXMAXP 0xffc03ec0 /* Maximum packet size for Host Tx endpoint3 */ #define USB_EP_NI3_TXCSR 0xffc03ec4 /* Control Status register for endpoint3 */ #define USB_EP_NI3_RXMAXP 0xffc03ec8 /* Maximum packet size for Host Rx endpoint3 */ @@ -310,10 +310,10 @@ #define USB_EP_NI3_TXINTERVAL 0xffc03ed8 /* Sets the NAK response timeout on Endpoint3 */ #define USB_EP_NI3_RXTYPE 0xffc03edc /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint3 */ #define USB_EP_NI3_RXINTERVAL 0xffc03ee0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint3 */ +#define USB_EP_NI3_TXCOUNT 0xffc03ee8 /* Number of bytes to be written to the H124endpoint3 Tx FIFO */ /* USB Endpoint 4 Control Registers */ -#define USB_EP_NI3_TXCOUNT 0xffc03ee8 /* Number of bytes to be written to the H124endpoint3 Tx FIFO */ #define USB_EP_NI4_TXMAXP 0xffc03f00 /* Maximum packet size for Host Tx endpoint4 */ #define USB_EP_NI4_TXCSR 0xffc03f04 /* Control Status register for endpoint4 */ #define USB_EP_NI4_RXMAXP 0xffc03f08 /* Maximum packet size for Host Rx endpoint4 */ @@ -323,10 +323,10 @@ #define USB_EP_NI4_TXINTERVAL 0xffc03f18 /* Sets the NAK response timeout on Endpoint4 */ #define USB_EP_NI4_RXTYPE 0xffc03f1c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint4 */ #define USB_EP_NI4_RXINTERVAL 0xffc03f20 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint4 */ +#define USB_EP_NI4_TXCOUNT 0xffc03f28 /* Number of bytes to be written to the endpoint4 Tx FIFO */ /* USB Endpoint 5 Control Registers */ -#define USB_EP_NI4_TXCOUNT 0xffc03f28 /* Number of bytes to be written to the endpoint4 Tx FIFO */ #define USB_EP_NI5_TXMAXP 0xffc03f40 /* Maximum packet size for Host Tx endpoint5 */ #define USB_EP_NI5_TXCSR 0xffc03f44 /* Control Status register for endpoint5 */ #define USB_EP_NI5_RXMAXP 0xffc03f48 /* Maximum packet size for Host Rx endpoint5 */ @@ -336,10 +336,10 @@ #define USB_EP_NI5_TXINTERVAL 0xffc03f58 /* Sets the NAK response timeout on Endpoint5 */ #define USB_EP_NI5_RXTYPE 0xffc03f5c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint5 */ #define USB_EP_NI5_RXINTERVAL 0xffc03f60 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint5 */ +#define USB_EP_NI5_TXCOUNT 0xffc03f68 /* Number of bytes to be written to the H145endpoint5 Tx FIFO */ /* USB Endpoint 6 Control Registers */ -#define USB_EP_NI5_TXCOUNT 0xffc03f68 /* Number of bytes to be written to the H145endpoint5 Tx FIFO */ #define USB_EP_NI6_TXMAXP 0xffc03f80 /* Maximum packet size for Host Tx endpoint6 */ #define USB_EP_NI6_TXCSR 0xffc03f84 /* Control Status register for endpoint6 */ #define USB_EP_NI6_RXMAXP 0xffc03f88 /* Maximum packet size for Host Rx endpoint6 */ @@ -349,10 +349,10 @@ #define USB_EP_NI6_TXINTERVAL 0xffc03f98 /* Sets the NAK response timeout on Endpoint6 */ #define USB_EP_NI6_RXTYPE 0xffc03f9c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint6 */ #define USB_EP_NI6_RXINTERVAL 0xffc03fa0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint6 */ +#define USB_EP_NI6_TXCOUNT 0xffc03fa8 /* Number of bytes to be written to the endpoint6 Tx FIFO */ /* USB Endpoint 7 Control Registers */ -#define USB_EP_NI6_TXCOUNT 0xffc03fa8 /* Number of bytes to be written to the endpoint6 Tx FIFO */ #define USB_EP_NI7_TXMAXP 0xffc03fc0 /* Maximum packet size for Host Tx endpoint7 */ #define USB_EP_NI7_TXCSR 0xffc03fc4 /* Control Status register for endpoint7 */ #define USB_EP_NI7_RXMAXP 0xffc03fc8 /* Maximum packet size for Host Rx endpoint7 */ @@ -361,8 +361,9 @@ #define USB_EP_NI7_TXTYPE 0xffc03fd4 /* Sets the transaction protocol and peripheral endpoint number for the Host Tx endpoint7 */ #define USB_EP_NI7_TXINTERVAL 0xffc03fd8 /* Sets the NAK response timeout on Endpoint7 */ #define USB_EP_NI7_RXTYPE 0xffc03fdc /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint7 */ -#define USB_EP_NI7_RXINTERVAL 0xffc03ff0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */ -#define USB_EP_NI7_TXCOUNT 0xffc03ff8 /* Number of bytes to be written to the endpoint7 Tx FIFO */ +#define USB_EP_NI7_RXINTERVAL 0xffc03fe0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */ +#define USB_EP_NI7_TXCOUNT 0xffc03fe8 /* Number of bytes to be written to the endpoint7 Tx FIFO */ + #define USB_DMA_INTERRUPT 0xffc04000 /* Indicates pending interrupts for the DMA channels */ /* USB Channel 0 Config Registers */ -- cgit v1.2.3-70-g09d2 From fcb243918f9e8414bf5ad6fb0361447ac3d3fddb Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 26 May 2011 18:05:15 -0400 Subject: Blackfin: bf51x: fix up RSI_PID# MMR defines Looks like the copying of MMR defines from the SDH block missed updating the addresses of the RSI_PID# registers. So tweak them to reflect the actual hardware. Signed-off-by: Mike Frysinger --- arch/blackfin/mach-bf518/include/mach/defBF514.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/blackfin/mach-bf518/include/mach/defBF514.h b/arch/blackfin/mach-bf518/include/mach/defBF514.h index 98a51c47929..cfab428e577 100644 --- a/arch/blackfin/mach-bf518/include/mach/defBF514.h +++ b/arch/blackfin/mach-bf518/include/mach/defBF514.h @@ -36,13 +36,13 @@ #define RSI_EMASK 0xFFC038C4 /* RSI Exception Mask Register */ #define RSI_CONFIG 0xFFC038C8 /* RSI Configuration Register */ #define RSI_RD_WAIT_EN 0xFFC038CC /* RSI Read Wait Enable Register */ -#define RSI_PID0 0xFFC03FE0 /* RSI Peripheral ID Register 0 */ -#define RSI_PID1 0xFFC03FE4 /* RSI Peripheral ID Register 1 */ -#define RSI_PID2 0xFFC03FE8 /* RSI Peripheral ID Register 2 */ -#define RSI_PID3 0xFFC03FEC /* RSI Peripheral ID Register 3 */ -#define RSI_PID4 0xFFC03FF0 /* RSI Peripheral ID Register 4 */ -#define RSI_PID5 0xFFC03FF4 /* RSI Peripheral ID Register 5 */ -#define RSI_PID6 0xFFC03FF8 /* RSI Peripheral ID Register 6 */ -#define RSI_PID7 0xFFC03FFC /* RSI Peripheral ID Register 7 */ +#define RSI_PID0 0xFFC038D0 /* RSI Peripheral ID Register 0 */ +#define RSI_PID1 0xFFC038D4 /* RSI Peripheral ID Register 1 */ +#define RSI_PID2 0xFFC038D8 /* RSI Peripheral ID Register 2 */ +#define RSI_PID3 0xFFC038DC /* RSI Peripheral ID Register 3 */ +#define RSI_PID4 0xFFC038E0 /* RSI Peripheral ID Register 0 */ +#define RSI_PID5 0xFFC038E4 /* RSI Peripheral ID Register 1 */ +#define RSI_PID6 0xFFC038E8 /* RSI Peripheral ID Register 2 */ +#define RSI_PID7 0xFFC038EC /* RSI Peripheral ID Register 3 */ #endif /* _DEF_BF514_H */ -- cgit v1.2.3-70-g09d2 From c320afe965bf3f857249d223801d8f2fc95615c2 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 26 May 2011 18:07:11 -0400 Subject: Blackfin: debug-mmrs: include RSI_PID[4567] MMRs The documentation is a little iffy as to whether these are actual MMRs, but reading them on the hardware works, and the previous version of this logic (the SDH) had PID[4567]. So add it for RSI too. Signed-off-by: Mike Frysinger --- arch/blackfin/kernel/debug-mmrs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/blackfin/kernel/debug-mmrs.c b/arch/blackfin/kernel/debug-mmrs.c index aa201be49a1..fce4807ceef 100644 --- a/arch/blackfin/kernel/debug-mmrs.c +++ b/arch/blackfin/kernel/debug-mmrs.c @@ -1337,6 +1337,10 @@ static int __init bfin_debug_mmrs_init(void) D16(RSI_PID1); D16(RSI_PID2); D16(RSI_PID3); + D16(RSI_PID4); + D16(RSI_PID5); + D16(RSI_PID6); + D16(RSI_PID7); D16(RSI_PWR_CONTROL); D16(RSI_RD_WAIT_EN); D32(RSI_RESPONSE0); -- cgit v1.2.3-70-g09d2 From 826267cf1e6c6899eda1325a19f1b1d15c558b20 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 28 May 2011 13:14:09 -0700 Subject: tmpfs: fix race between truncate and writepage While running fsx on tmpfs with a memhog then swapoff, swapoff was hanging (interruptibly), repeatedly failing to locate the owner of a 0xff entry in the swap_map. Although shmem_writepage() does abandon when it sees incoming page index is beyond eof, there was still a window in which shmem_truncate_range() could come in between writepage's dropping lock and updating swap_map, find the half-completed swap_map entry, and in trying to free it, leave it in a state that swap_shmem_alloc() could not correct. Arguably a bug in __swap_duplicate()'s and swap_entry_free()'s handling of the different cases, but easiest to fix by moving swap_shmem_alloc() under cover of the lock. More interesting than the bug: it's been there since 2.6.33, why could I not see it with earlier kernels? The mmotm of two weeks ago seems to have some magic for generating races, this is just one of three I found. With yesterday's git I first saw this in mainline, bisected in search of that magic, but the easy reproducibility evaporated. Oh well, fix the bug. Signed-off-by: Hugh Dickins Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 1acfb2687bf..d221a1cfd7b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1114,8 +1114,8 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) delete_from_page_cache(page); shmem_swp_set(info, entry, swap.val); shmem_swp_unmap(entry); - spin_unlock(&info->lock); swap_shmem_alloc(swap); + spin_unlock(&info->lock); BUG_ON(page_mapped(page)); swap_writepage(page, wbc); return 0; -- cgit v1.2.3-70-g09d2 From 5dbe0af47f8a8f968bac2991c3ec974c6e3eaabc Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 28 May 2011 13:17:04 -0700 Subject: mm: fix kernel BUG at mm/rmap.c:1017! I've hit the "address >= vma->vm_end" check in do_page_add_anon_rmap() just once. The stack showed khugepaged allocation trying to compact pages: the call to page_add_anon_rmap() coming from remove_migration_pte(). That path holds anon_vma lock, but does not hold mmap_sem: it can therefore race with a split_vma(), and in commit 5f70b962ccc2 "mmap: avoid unnecessary anon_vma lock" we just took away the anon_vma lock protection when adjusting vma->vm_end. I don't think that particular BUG_ON ever caught anything interesting, so better replace it by a comment, than reinstate the anon_vma locking. Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/rmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 3a39b518a65..ba58ca36fc9 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1014,7 +1014,7 @@ void do_page_add_anon_rmap(struct page *page, return; VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); + /* address might be in next vma when migration races vma_adjust */ if (first) __page_set_anon_rmap(page, vma, address, exclusive); else @@ -1709,7 +1709,7 @@ void hugepage_add_anon_rmap(struct page *page, BUG_ON(!PageLocked(page)); BUG_ON(!anon_vma); - BUG_ON(address < vma->vm_start || address >= vma->vm_end); + /* address might be in next vma when migration races vma_adjust */ first = atomic_inc_and_test(&page->_mapcount); if (first) __hugepage_set_anon_rmap(page, vma, address, 0); -- cgit v1.2.3-70-g09d2 From eee0f252c6537da2e883f75d22cff1427515eaf3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 28 May 2011 13:20:21 -0700 Subject: mm: fix page_lock_anon_vma leaving mutex locked On one machine I've been getting hangs, a page fault's anon_vma_prepare() waiting in anon_vma_lock(), other processes waiting for that page's lock. This is a replay of last year's f18194275c39 "mm: fix hang on anon_vma->root->lock". The new page_lock_anon_vma() places too much faith in its refcount: when it has acquired the mutex_trylock(), it's possible that a racing task in anon_vma_alloc() has just reallocated the struct anon_vma, set refcount to 1, and is about to reset its anon_vma->root. Fix this by saving anon_vma->root, and relying on the usual page_mapped() check instead of a refcount check: if page is still mapped, the anon_vma is still ours; if page is not still mapped, we're no longer interested. Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/rmap.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index ba58ca36fc9..6bada99cd61 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -405,6 +405,7 @@ out: struct anon_vma *page_lock_anon_vma(struct page *page) { struct anon_vma *anon_vma = NULL; + struct anon_vma *root_anon_vma; unsigned long anon_mapping; rcu_read_lock(); @@ -415,13 +416,15 @@ struct anon_vma *page_lock_anon_vma(struct page *page) goto out; anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); - if (mutex_trylock(&anon_vma->root->mutex)) { + root_anon_vma = ACCESS_ONCE(anon_vma->root); + if (mutex_trylock(&root_anon_vma->mutex)) { /* - * If we observe a !0 refcount, then holding the lock ensures - * the anon_vma will not go away, see __put_anon_vma(). + * If the page is still mapped, then this anon_vma is still + * its anon_vma, and holding the mutex ensures that it will + * not go away, see __put_anon_vma(). */ - if (!atomic_read(&anon_vma->refcount)) { - anon_vma_unlock(anon_vma); + if (!page_mapped(page)) { + mutex_unlock(&root_anon_vma->mutex); anon_vma = NULL; } goto out; -- cgit v1.2.3-70-g09d2 From 7467571f4480b273007517b26297c07154c73924 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 24 Feb 2011 17:19:23 +0200 Subject: cpuidle: menu: fixed wrapping timers at 4.294 seconds Cpuidle menu governor is using u32 as a temporary datatype for storing nanosecond values which wrap around at 4.294 seconds. This causes errors in predicted sleep times resulting in higher than should be C state selection and increased power consumption. This also breaks cpuidle state residency statistics. cc: stable@kernel.org # .32.x through .39.x Signed-off-by: Tero Kristo Signed-off-by: Len Brown --- drivers/cpuidle/governors/menu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index f508690eb95..c47f3d09c1e 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -237,6 +237,7 @@ static int menu_select(struct cpuidle_device *dev) unsigned int power_usage = -1; int i; int multiplier; + struct timespec t; if (data->needs_update) { menu_update(dev); @@ -251,8 +252,9 @@ static int menu_select(struct cpuidle_device *dev) return 0; /* determine the expected residency time, round up */ + t = ktime_to_timespec(tick_nohz_get_sleep_length()); data->expected_us = - DIV_ROUND_UP((u32)ktime_to_ns(tick_nohz_get_sleep_length()), 1000); + t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC; data->bucket = which_bucket(data->expected_us); -- cgit v1.2.3-70-g09d2 From 333c5ae9948194428fe6c5ef5c088304fc98263b Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Fri, 11 Feb 2011 12:49:04 -0800 Subject: idle governor: Avoid lock acquisition to read pm_qos before entering idle Thanks to the reviews and comments by Rafael, James, Mark and Andi. Here's version 2 of the patch incorporating your comments and also some update to my previous patch comments. I noticed that before entering idle state, the menu idle governor will look up the current pm_qos target value according to the list of qos requests received. This look up currently needs the acquisition of a lock to access the list of qos requests to find the qos target value, slowing down the entrance into idle state due to contention by multiple cpus to access this list. The contention is severe when there are a lot of cpus waking and going into idle. For example, for a simple workload that has 32 pair of processes ping ponging messages to each other, where 64 cpu cores are active in test system, I see the following profile with 37.82% of cpu cycles spent in contention of pm_qos_lock: - 37.82% swapper [kernel.kallsyms] [k] _raw_spin_lock_irqsave - _raw_spin_lock_irqsave - 95.65% pm_qos_request menu_select cpuidle_idle_call - cpu_idle 99.98% start_secondary A better approach will be to cache the updated pm_qos target value so reading it does not require lock acquisition as in the patch below. With this patch the contention for pm_qos_lock is removed and I saw a 2.2X increase in throughput for my message passing workload. cc: stable@kernel.org Signed-off-by: Tim Chen Acked-by: Andi Kleen Acked-by: James Bottomley Acked-by: mark gross Signed-off-by: Len Brown --- include/linux/pm_qos_params.h | 4 ++++ kernel/pm_qos_params.c | 37 +++++++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h index 77cbddb3784..a7d87f911ca 100644 --- a/include/linux/pm_qos_params.h +++ b/include/linux/pm_qos_params.h @@ -16,6 +16,10 @@ #define PM_QOS_NUM_CLASSES 4 #define PM_QOS_DEFAULT_VALUE -1 +#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 + struct pm_qos_request_list { struct plist_node list; int pm_qos_class; diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index aeaa7f84682..6a8fad82a3a 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -53,11 +53,17 @@ enum pm_qos_type { PM_QOS_MIN /* return the smallest value */ }; +/* + * Note: The lockless read path depends on the CPU accessing + * target_value atomically. Atomic access is only guaranteed on all CPU + * types linux supports for 32 bit quantites + */ struct pm_qos_object { struct plist_head requests; struct blocking_notifier_head *notifiers; struct miscdevice pm_qos_power_miscdev; char *name; + s32 target_value; /* Do not change to 64 bit */ s32 default_value; enum pm_qos_type type; }; @@ -70,7 +76,8 @@ static struct pm_qos_object cpu_dma_pm_qos = { .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), .notifiers = &cpu_dma_lat_notifier, .name = "cpu_dma_latency", - .default_value = 2000 * USEC_PER_SEC, + .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN, }; @@ -79,7 +86,8 @@ static struct pm_qos_object network_lat_pm_qos = { .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), .notifiers = &network_lat_notifier, .name = "network_latency", - .default_value = 2000 * USEC_PER_SEC, + .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN }; @@ -89,7 +97,8 @@ static struct pm_qos_object network_throughput_pm_qos = { .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), .notifiers = &network_throughput_notifier, .name = "network_throughput", - .default_value = 0, + .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, .type = PM_QOS_MAX, }; @@ -132,6 +141,16 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) } } +static inline s32 pm_qos_read_value(struct pm_qos_object *o) +{ + return o->target_value; +} + +static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) +{ + o->target_value = value; +} + static void update_target(struct pm_qos_object *o, struct plist_node *node, int del, int value) { @@ -156,6 +175,7 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node, plist_add(node, &o->requests); } curr_value = pm_qos_get_value(o); + pm_qos_set_value(o, curr_value); spin_unlock_irqrestore(&pm_qos_lock, flags); if (prev_value != curr_value) @@ -190,18 +210,11 @@ static int find_pm_qos_object_by_minor(int minor) * pm_qos_request - returns current system wide qos expectation * @pm_qos_class: identification of which qos value is requested * - * This function returns the current target value in an atomic manner. + * This function returns the current target value. */ int pm_qos_request(int pm_qos_class) { - unsigned long flags; - int value; - - spin_lock_irqsave(&pm_qos_lock, flags); - value = pm_qos_get_value(pm_qos_array[pm_qos_class]); - spin_unlock_irqrestore(&pm_qos_lock, flags); - - return value; + return pm_qos_read_value(pm_qos_array[pm_qos_class]); } EXPORT_SYMBOL_GPL(pm_qos_request); -- cgit v1.2.3-70-g09d2 From 534bc4e3d27096e2f3fc00c14a20efd597837a4f Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 26 Apr 2011 16:30:02 +0800 Subject: ACPI EC: enable MSI workaround for Quanta laptops Enable MSI workaround for Quanta laptops. https://bugzilla.kernel.org/show_bug.cgi?id=20242 Tested-by: Jan-Matthias Braun Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- drivers/acpi/ec.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index fa848c4116a..62628e5eba3 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -938,6 +938,14 @@ static struct dmi_system_id __initdata ec_dmi_table[] = { ec_flag_msi, "MSI hardware", { DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR")}, NULL}, { + ec_flag_msi, "Quanta hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "Quanta"), + DMI_MATCH(DMI_PRODUCT_NAME, "TW8/SW8/DW8"),}, NULL}, + { + ec_flag_msi, "Quanta hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "Quanta"), + DMI_MATCH(DMI_PRODUCT_NAME, "TW9/SW9"),}, NULL}, + { ec_validate_ecdt, "ASUS hardware", { DMI_MATCH(DMI_BIOS_VENDOR, "ASUS") }, NULL}, {}, -- cgit v1.2.3-70-g09d2 From aecad432fd68dafa5b3b497c4816fbfce6fd4066 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 26 May 2011 12:26:23 +0200 Subject: ACPI: Cleanup custom_method debug stuff - Move param aml_debug_output to other params into sysfs.c - Split acpi_debugfs_init to prepare custom_method to be an own .config option and driver. Signed-off-by: Thomas Renninger Acked-by: Rafael J. Wysocki Acked-by: rui.zhang@intel.com Signed-off-by: Len Brown --- drivers/acpi/debugfs.c | 32 +++++++++++++------------------- drivers/acpi/internal.h | 3 ++- drivers/acpi/sysfs.c | 8 ++++++++ 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/debugfs.c b/drivers/acpi/debugfs.c index 384f7abcff7..e7abc6e3bba 100644 --- a/drivers/acpi/debugfs.c +++ b/drivers/acpi/debugfs.c @@ -12,13 +12,8 @@ #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME("debugfs"); - -/* /sys/modules/acpi/parameters/aml_debug_output */ - -module_param_named(aml_debug_output, acpi_gbl_enable_aml_debug_object, - bool, 0644); -MODULE_PARM_DESC(aml_debug_output, - "To enable/disable the ACPI Debug Object output."); +struct dentry *acpi_debugfs_dir; +static struct dentry *cm_dentry; /* /sys/kernel/debug/acpi/custom_method */ @@ -80,23 +75,22 @@ static const struct file_operations cm_fops = { .llseek = default_llseek, }; -int __init acpi_debugfs_init(void) +static int __init acpi_custom_method_init(void) { - struct dentry *acpi_dir, *cm_dentry; - - acpi_dir = debugfs_create_dir("acpi", NULL); - if (!acpi_dir) - goto err; + if (!acpi_debugfs_dir) + return -ENOENT; cm_dentry = debugfs_create_file("custom_method", S_IWUSR, - acpi_dir, NULL, &cm_fops); + acpi_debugfs_dir, NULL, &cm_fops); if (!cm_dentry) - goto err; + return -ENODEV; return 0; +} + +void __init acpi_debugfs_init(void) +{ + acpi_debugfs_dir = debugfs_create_dir("acpi", NULL); -err: - if (acpi_dir) - debugfs_remove(acpi_dir); - return -EINVAL; + acpi_custom_method_init(); } diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 4bfb759deb1..ca75b9ce048 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -28,9 +28,10 @@ int acpi_scan_init(void); int acpi_sysfs_init(void); #ifdef CONFIG_DEBUG_FS +extern struct dentry *acpi_debugfs_dir; int acpi_debugfs_init(void); #else -static inline int acpi_debugfs_init(void) { return 0; } +static inline void acpi_debugfs_init(void) { return; } #endif /* -------------------------------------------------------------------------- diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 61891e75583..77255f250db 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -220,6 +220,14 @@ module_param_call(trace_state, param_set_trace_state, param_get_trace_state, NULL, 0644); #endif /* CONFIG_ACPI_DEBUG */ + +/* /sys/modules/acpi/parameters/aml_debug_output */ + +module_param_named(aml_debug_output, acpi_gbl_enable_aml_debug_object, + bool, 0644); +MODULE_PARM_DESC(aml_debug_output, + "To enable/disable the ACPI Debug Object output."); + /* /sys/module/acpi/parameters/acpica_version */ static int param_get_acpica_version(char *buffer, struct kernel_param *kp) { -- cgit v1.2.3-70-g09d2 From 526b4af47f44148c9d665e57723ed9f86634c6e3 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 26 May 2011 12:26:24 +0200 Subject: ACPI: Split out custom_method functionality into an own driver With /sys/kernel/debug/acpi/custom_method root can write to arbitrary memory and increase his priveleges, even if these are restricted. -> Make this an own debug .config option and warn about the security issue in the config description. -> Still keep acpi/debugfs.c which now only creates an empty /sys/kernel/debug/acpi directory. There might be other users of it later. Signed-off-by: Thomas Renninger Acked-by: Rafael J. Wysocki Acked-by: rui.zhang@intel.com Signed-off-by: Len Brown --- Documentation/acpi/method-customizing.txt | 5 ++ drivers/acpi/Kconfig | 15 +++++ drivers/acpi/Makefile | 1 + drivers/acpi/custom_method.c | 100 ++++++++++++++++++++++++++++++ drivers/acpi/debugfs.c | 80 +----------------------- 5 files changed, 122 insertions(+), 79 deletions(-) create mode 100644 drivers/acpi/custom_method.c diff --git a/Documentation/acpi/method-customizing.txt b/Documentation/acpi/method-customizing.txt index 3e1d25aee3f..5f55373dd53 100644 --- a/Documentation/acpi/method-customizing.txt +++ b/Documentation/acpi/method-customizing.txt @@ -66,3 +66,8 @@ Note: We can use a kernel with multiple custom ACPI method running, But each individual write to debugfs can implement a SINGLE method override. i.e. if we want to insert/override multiple ACPI methods, we need to redo step c) ~ g) for multiple times. + +Note: Be aware that root can mis-use this driver to modify arbitrary + memory and gain additional rights, if root's privileges got + restricted (for example if root is not allowed to load additional + modules after boot). diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 3a17ca5fff6..d918e130bef 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -380,6 +380,21 @@ config ACPI_HED which is used to report some hardware errors notified via SCI, mainly the corrected errors. +config ACPI_CUSTOM_METHOD + tristate "Allow ACPI methods to be inserted/replaced at run time" + depends on DEBUG_FS + default n + help + This debug facility allows ACPI AML methods to me inserted and/or + replaced without rebooting the system. For details refer to: + Documentation/acpi/method-customizing.txt. + + NOTE: This option is security sensitive, because it allows arbitrary + kernel memory to be written to by root (uid=0) users, allowing them + to bypass certain security measures (e.g. if root is not allowed to + load additional kernel modules after boot, this feature may be used + to override that restriction). + source "drivers/acpi/apei/Kconfig" endif # ACPI diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index d113fa5100b..cba0b2334b9 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_ACPI_SBS) += sbs.o obj-$(CONFIG_ACPI_POWER_METER) += power_meter.o obj-$(CONFIG_ACPI_HED) += hed.o obj-$(CONFIG_ACPI_EC_DEBUGFS) += ec_sys.o +obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custom_method.o # processor has its own "processor." module_param namespace processor-y := processor_driver.o processor_throttling.o diff --git a/drivers/acpi/custom_method.c b/drivers/acpi/custom_method.c new file mode 100644 index 00000000000..5d42c2414ae --- /dev/null +++ b/drivers/acpi/custom_method.c @@ -0,0 +1,100 @@ +/* + * debugfs.c - ACPI debugfs interface to userspace. + */ + +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +#define _COMPONENT ACPI_SYSTEM_COMPONENT +ACPI_MODULE_NAME("custom_method"); +MODULE_LICENSE("GPL"); + +static struct dentry *cm_dentry; + +/* /sys/kernel/debug/acpi/custom_method */ + +static ssize_t cm_write(struct file *file, const char __user * user_buf, + size_t count, loff_t *ppos) +{ + static char *buf; + static u32 max_size; + static u32 uncopied_bytes; + + struct acpi_table_header table; + acpi_status status; + + if (!(*ppos)) { + /* parse the table header to get the table length */ + if (count <= sizeof(struct acpi_table_header)) + return -EINVAL; + if (copy_from_user(&table, user_buf, + sizeof(struct acpi_table_header))) + return -EFAULT; + uncopied_bytes = max_size = table.length; + buf = kzalloc(max_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } + + if (buf == NULL) + return -EINVAL; + + if ((*ppos > max_size) || + (*ppos + count > max_size) || + (*ppos + count < count) || + (count > uncopied_bytes)) + return -EINVAL; + + if (copy_from_user(buf + (*ppos), user_buf, count)) { + kfree(buf); + buf = NULL; + return -EFAULT; + } + + uncopied_bytes -= count; + *ppos += count; + + if (!uncopied_bytes) { + status = acpi_install_method(buf); + kfree(buf); + buf = NULL; + if (ACPI_FAILURE(status)) + return -EINVAL; + add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); + } + + return count; +} + +static const struct file_operations cm_fops = { + .write = cm_write, + .llseek = default_llseek, +}; + +static int __init acpi_custom_method_init(void) +{ + if (acpi_debugfs_dir == NULL) + return -ENOENT; + + cm_dentry = debugfs_create_file("custom_method", S_IWUSR, + acpi_debugfs_dir, NULL, &cm_fops); + if (cm_dentry == NULL) + return -ENODEV; + + return 0; +} + +static void __exit acpi_custom_method_exit(void) +{ + if (cm_dentry) + debugfs_remove(cm_dentry); + } + +module_init(acpi_custom_method_init); +module_exit(acpi_custom_method_exit); diff --git a/drivers/acpi/debugfs.c b/drivers/acpi/debugfs.c index e7abc6e3bba..182a9fc3635 100644 --- a/drivers/acpi/debugfs.c +++ b/drivers/acpi/debugfs.c @@ -3,9 +3,6 @@ */ #include -#include -#include -#include #include #include @@ -13,84 +10,9 @@ ACPI_MODULE_NAME("debugfs"); struct dentry *acpi_debugfs_dir; -static struct dentry *cm_dentry; - -/* /sys/kernel/debug/acpi/custom_method */ - -static ssize_t cm_write(struct file *file, const char __user * user_buf, - size_t count, loff_t *ppos) -{ - static char *buf; - static u32 max_size; - static u32 uncopied_bytes; - - struct acpi_table_header table; - acpi_status status; - - if (!(*ppos)) { - /* parse the table header to get the table length */ - if (count <= sizeof(struct acpi_table_header)) - return -EINVAL; - if (copy_from_user(&table, user_buf, - sizeof(struct acpi_table_header))) - return -EFAULT; - uncopied_bytes = max_size = table.length; - buf = kzalloc(max_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - } - - if (buf == NULL) - return -EINVAL; - - if ((*ppos > max_size) || - (*ppos + count > max_size) || - (*ppos + count < count) || - (count > uncopied_bytes)) - return -EINVAL; - - if (copy_from_user(buf + (*ppos), user_buf, count)) { - kfree(buf); - buf = NULL; - return -EFAULT; - } - - uncopied_bytes -= count; - *ppos += count; - - if (!uncopied_bytes) { - status = acpi_install_method(buf); - kfree(buf); - buf = NULL; - if (ACPI_FAILURE(status)) - return -EINVAL; - add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); - } - - return count; -} - -static const struct file_operations cm_fops = { - .write = cm_write, - .llseek = default_llseek, -}; - -static int __init acpi_custom_method_init(void) -{ - if (!acpi_debugfs_dir) - return -ENOENT; - - cm_dentry = debugfs_create_file("custom_method", S_IWUSR, - acpi_debugfs_dir, NULL, &cm_fops); - if (!cm_dentry) - return -ENODEV; - - return 0; -} +EXPORT_SYMBOL_GPL(acpi_debugfs_dir); void __init acpi_debugfs_init(void) { acpi_debugfs_dir = debugfs_create_dir("acpi", NULL); - - acpi_custom_method_init(); } -- cgit v1.2.3-70-g09d2 From 5be7ef002469eaf757065ee16b4100c6d3ead1cc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 28 May 2011 17:26:40 -0700 Subject: scsi: fix scsi_proc new kernel-doc warning Fix kernel-doc warnings in scsi_proc.c: Warning(drivers/scsi/scsi_proc.c:390): No description found for parameter 'dev' Warning(drivers/scsi/scsi_proc.c:390): No description found for parameter 'data' Warning(drivers/scsi/scsi_proc.c:390): Excess function parameter 's' description in 'always_match' Warning(drivers/scsi/scsi_proc.c:390): Excess function parameter 'p' description in 'always_match' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- drivers/scsi/scsi_proc.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index f46855cd853..ad747dc337d 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -381,11 +381,6 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf, return err; } -/** - * proc_scsi_show - show contents of /proc/scsi/scsi (attached devices) - * @s: output goes here - * @p: not used - */ static int always_match(struct device *dev, void *data) { return 1; -- cgit v1.2.3-70-g09d2 From 932df7414336a00f45e5aec62724cf736b0bcfd4 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Mon, 16 May 2011 09:11:00 +0800 Subject: ACPI: processor: fix processor_physically_present in UP kernel Usually, there are multiple processors defined in ACPI table, for example Scope (_PR) { Processor (CPU0, 0x00, 0x00000410, 0x06) {} Processor (CPU1, 0x01, 0x00000410, 0x06) {} Processor (CPU2, 0x02, 0x00000410, 0x06) {} Processor (CPU3, 0x03, 0x00000410, 0x06) {} } processor_physically_present(...) will be called to check whether those processors are physically present. Currently we have below codes in processor_physically_present, cpuid = acpi_get_cpuid(...); if ((cpuid == -1) && (num_possible_cpus() > 1)) return false; return true; In UP kernel, acpi_get_cpuid(...) always return -1 and num_possible_cpus() always return 1, so processor_physically_present(...) always returns true for all passed in processor handles. This is wrong for UP processor or SMP processor running UP kernel. This patch removes the !SMP version of acpi_get_cpuid(), so both UP and SMP kernel use the same acpi_get_cpuid function. And for UP kernel, only processor 0 is valid. https://bugzilla.kernel.org/show_bug.cgi?id=16548 https://bugzilla.kernel.org/show_bug.cgi?id=16357 Tested-by: Anton Kochkov Tested-by: Ambroz Bizjak Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/processor_core.c | 12 +++++++++--- include/acpi/processor.h | 7 ------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 25bf17da69f..02d2a4c9084 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -37,7 +37,6 @@ static struct dmi_system_id __initdata processor_idle_dmi_table[] = { {}, }; -#ifdef CONFIG_SMP static int map_lapic_id(struct acpi_subtable_header *entry, u32 acpi_id, int *apic_id) { @@ -165,7 +164,9 @@ exit: int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) { +#ifdef CONFIG_SMP int i; +#endif int apic_id = -1; apic_id = map_mat_entry(handle, type, acpi_id); @@ -174,14 +175,19 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) if (apic_id == -1) return apic_id; +#ifdef CONFIG_SMP for_each_possible_cpu(i) { if (cpu_physical_id(i) == apic_id) return i; } +#else + /* In UP kernel, only processor 0 is valid */ + if (apic_id == 0) + return apic_id; +#endif return -1; } EXPORT_SYMBOL_GPL(acpi_get_cpuid); -#endif static bool __init processor_physically_present(acpi_handle handle) { @@ -217,7 +223,7 @@ static bool __init processor_physically_present(acpi_handle handle) type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0; cpuid = acpi_get_cpuid(handle, type, acpi_id); - if ((cpuid == -1) && (num_possible_cpus() > 1)) + if (cpuid == -1) return false; return true; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 55192ac0ced..ba4928cae47 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -310,14 +310,7 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) /* in processor_core.c */ void acpi_processor_set_pdc(acpi_handle handle); -#ifdef CONFIG_SMP int acpi_get_cpuid(acpi_handle, int type, u32 acpi_id); -#else -static inline int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) -{ - return -1; -} -#endif /* in processor_throttling.c */ int acpi_processor_tstate_has_changed(struct acpi_processor *pr); -- cgit v1.2.3-70-g09d2 From 28c2103dad04dba29ba86e22dad5735db8f0e13c Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 4 May 2011 22:56:43 +0800 Subject: ACPI: Add D3 cold state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _SxW returns an Integer containing the lowest D-state supported in state Sx. If OSPM has not indicated that it supports _PR3, then the value “3” corresponds to D3. If it has indicated _PR3 support, the value “3” represents D3hot and the value “4” represents D3cold. Linux does set _OSC._PR3, so we should fix it to expect that _SxW can return 4. Signed-off-by: Lin Ming Acked-by: Jesse Barnes Signed-off-by: Len Brown --- drivers/acpi/bus.c | 2 +- drivers/pci/pci-acpi.c | 2 ++ include/acpi/actypes.h | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 9749980ca6c..d1e06c182cd 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -227,7 +227,7 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state) acpi_status status = AE_OK; char object_name[5] = { '_', 'P', 'S', '0' + state, '\0' }; - if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3)) + if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD)) return -EINVAL; /* Make sure this is a valid target state */ diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 7c3b18e78ce..d36f41ea8cb 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -195,6 +195,8 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) return PCI_D2; case ACPI_STATE_D3: return PCI_D3hot; + case ACPI_STATE_D3_COLD: + return PCI_D3cold; } return PCI_POWER_ERROR; } diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 64f838beaab..f72cbe57429 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -501,8 +501,9 @@ typedef u64 acpi_integer; #define ACPI_STATE_D1 (u8) 1 #define ACPI_STATE_D2 (u8) 2 #define ACPI_STATE_D3 (u8) 3 -#define ACPI_D_STATES_MAX ACPI_STATE_D3 -#define ACPI_D_STATE_COUNT 4 +#define ACPI_STATE_D3_COLD (u8) 4 +#define ACPI_D_STATES_MAX ACPI_STATE_D3_COLD +#define ACPI_D_STATE_COUNT 5 #define ACPI_STATE_C0 (u8) 0 #define ACPI_STATE_C1 (u8) 1 -- cgit v1.2.3-70-g09d2 From 08b53f0e6b565fe8dc0b8f929960ed16d76291bd Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 26 Apr 2011 16:29:55 +0800 Subject: ACPI EC: remove redundant code ec->handle is set in ec_parse_device(), so don't bother to set it again. Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- drivers/acpi/ec.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index b3f1d6f52a8..ed8cf17cf03 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -804,8 +804,6 @@ static int acpi_ec_add(struct acpi_device *device) return -EINVAL; } - ec->handle = device->handle; - /* Find and register all query methods */ acpi_walk_namespace(ACPI_TYPE_METHOD, ec->handle, 1, acpi_ec_register_query_methods, NULL, ec, NULL); -- cgit v1.2.3-70-g09d2 From 02c68a02018669d1817c43c42de800975cbec467 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Apr 2011 16:59:53 -0400 Subject: x86 idle: clarify AMD erratum 400 workaround The workaround for AMD erratum 400 uses the term "c1e" falsely suggesting: 1. Intel C1E is somehow involved 2. All AMD processors with C1E are involved Use the string "amd_c1e" instead of simply "c1e" to clarify that this workaround is specific to AMD's version of C1E. Use the string "e400" to clarify that the workaround is specific to AMD processors with Erratum 400. This patch is text-substitution only, with no functional change. cc: x86@kernel.org Acked-by: Borislav Petkov Signed-off-by: Len Brown --- arch/x86/include/asm/acpi.h | 2 +- arch/x86/include/asm/idle.h | 2 +- arch/x86/include/asm/processor.h | 4 ++-- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/process.c | 38 +++++++++++++++++++------------------- arch/x86/kernel/smpboot.c | 2 +- drivers/acpi/processor_idle.c | 2 +- 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 4ea15ca89b2..52fd57f95c5 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -138,7 +138,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) boot_cpu_data.x86_model <= 0x05 && boot_cpu_data.x86_mask < 0x0A) return 1; - else if (c1e_detected) + else if (amd_e400_c1e_detected) return 1; else return max_cstate; diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index 38d87379e27..f49253d7571 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h @@ -16,6 +16,6 @@ static inline void enter_idle(void) { } static inline void exit_idle(void) { } #endif /* CONFIG_X86_64 */ -void c1e_remove_cpu(int cpu); +void amd_e400_remove_cpu(int cpu); #endif /* _ASM_X86_IDLE_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 45636cefa18..b9c03fb3369 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -758,10 +758,10 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); extern void select_idle_routine(const struct cpuinfo_x86 *c); -extern void init_c1e_mask(void); +extern void init_amd_e400_c1e_mask(void); extern unsigned long boot_option_idle_override; -extern bool c1e_detected; +extern bool amd_e400_c1e_detected; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL, IDLE_FORCE_MWAIT}; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1d59834396b..745a602f204 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -887,7 +887,7 @@ static void vgetcpu_set_mode(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); - init_c1e_mask(); + init_amd_e400_c1e_mask(); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ff455419898..2efbfb712fb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -538,45 +538,45 @@ int mwait_usable(const struct cpuinfo_x86 *c) return (edx & MWAIT_EDX_C1); } -bool c1e_detected; -EXPORT_SYMBOL(c1e_detected); +bool amd_e400_c1e_detected; +EXPORT_SYMBOL(amd_e400_c1e_detected); -static cpumask_var_t c1e_mask; +static cpumask_var_t amd_e400_c1e_mask; -void c1e_remove_cpu(int cpu) +void amd_e400_remove_cpu(int cpu) { - if (c1e_mask != NULL) - cpumask_clear_cpu(cpu, c1e_mask); + if (amd_e400_c1e_mask != NULL) + cpumask_clear_cpu(cpu, amd_e400_c1e_mask); } /* - * C1E aware idle routine. We check for C1E active in the interrupt + * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt * pending message MSR. If we detect C1E, then we handle it the same * way as C3 power states (local apic timer and TSC stop) */ -static void c1e_idle(void) +static void amd_e400_idle(void) { if (need_resched()) return; - if (!c1e_detected) { + if (!amd_e400_c1e_detected) { u32 lo, hi; rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); if (lo & K8_INTP_C1E_ACTIVE_MASK) { - c1e_detected = true; + amd_e400_c1e_detected = true; if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halt in AMD C1E"); printk(KERN_INFO "System has AMD C1E enabled\n"); } } - if (c1e_detected) { + if (amd_e400_c1e_detected) { int cpu = smp_processor_id(); - if (!cpumask_test_cpu(cpu, c1e_mask)) { - cpumask_set_cpu(cpu, c1e_mask); + if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) { + cpumask_set_cpu(cpu, amd_e400_c1e_mask); /* * Force broadcast so ACPI can not interfere. */ @@ -619,17 +619,17 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) pm_idle = mwait_idle; } else if (cpu_has_amd_erratum(amd_erratum_400)) { /* E400: APIC timer interrupt does not wake up CPU from C1e */ - printk(KERN_INFO "using C1E aware idle routine\n"); - pm_idle = c1e_idle; + printk(KERN_INFO "using AMD E400 aware idle routine\n"); + pm_idle = amd_e400_idle; } else pm_idle = default_idle; } -void __init init_c1e_mask(void) +void __init init_amd_e400_c1e_mask(void) { - /* If we're using c1e_idle, we need to allocate c1e_mask. */ - if (pm_idle == c1e_idle) - zalloc_cpumask_var(&c1e_mask, GFP_KERNEL); + /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */ + if (pm_idle == amd_e400_idle) + zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); } static int __init idle_setup(char *str) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 08776a95348..2c33633595c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1379,7 +1379,7 @@ void play_dead_common(void) { idle_task_exit(); reset_lazy_tlbstate(); - c1e_remove_cpu(raw_smp_processor_id()); + amd_e400_remove_cpu(raw_smp_processor_id()); mb(); /* Ack it */ diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index d615b7d69bc..431ab11c8c1 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -161,7 +161,7 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr, if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT)) return; - if (c1e_detected) + if (amd_e400_c1e_detected) type = ACPI_STATE_C1; /* -- cgit v1.2.3-70-g09d2 From 06ae40ce073daf233607a3c54a489f2c1e44683e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Apr 2011 15:28:09 -0400 Subject: x86 idle: EXPORT_SYMBOL(default_idle, pm_idle) only when APM demands it In the long run, we don't want default_idle() or (pm_idle)() to be exported outside of process.c. Start by not exporting them to modules, unless the APM build demands it. cc: x86@kernel.org cc: Jiri Kosina Signed-off-by: Len Brown --- arch/x86/kernel/process.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2efbfb712fb..84f3cdae440 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -340,7 +340,9 @@ EXPORT_SYMBOL(boot_option_idle_override); * Powermanagement idle function, if any.. */ void (*pm_idle)(void); +#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE) EXPORT_SYMBOL(pm_idle); +#endif #ifdef CONFIG_X86_32 /* @@ -400,7 +402,7 @@ void default_idle(void) cpu_relax(); } } -#ifdef CONFIG_APM_MODULE +#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE) EXPORT_SYMBOL(default_idle); #endif -- cgit v1.2.3-70-g09d2 From 3b70b2e5fcf6315eb833a1bcc2b810bdc75484ff Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Apr 2011 15:08:48 -0400 Subject: x86 idle floppy: deprecate disable_hlt() Plan to remove floppy_disable_hlt in 2012, an ancient workaround with comments that it should be removed. This allows us to remove clutter and a run-time branch from the idle code. WARN_ONCE() on invocation until it is removed. cc: x86@kernel.org cc: stable@kernel.org # .39.x Signed-off-by: Len Brown --- Documentation/feature-removal-schedule.txt | 8 ++++++++ drivers/block/floppy.c | 1 + 2 files changed, 9 insertions(+) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index b3f35e5f9c9..5540615ac26 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,6 +6,14 @@ be removed from this file. --------------------------- +What: x86 floppy disable_hlt +When: 2012 +Why: ancient workaround of dubious utility clutters the + code used by everybody else. +Who: Len Brown + +--------------------------- + What: PRISM54 When: 2.6.34 diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 77fc76f8aea..20aea9b511b 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1038,6 +1038,7 @@ static void floppy_disable_hlt(void) { unsigned long flags; + WARN_ONCE(1, "floppy_disable_hlt() scheduled for removal in 2012"); spin_lock_irqsave(&floppy_hlt_lock, flags); if (!hlt_disabled) { hlt_disabled = 1; -- cgit v1.2.3-70-g09d2 From 99c63221435963e0cee2402686ba99293c2ffa9e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Apr 2011 15:19:23 -0400 Subject: x86 idle APM: deprecate CONFIG_APM_CPU_IDLE We don't want to export the pm_idle function pointer to modules. Currently CONFIG_APM_CPU_IDLE w/ CONFIG_APM_MODULE forces us to. CONFIG_APM_CPU_IDLE is of dubious value, it runs only on 32-bit uniprocessor laptops that are over 10 years old. It calls into the BIOS during idle, and is known to cause a number of machines to fail. Removing CONFIG_APM_CPU_IDLE and will allow us to stop exporting pm_idle. Any systems that were calling into the APM BIOS at run-time will simply use HLT instead. cc: x86@kernel.org cc: Jiri Kosina cc: stable@kernel.org # .39.x Signed-off-by: Len Brown --- Documentation/feature-removal-schedule.txt | 10 ++++++++++ arch/x86/kernel/apm_32.c | 2 ++ 2 files changed, 12 insertions(+) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 5540615ac26..fc505c1b476 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -14,6 +14,16 @@ Who: Len Brown --------------------------- +What: CONFIG_APM_CPU_IDLE, and its ability to call APM BIOS in idle +When: 2012 +Why: This optional sub-feature of APM is of dubious reliability, + and ancient APM laptops are likely better served by calling HLT. + Deleting CONFIG_APM_CPU_IDLE allows x86 to stop exporting + the pm_idle function pointer to modules. +Who: Len Brown + +---------------------------- + What: PRISM54 When: 2.6.34 diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 0e4f24c2a74..4c4ac32cd12 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -359,6 +359,7 @@ struct apm_user { * idle percentage above which bios idle calls are done */ #ifdef CONFIG_APM_CPU_IDLE +#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012 #define DEFAULT_IDLE_THRESHOLD 95 #else #define DEFAULT_IDLE_THRESHOLD 100 @@ -902,6 +903,7 @@ static void apm_cpu_idle(void) unsigned int jiffies_since_last_check = jiffies - last_jiffies; unsigned int bucket; + WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); recalc: if (jiffies_since_last_check > IDLE_CALC_LIMIT) { use_apm_idle = 0; -- cgit v1.2.3-70-g09d2 From cdaab4a0d330f70c0e5ad8c3f7c65c2e375ea180 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Apr 2011 15:41:17 -0400 Subject: x86 idle: deprecate "no-hlt" cmdline param We'd rather that modern machines not check if HLT works on every entry into idle, for the benefit of machines that had marginal electricals 15-years ago. If those machines are still running the upstream kernel, they can use "idle=poll". The only difference will be that they'll now invoke HLT in machine_hlt(). cc: x86@kernel.org # .39.x Signed-off-by: Len Brown --- Documentation/feature-removal-schedule.txt | 11 +++++++++++ arch/x86/kernel/cpu/bugs.c | 1 + 2 files changed, 12 insertions(+) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index fc505c1b476..f1b0eb02aa1 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -24,6 +24,17 @@ Who: Len Brown ---------------------------- +What: x86_32 "no-hlt" cmdline param +When: 2012 +Why: remove a branch from idle path, simplify code used by everybody. + This option disabled the use of HLT in idle and machine_halt() + for hardware that was flakey 15-years ago. Today we have + "idle=poll" that removed HLT from idle, and so if such a machine + is still running the upstream kernel, "idle=poll" is likely sufficient. +Who: Len Brown + +---------------------------- + What: PRISM54 When: 2.6.34 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c39576cb301..525514cf33c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -19,6 +19,7 @@ static int __init no_halt(char *s) { + WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n"); boot_cpu_data.hlt_works_ok = 0; return 1; } -- cgit v1.2.3-70-g09d2 From 5d4c47e0195b989f284907358bd5c268a44b91c7 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Apr 2011 15:46:09 -0400 Subject: x86 idle: deprecate mwait_idle() and "idle=mwait" cmdline param mwait_idle() is a C1-only idle loop intended to be more efficient than HLT on SMP hardware that supports it. But mwait_idle() has been replaced by the more general mwait_idle_with_hints(), which handles both C1 and deeper C-states. ACPI uses only mwait_idle_with_hints(), and never uses mwait_idle(). Deprecate mwait_idle() and the "idle=mwait" cmdline param to simplify the x86 idle code. After this change, kernels configured with (!CONFIG_ACPI=n && !CONFIG_INTEL_IDLE=n) when run on hardware that support MWAIT will simply use HLT. If MWAIT is desired on those systems, cpuidle and the cpuidle drivers above can be used. cc: x86@kernel.org cc: stable@kernel.org # .39.x Signed-off-by: Len Brown --- Documentation/feature-removal-schedule.txt | 7 +++++++ arch/x86/kernel/process.c | 1 + 2 files changed, 8 insertions(+) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index f1b0eb02aa1..13f0bb3187b 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -35,6 +35,13 @@ Who: Len Brown ---------------------------- +What: x86 "idle=mwait" cmdline param +When: 2012 +Why: simplify x86 idle code +Who: Len Brown + +---------------------------- + What: PRISM54 When: 2.6.34 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 84f3cdae440..8fb182956cb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -645,6 +645,7 @@ static int __init idle_setup(char *str) boot_option_idle_override = IDLE_POLL; } else if (!strcmp(str, "mwait")) { boot_option_idle_override = IDLE_FORCE_MWAIT; + WARN_ONCE(1, "\idle=mwait\" will be removed in 2012\"\n"); } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is -- cgit v1.2.3-70-g09d2 From d5b72ce15ea99a0b8f0d1973074c584daf92d70e Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Sun, 29 May 2011 00:38:46 +0100 Subject: Squashfs: Fix sanity check patches on big-endian systems le64 values should be swapped when accessing on big-endian systems. Signed-off-by: Phillip Lougher --- fs/squashfs/export.c | 2 +- fs/squashfs/fragment.c | 2 +- fs/squashfs/id.c | 2 +- fs/squashfs/super.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 730c56248c9..5e1101ff276 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -147,7 +147,7 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, * table[0] points to the first inode lookup table metadata block, * this should be less than lookup_table_start */ - if (!IS_ERR(table) && table[0] >= lookup_table_start) { + if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 1516a6490bf..0ed6edbc5c7 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -90,7 +90,7 @@ __le64 *squashfs_read_fragment_index_table(struct super_block *sb, * table[0] points to the first fragment table metadata block, this * should be less than fragment_table_start */ - if (!IS_ERR(table) && table[0] >= fragment_table_start) { + if (!IS_ERR(table) && le64_to_cpu(table[0]) >= fragment_table_start) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index a70858e0fb4..d38ea3dab95 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c @@ -93,7 +93,7 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb, * table[0] points to the first id lookup table metadata block, this * should be less than id_table_start */ - if (!IS_ERR(table) && table[0] >= id_table_start) { + if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 6f26abee359..7438850c62d 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -245,7 +245,7 @@ allocate_id_index_table: msblk->id_table = NULL; goto failed_mount; } - next_table = msblk->id_table[0]; + next_table = le64_to_cpu(msblk->id_table[0]); /* Handle inode lookup table */ lookup_table_start = le64_to_cpu(sblk->lookup_table_start); @@ -261,7 +261,7 @@ allocate_id_index_table: msblk->inode_lookup_table = NULL; goto failed_mount; } - next_table = msblk->inode_lookup_table[0]; + next_table = le64_to_cpu(msblk->inode_lookup_table[0]); sb->s_export_op = &squashfs_export_ops; @@ -286,7 +286,7 @@ handle_fragments: msblk->fragment_index = NULL; goto failed_mount; } - next_table = msblk->fragment_index[0]; + next_table = le64_to_cpu(msblk->fragment_index[0]); check_directory_table: /* Sanity check directory_table */ -- cgit v1.2.3-70-g09d2 From 3b6445a6f68b839d1b437756b9c72312e33339b2 Mon Sep 17 00:00:00 2001 From: Jim Rees Date: Tue, 22 Feb 2011 19:31:57 -0500 Subject: NFSv4.1: fix typo in filelayout_check_layout Signed-off-by: Jim Rees Signed-off-by: Benny Halevy --- fs/nfs/nfs4filelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index be79dc9f386..dd6ccf00767 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -428,7 +428,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); if (fl->pattern_offset > lgr->range.offset) { - dprintk("%s pattern_offset %lld to large\n", + dprintk("%s pattern_offset %lld too large\n", __func__, fl->pattern_offset); goto out; } -- cgit v1.2.3-70-g09d2 From 67d51f65bde233b17de304baec4f7c4d086471fe Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 20 May 2011 10:45:05 +0200 Subject: NFSv4.1: use struct nfs_client to qualify deviceid deviceids are unique per server, per layout type. Therefore, in the global cache in the files layout driver deviceids from different servers may clash so we need to qualify them with a struct nfs_client that represents the nfs server that returned the deviceid. Introduced in 2.6.39 commit ea8eecdd "NFSv4.1 move deviceid cache to filelayout driver" Signed-off-by: Benny Halevy --- fs/nfs/nfs4filelayout.c | 2 +- fs/nfs/nfs4filelayout.h | 3 ++- fs/nfs/nfs4filelayoutdev.c | 9 ++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index dd6ccf00767..571c1b032e9 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -440,7 +440,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, } /* find and reference the deviceid */ - dsaddr = nfs4_fl_find_get_deviceid(id); + dsaddr = nfs4_fl_find_get_deviceid(NFS_SERVER(lo->plh_inode)->nfs_client, id); if (dsaddr == NULL) { dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); if (dsaddr == NULL) diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 2b461d77b43..301b95568bd 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -60,6 +60,7 @@ struct nfs4_pnfs_ds { struct nfs4_file_layout_dsaddr { struct hlist_node node; + struct nfs_client *nfs_client; struct nfs4_deviceid deviceid; atomic_t ref; unsigned long flags; @@ -101,7 +102,7 @@ u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx); extern struct nfs4_file_layout_dsaddr * -nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); +nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index db07c7af139..42e32663727 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -431,7 +431,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) dsaddr->stripe_indices = stripe_indices; stripe_indices = NULL; dsaddr->ds_num = num; - + dsaddr->nfs_client = NFS_SERVER(ino)->nfs_client; memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); for (i = 0; i < dsaddr->ds_num; i++) { @@ -516,7 +516,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl } spin_lock(&filelayout_deviceid_lock); - d = nfs4_fl_find_get_deviceid(&new->deviceid); + d = nfs4_fl_find_get_deviceid(new->nfs_client, &new->deviceid); if (d) { spin_unlock(&filelayout_deviceid_lock); nfs4_fl_free_deviceid(new); @@ -610,16 +610,15 @@ nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) } struct nfs4_file_layout_dsaddr * -nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id) +nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) { struct nfs4_file_layout_dsaddr *d; struct hlist_node *n; long hash = nfs4_fl_deviceid_hash(id); - rcu_read_lock(); hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) { - if (!memcmp(&d->deviceid, id, sizeof(*id))) { + if (d->nfs_client == clp && !memcmp(&d->deviceid, id, sizeof(*id))) { if (!atomic_inc_not_zero(&d->ref)) goto fail; rcu_read_unlock(); -- cgit v1.2.3-70-g09d2 From 45df3c8b0f3a58facb125d7631890426706c0bfa Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 5 May 2011 08:28:46 +0300 Subject: pnfs: resolve header dependency in pnfs.h Some definitions in the header file depend on nfs_fs.h so pnfs.h can't be included independently. Signed-off-by: Benny Halevy --- fs/nfs/pnfs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0c015bad9e7..720bb9da3f9 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -30,6 +30,7 @@ #ifndef FS_NFS_PNFS_H #define FS_NFS_PNFS_H +#include #include enum { -- cgit v1.2.3-70-g09d2 From a1eaecbc4c8307e27772d6584ef85a2e93250661 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 19 May 2011 22:14:47 -0400 Subject: NFSv4.1: make deviceid cache global Move deviceid cache from the pnfs files layout driver to the generic layer in preparation for the objects layout driver. Signed-off-by: Benny Halevy --- fs/nfs/Makefile | 2 +- fs/nfs/nfs4filelayout.c | 10 +-- fs/nfs/nfs4filelayout.h | 8 +-- fs/nfs/nfs4filelayoutdev.c | 104 ++++-------------------------- fs/nfs/pnfs.h | 17 +++++ fs/nfs/pnfs_dev.c | 156 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 194 insertions(+), 103 deletions(-) create mode 100644 fs/nfs/pnfs_dev.c diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 4776ff9e381..7516a8aaa42 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -15,7 +15,7 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o -nfs-$(CONFIG_NFS_V4_1) += pnfs.o +nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 571c1b032e9..4c67a6f6519 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -421,6 +421,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_deviceid *id, gfp_t gfp_flags) { + struct nfs4_deviceid_node *d; struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; struct nfs_server *nfss = NFS_SERVER(lo->plh_inode); @@ -440,12 +441,13 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, } /* find and reference the deviceid */ - dsaddr = nfs4_fl_find_get_deviceid(NFS_SERVER(lo->plh_inode)->nfs_client, id); - if (dsaddr == NULL) { + d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->nfs_client, id); + if (d == NULL) { dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); if (dsaddr == NULL) goto out; - } + } else + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); fl->dsaddr = dsaddr; if (fl->first_stripe_index < 0 || @@ -535,7 +537,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, memcpy(id, p, sizeof(*id)); p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); - print_deviceid(id); + nfs4_print_deviceid(id); nfl_util = be32_to_cpup(p++); if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 301b95568bd..0ace0a2d6d7 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -59,10 +59,7 @@ struct nfs4_pnfs_ds { #define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001 struct nfs4_file_layout_dsaddr { - struct hlist_node node; - struct nfs_client *nfs_client; - struct nfs4_deviceid deviceid; - atomic_t ref; + struct nfs4_deviceid_node id_node; unsigned long flags; u32 stripe_count; u8 *stripe_indices; @@ -96,13 +93,10 @@ extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); extern void print_ds(struct nfs4_pnfs_ds *ds); -extern void print_deviceid(struct nfs4_deviceid *dev_id); u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx); -extern struct nfs4_file_layout_dsaddr * -nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 42e32663727..eda4527a57e 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -36,30 +36,6 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -/* - * Device ID RCU cache. A device ID is unique per client ID and layout type. - */ -#define NFS4_FL_DEVICE_ID_HASH_BITS 5 -#define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS) -#define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1) - -static inline u32 -nfs4_fl_deviceid_hash(struct nfs4_deviceid *id) -{ - unsigned char *cptr = (unsigned char *)id->data; - unsigned int nbytes = NFS4_DEVICEID4_SIZE; - u32 x = 0; - - while (nbytes--) { - x *= 37; - x += *cptr++; - } - return x & NFS4_FL_DEVICE_ID_HASH_MASK; -} - -static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE]; -static DEFINE_SPINLOCK(filelayout_deviceid_lock); - /* * Data server cache * @@ -89,27 +65,6 @@ print_ds(struct nfs4_pnfs_ds *ds) ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); } -void -print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr) -{ - int i; - - ifdebug(FACILITY) { - printk("%s dsaddr->ds_num %d\n", __func__, - dsaddr->ds_num); - for (i = 0; i < dsaddr->ds_num; i++) - print_ds(dsaddr->ds_list[i]); - } -} - -void print_deviceid(struct nfs4_deviceid *id) -{ - u32 *p = (u32 *)id; - - dprintk("%s: device id= [%x%x%x%x]\n", __func__, - p[0], p[1], p[2], p[3]); -} - /* nfs4_ds_cache_lock is held */ static struct nfs4_pnfs_ds * _data_server_lookup_locked(u32 ip_addr, u32 port) @@ -207,7 +162,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) struct nfs4_pnfs_ds *ds; int i; - print_deviceid(&dsaddr->deviceid); + nfs4_print_deviceid(&dsaddr->id_node.deviceid); for (i = 0; i < dsaddr->ds_num; i++) { ds = dsaddr->ds_list[i]; @@ -431,8 +386,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) dsaddr->stripe_indices = stripe_indices; stripe_indices = NULL; dsaddr->ds_num = num; - dsaddr->nfs_client = NFS_SERVER(ino)->nfs_client; - memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); + nfs4_init_deviceid_node(&dsaddr->id_node, NFS_SERVER(ino)->nfs_client, + &pdev->dev_id); for (i = 0; i < dsaddr->ds_num; i++) { int j; @@ -505,8 +460,8 @@ out_err: static struct nfs4_file_layout_dsaddr * decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) { - struct nfs4_file_layout_dsaddr *d, *new; - long hash; + struct nfs4_deviceid_node *d; + struct nfs4_file_layout_dsaddr *n, *new; new = decode_device(inode, dev, gfp_flags); if (!new) { @@ -515,20 +470,13 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl return NULL; } - spin_lock(&filelayout_deviceid_lock); - d = nfs4_fl_find_get_deviceid(new->nfs_client, &new->deviceid); - if (d) { - spin_unlock(&filelayout_deviceid_lock); + d = nfs4_insert_deviceid_node(&new->id_node); + n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + if (n != new) { nfs4_fl_free_deviceid(new); - return d; + return n; } - INIT_HLIST_NODE(&new->node); - atomic_set(&new->ref, 1); - hash = nfs4_fl_deviceid_hash(&new->deviceid); - hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]); - spin_unlock(&filelayout_deviceid_lock); - return new; } @@ -600,34 +548,8 @@ out_free: void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) { - if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) { - hlist_del_rcu(&dsaddr->node); - spin_unlock(&filelayout_deviceid_lock); - - synchronize_rcu(); + if (nfs4_put_deviceid_node(&dsaddr->id_node)) nfs4_fl_free_deviceid(dsaddr); - } -} - -struct nfs4_file_layout_dsaddr * -nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) -{ - struct nfs4_file_layout_dsaddr *d; - struct hlist_node *n; - long hash = nfs4_fl_deviceid_hash(id); - - rcu_read_lock(); - hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) { - if (d->nfs_client == clp && !memcmp(&d->deviceid, id, sizeof(*id))) { - if (!atomic_inc_not_zero(&d->ref)) - goto fail; - rcu_read_unlock(); - return d; - } - } -fail: - rcu_read_unlock(); - return NULL; } /* @@ -675,15 +597,15 @@ static void filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, int err, u32 ds_addr) { - u32 *p = (u32 *)&dsaddr->deviceid; + u32 *p = (u32 *)&dsaddr->id_node.deviceid; printk(KERN_ERR "NFS: data server %x connection error %d." " Deviceid [%x%x%x%x] marked out of use.\n", ds_addr, err, p[0], p[1], p[2], p[3]); - spin_lock(&filelayout_deviceid_lock); + spin_lock(&nfs4_ds_cache_lock); dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; - spin_unlock(&filelayout_deviceid_lock); + spin_unlock(&nfs4_ds_cache_lock); } struct nfs4_pnfs_ds * diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 720bb9da3f9..3831ad04a23 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -157,6 +157,23 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier); void pnfs_set_layoutcommit(struct nfs_write_data *wdata); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); +/* pnfs_dev.c */ +struct nfs4_deviceid_node { + struct hlist_node node; + const struct nfs_client *nfs_client; + struct nfs4_deviceid deviceid; + atomic_t ref; +}; + +void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); +struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct nfs_client *, const struct nfs4_deviceid *); +struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct nfs_client *, const struct nfs4_deviceid *); +void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, + const struct nfs_client *, + const struct nfs4_deviceid *); +struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); +bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); + static inline int lo_fail_bit(u32 iomode) { return iomode == IOMODE_RW ? diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c new file mode 100644 index 00000000000..bf05189a7cb --- /dev/null +++ b/fs/nfs/pnfs_dev.c @@ -0,0 +1,156 @@ +/* + * Device operations for the pnfs client. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand + * Garth Goodson + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#include "pnfs.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS + +/* + * Device ID RCU cache. A device ID is unique per server and layout type. + */ +#define NFS4_DEVICE_ID_HASH_BITS 5 +#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) +#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) + +static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; +static DEFINE_SPINLOCK(nfs4_deviceid_lock); + +void +nfs4_print_deviceid(const struct nfs4_deviceid *id) +{ + u32 *p = (u32 *)id; + + dprintk("%s: device id= [%x%x%x%x]\n", __func__, + p[0], p[1], p[2], p[3]); +} +EXPORT_SYMBOL_GPL(nfs4_print_deviceid); + +static inline u32 +nfs4_deviceid_hash(const struct nfs4_deviceid *id) +{ + unsigned char *cptr = (unsigned char *)id->data; + unsigned int nbytes = NFS4_DEVICEID4_SIZE; + u32 x = 0; + + while (nbytes--) { + x *= 37; + x += *cptr++; + } + return x & NFS4_DEVICE_ID_HASH_MASK; +} + +/* + * Lookup a deviceid in cache and get a reference count on it if found + * + * @clp nfs_client associated with deviceid + * @id deviceid to look up + */ +struct nfs4_deviceid_node * +nfs4_find_get_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id) +{ + struct nfs4_deviceid_node *d; + struct hlist_node *n; + long hash = nfs4_deviceid_hash(id); + + rcu_read_lock(); + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) { + if (d->nfs_client == clp && !memcmp(&d->deviceid, id, sizeof(*id))) { + if (!atomic_inc_not_zero(&d->ref)) + goto fail; + rcu_read_unlock(); + return d; + } + } +fail: + rcu_read_unlock(); + return NULL; +} +EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); + +void +nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, + const struct nfs_client *nfs_client, + const struct nfs4_deviceid *id) +{ + d->nfs_client = nfs_client; + d->deviceid = *id; +} +EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); + +/* + * Uniquely initialize and insert a deviceid node into cache + * + * @new new deviceid node + * Note that the caller must set up new->nfs_client and new->deviceid + * + * @ret the inserted node, if none found, otherwise, the found entry. + */ +struct nfs4_deviceid_node * +nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) +{ + struct nfs4_deviceid_node *d; + long hash; + + spin_lock(&nfs4_deviceid_lock); + d = nfs4_find_get_deviceid(new->nfs_client, &new->deviceid); + if (d) { + spin_unlock(&nfs4_deviceid_lock); + return d; + } + + INIT_HLIST_NODE(&new->node); + atomic_set(&new->ref, 1); + hash = nfs4_deviceid_hash(&new->deviceid); + hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); + spin_unlock(&nfs4_deviceid_lock); + + return new; +} +EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); + +/* + * Dereference a deviceid node and delete it when its reference count drops + * to zero. + * + * @d deviceid node to put + * + * @ret true iff the node was deleted + */ +bool +nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) +{ + if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) + return false; + hlist_del_init_rcu(&d->node); + spin_unlock(&nfs4_deviceid_lock); + synchronize_rcu(); + return true; +} +EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); -- cgit v1.2.3-70-g09d2 From a43a9d93d40a69eceeb4e4a4c860cc20186d475c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 29 May 2011 12:40:50 +0200 Subject: [S390] mm: fix storage key handling page_get_storage_key() and page_set_storage_key() expect a page address and not its page frame number. This got inconsistent with 2d42552d "[S390] merge page_test_dirty and page_clear_dirty". Result is that we read/write storage keys from random pages and do not have a working dirty bit tracking at all. E.g. SetPageUpdate() doesn't clear the dirty bit of requested pages, which for example ext4 doesn't like very much and panics after a while. Unable to handle kernel paging request at virtual user address (null) Oops: 0004 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 1 Not tainted 2.6.39-07551-g139f37f-dirty #152 Process flush-94:0 (pid: 1576, task: 000000003eb34538, ksp: 000000003c287b70) Krnl PSW : 0704c00180000000 0000000000316b12 (jbd2_journal_file_inode+0x10e/0x138) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 EA:3 Krnl GPRS: 0000000000000000 0000000000000000 0000000000000000 0700000000000000 0000000000316a62 000000003eb34cd0 0000000000000025 000000003c287b88 0000000000000001 000000003c287a70 000000003f1ec678 000000003f1ec000 0000000000000000 000000003e66ec00 0000000000316a62 000000003c287988 Krnl Code: 0000000000316b04: f0a0000407f4 srp 4(11,%r0),2036,0 0000000000316b0a: b9020022 ltgr %r2,%r2 0000000000316b0e: a7740015 brc 7,316b38 >0000000000316b12: e3d0c0000024 stg %r13,0(%r12) 0000000000316b18: 4120c010 la %r2,16(%r12) 0000000000316b1c: 4130d060 la %r3,96(%r13) 0000000000316b20: e340d0600004 lg %r4,96(%r13) 0000000000316b26: c0e50002b567 brasl %r14,36d5f4 Call Trace: ([<0000000000316a62>] jbd2_journal_file_inode+0x5e/0x138) [<00000000002da13c>] mpage_da_map_and_submit+0x2e8/0x42c [<00000000002daac2>] ext4_da_writepages+0x2da/0x504 [<00000000002597e8>] writeback_single_inode+0xf8/0x268 [<0000000000259f06>] writeback_sb_inodes+0xd2/0x18c [<000000000025a700>] writeback_inodes_wb+0x80/0x168 [<000000000025aa92>] wb_writeback+0x2aa/0x324 [<000000000025abde>] wb_do_writeback+0xd2/0x274 [<000000000025ae3a>] bdi_writeback_thread+0xba/0x1c4 [<00000000001737be>] kthread+0xa6/0xb0 [<000000000056c1da>] kernel_thread_starter+0x6/0xc [<000000000056c1d4>] kernel_thread_starter+0x0/0xc INFO: lockdep is turned off. Last Breaking-Event-Address: [<0000000000316a8a>] jbd2_journal_file_inode+0x86/0x138 Reported-by: Sebastian Ott Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pgtable.h | 16 ++++++++-------- include/linux/page-flags.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c4773a2ef3d..e4efacfe1b6 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -577,16 +577,16 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long pfn, bits; + unsigned long address, bits; unsigned char skey; - pfn = pte_val(*ptep) >> PAGE_SHIFT; - skey = page_get_storage_key(pfn); + address = pte_val(*ptep) & PAGE_MASK; + skey = page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Clear page changed & referenced bit in the storage key */ if (bits) { skey ^= bits; - page_set_storage_key(pfn, skey, 1); + page_set_storage_key(address, skey, 1); } /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ @@ -628,16 +628,16 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long pfn; + unsigned long address; unsigned long okey, nkey; - pfn = pte_val(*ptep) >> PAGE_SHIFT; - okey = nkey = page_get_storage_key(pfn); + address = pte_val(*ptep) & PAGE_MASK; + okey = nkey = page_get_storage_key(address); nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set page access key and fetch protection bit from pgste */ nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; if (okey != nkey) - page_set_storage_key(pfn, nkey, 1); + page_set_storage_key(address, nkey, 1); #endif } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 79a6700b716..6081493db68 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -308,7 +308,7 @@ static inline void SetPageUptodate(struct page *page) { #ifdef CONFIG_S390 if (!test_and_set_bit(PG_uptodate, &page->flags)) - page_set_storage_key(page_to_pfn(page), PAGE_DEFAULT_KEY, 0); + page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, 0); #else /* * Memory barrier must be issued before setting the PG_uptodate bit, -- cgit v1.2.3-70-g09d2 From 3c5cffb66d8ea94832650fcb55194715b0229088 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 29 May 2011 12:40:51 +0200 Subject: [S390] mm: fix mmu_gather rework Quite a few functions that get called from the tlb gather code require that preemption must be disabled. So disable preemption inside of the called functions instead. The only drawback is that rcu_table_freelist_finish() doesn't get necessarily called on the cpu(s) that filled the free lists. So we may see a delay, until we finally see an rcu callback. However over time this shouldn't matter. So we get rid of lots of "BUG: using smp_processor_id() in preemptible" messages. Signed-off-by: Heiko Carstens --- arch/s390/mm/pgtable.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 14c6fae6fe6..b09763fe5da 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -71,12 +71,15 @@ static void rcu_table_freelist_callback(struct rcu_head *head) void rcu_table_freelist_finish(void) { - struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist); + struct rcu_table_freelist **batchp = &get_cpu_var(rcu_table_freelist); + struct rcu_table_freelist *batch = *batchp; if (!batch) - return; + goto out; call_rcu(&batch->rcu, rcu_table_freelist_callback); - __get_cpu_var(rcu_table_freelist) = NULL; + *batchp = NULL; +out: + put_cpu_var(rcu_table_freelist); } static void smp_sync(void *arg) @@ -141,20 +144,23 @@ void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) { struct rcu_table_freelist *batch; + preempt_disable(); if (atomic_read(&mm->mm_users) < 2 && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { crst_table_free(mm, table); - return; + goto out; } batch = rcu_table_freelist_get(mm); if (!batch) { smp_call_function(smp_sync, NULL, 1); crst_table_free(mm, table); - return; + goto out; } batch->table[--batch->crst_index] = table; if (batch->pgt_index >= batch->crst_index) rcu_table_freelist_finish(); +out: + preempt_enable(); } #ifdef CONFIG_64BIT @@ -323,16 +329,17 @@ void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned long bits; + preempt_disable(); if (atomic_read(&mm->mm_users) < 2 && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { page_table_free(mm, table); - return; + goto out; } batch = rcu_table_freelist_get(mm); if (!batch) { smp_call_function(smp_sync, NULL, 1); page_table_free(mm, table); - return; + goto out; } bits = (mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); @@ -345,6 +352,8 @@ void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) batch->table[batch->pgt_index++] = table; if (batch->pgt_index >= batch->crst_index) rcu_table_freelist_finish(); +out: + preempt_enable(); } /* -- cgit v1.2.3-70-g09d2 From 4c2593270133708698d4b8cea2dab469479ad13b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sun, 29 May 2011 12:52:55 +0100 Subject: dm table: allow targets to support discards internally Permit a target to support discards regardless of whether or not all its underlying devices do. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 6 +++++- include/linux/device-mapper.h | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index cb8380c9767..215e112d153 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1346,7 +1346,8 @@ bool dm_table_supports_discards(struct dm_table *t) return 0; /* - * Ensure that at least one underlying device supports discards. + * Unless any target used by the table set discards_supported, + * require at least one underlying device to support discards. * t->devices includes internal dm devices such as mirror logs * so we need to use iterate_devices here, which targets * supporting discard must provide. @@ -1354,6 +1355,9 @@ bool dm_table_supports_discards(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); + if (ti->discards_supported) + return 1; + if (ti->type->iterate_devices && ti->type->iterate_devices(ti, device_discard_capable, NULL)) return 1; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 32a4423710f..4427e045405 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -191,6 +191,12 @@ struct dm_target { /* Used to provide an error string from the ctr */ char *error; + + /* + * Set if this target needs to receive discards regardless of + * whether or not its underlying devices have support. + */ + unsigned discards_supported:1; }; /* Each target can link one of these into the table */ -- cgit v1.2.3-70-g09d2 From f4808ca99a203f20b4475601748e44b25a65bdec Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Sun, 29 May 2011 13:02:52 +0100 Subject: dm table: reject devices without request fns This patch adds a check that a block device has a request function defined before it is used. Otherwise, misconfiguration can cause an oops. Because we are allowing devices with zero size e.g. an offline multipath device as in commit 2cd54d9bedb79a97f014e86c0da393416b264eb3 ("dm: allow offline devices") there needs to be an additional check to ensure devices are initialised. Some block devices, like a loop device without a backing file, exist but have no request function. Reproducer is trivial: dm-mirror on unbound loop device (no backing file on loop devices) dmsetup create x --table "0 8 mirror core 2 8 sync 2 /dev/loop0 0 /dev/loop1 0" and mirror resync will immediatelly cause OOps. BUG: unable to handle kernel NULL pointer dereference at (null) ? generic_make_request+0x2bd/0x590 ? kmem_cache_alloc+0xad/0x190 submit_bio+0x53/0xe0 ? bio_add_page+0x3b/0x50 dispatch_io+0x1ca/0x210 [dm_mod] ? read_callback+0x0/0xd0 [dm_mirror] dm_io+0xbb/0x290 [dm_mod] do_mirror+0x1e0/0x748 [dm_mirror] Signed-off-by: Milan Broz Reported-by: Zdenek Kabelac Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 215e112d153..451c3bb176d 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -362,6 +362,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { + struct request_queue *q; struct queue_limits *limits = data; struct block_device *bdev = dev->bdev; sector_t dev_size = @@ -370,6 +371,22 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, limits->logical_block_size >> SECTOR_SHIFT; char b[BDEVNAME_SIZE]; + /* + * Some devices exist without request functions, + * such as loop devices not yet bound to backing files. + * Forbid the use of such devices. + */ + q = bdev_get_queue(bdev); + if (!q || !q->make_request_fn) { + DMWARN("%s: %s is not yet initialised: " + "start=%llu, len=%llu, dev_size=%llu", + dm_device_name(ti->table->md), bdevname(bdev, b), + (unsigned long long)start, + (unsigned long long)len, + (unsigned long long)dev_size); + return 1; + } + if (!dev_size) return 0; -- cgit v1.2.3-70-g09d2 From 6f13f6fba76edc7d0e7580c5deee829d59a41b2f Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Sun, 29 May 2011 13:02:55 +0100 Subject: dm mpath: do not fail paths after integrity errors Integrity errors need to be passed to the owner of the integrity metadata for processing. Consequently EILSEQ should be passed up the stack. Cc: stable@kernel.org Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index a550a057d99..aa4e570c2cb 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1290,7 +1290,7 @@ static int do_end_io(struct multipath *m, struct request *clone, if (!error && !clone->errors) return 0; /* I/O complete */ - if (error == -EOPNOTSUPP || error == -EREMOTEIO) + if (error == -EOPNOTSUPP || error == -EREMOTEIO || error == -EILSEQ) return error; if (mpio->pgpath) -- cgit v1.2.3-70-g09d2 From a705a34a565a5445bf731bd8006d51ea4d2b4236 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:02:58 +0100 Subject: dm kcopyd: avoid pointless job splitting Don't split SUB_JOB_SIZE jobs If the job size equals SUB_JOB_SIZE, there is no point in splitting it. Splitting it just unnecessarily wastes time, because the split job size is SUB_JOB_SIZE too. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 1bb73a13ca4..505b6f5cd38 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -578,7 +578,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->fn = fn; job->context = context; - if (job->source.count < SUB_JOB_SIZE) + if (job->source.count <= SUB_JOB_SIZE) dispatch_job(job); else { -- cgit v1.2.3-70-g09d2 From c6ea41fbbe08f270a8edef99dc369faf809d1bd6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:00 +0100 Subject: dm kcopyd: preallocate sub jobs to avoid deadlock There's a possible theoretical deadlock in dm-kcopyd because multiple allocations from the same mempool are required to finish a request. Avoid this by preallocating sub jobs. There is a mempool of 512 entries. Each request requires up to 9 entries from the mempool. If we have at least 57 concurrent requests running, the mempool may overflow and mempool allocations may start blocking until another entry is freed to the mempool. Because the same thread is used to free entries to the mempool and allocate entries from the mempool, this may result in a deadlock. This patch changes it so that one mempool entry contains all 9 "struct kcopyd_job" required to fulfill the whole request. The allocation is done only once in dm_kcopyd_copy and no further mempool allocations are done during request processing. If dm_kcopyd_copy is not run in the completion thread, this implementation is deadlock-free. MIN_JOBS needs reducing accordingly and we've chosen to reduce it further to 8. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 49 +++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 505b6f5cd38..24fb42ed7b8 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -27,6 +27,10 @@ #include "dm.h" +#define SUB_JOB_SIZE 128 +#define SPLIT_COUNT 8 +#define MIN_JOBS 8 + /*----------------------------------------------------------------- * Each kcopyd client has its own little pool of preallocated * pages for kcopyd io. @@ -216,16 +220,17 @@ struct kcopyd_job { struct mutex lock; atomic_t sub_jobs; sector_t progress; -}; -/* FIXME: this should scale with the number of pages */ -#define MIN_JOBS 512 + struct kcopyd_job *master_job; +}; static struct kmem_cache *_job_cache; int __init dm_kcopyd_init(void) { - _job_cache = KMEM_CACHE(kcopyd_job, 0); + _job_cache = kmem_cache_create("kcopyd_job", + sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1), + __alignof__(struct kcopyd_job), 0, NULL); if (!_job_cache) return -ENOMEM; @@ -299,7 +304,12 @@ static int run_complete_job(struct kcopyd_job *job) if (job->pages) kcopyd_put_pages(kc, job->pages); - mempool_free(job, kc->job_pool); + /* + * If this is the master job, the sub jobs have already + * completed so we can free everything. + */ + if (job->master_job == job) + mempool_free(job, kc->job_pool); fn(read_err, write_err, context); if (atomic_dec_and_test(&kc->nr_jobs)) @@ -460,14 +470,14 @@ static void dispatch_job(struct kcopyd_job *job) wake(kc); } -#define SUB_JOB_SIZE 128 static void segment_complete(int read_err, unsigned long write_err, void *context) { /* FIXME: tidy this function */ sector_t progress = 0; sector_t count = 0; - struct kcopyd_job *job = (struct kcopyd_job *) context; + struct kcopyd_job *sub_job = (struct kcopyd_job *) context; + struct kcopyd_job *job = sub_job->master_job; struct dm_kcopyd_client *kc = job->kc; mutex_lock(&job->lock); @@ -498,8 +508,6 @@ static void segment_complete(int read_err, unsigned long write_err, if (count) { int i; - struct kcopyd_job *sub_job = mempool_alloc(kc->job_pool, - GFP_NOIO); *sub_job = *job; sub_job->source.sector += progress; @@ -511,7 +519,7 @@ static void segment_complete(int read_err, unsigned long write_err, } sub_job->fn = segment_complete; - sub_job->context = job; + sub_job->context = sub_job; dispatch_job(sub_job); } else if (atomic_dec_and_test(&job->sub_jobs)) { @@ -531,19 +539,19 @@ static void segment_complete(int read_err, unsigned long write_err, } /* - * Create some little jobs that will do the move between - * them. + * Create some sub jobs to share the work between them. */ -#define SPLIT_COUNT 8 -static void split_job(struct kcopyd_job *job) +static void split_job(struct kcopyd_job *master_job) { int i; - atomic_inc(&job->kc->nr_jobs); + atomic_inc(&master_job->kc->nr_jobs); - atomic_set(&job->sub_jobs, SPLIT_COUNT); - for (i = 0; i < SPLIT_COUNT; i++) - segment_complete(0, 0u, job); + atomic_set(&master_job->sub_jobs, SPLIT_COUNT); + for (i = 0; i < SPLIT_COUNT; i++) { + master_job[i + 1].master_job = master_job; + segment_complete(0, 0u, &master_job[i + 1]); + } } int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, @@ -553,7 +561,8 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, struct kcopyd_job *job; /* - * Allocate a new job. + * Allocate an array of jobs consisting of one master job + * followed by SPLIT_COUNT sub jobs. */ job = mempool_alloc(kc->job_pool, GFP_NOIO); @@ -577,10 +586,10 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->fn = fn; job->context = context; + job->master_job = job; if (job->source.count <= SUB_JOB_SIZE) dispatch_job(job); - else { mutex_init(&job->lock); job->progress = 0; -- cgit v1.2.3-70-g09d2 From 4cc1b4cffd187a5c5d6264c8d766c49b3c57fb05 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:02 +0100 Subject: dm kcopyd: remove superfluous page allocation spinlock Remove the spinlock protecting the pages allocation. The spinlock is only taken on initialization or from single-threaded workqueue. Therefore, the spinlock is useless. The spinlock is taken in kcopyd_get_pages and kcopyd_put_pages. kcopyd_get_pages is only called from run_pages_job, which is only called from process_jobs called from do_work. kcopyd_put_pages is called from client_alloc_pages (which is initialization function) or from run_complete_job. run_complete_job is only called from process_jobs called from do_work. Another spinlock, kc->job_lock is taken each time someone pushes or pops some work for the worker thread. Once we take kc->job_lock, we guarantee that any written memory is visible to the other CPUs. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 24fb42ed7b8..ed957791639 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -36,7 +36,6 @@ * pages for kcopyd io. *---------------------------------------------------------------*/ struct dm_kcopyd_client { - spinlock_t lock; struct page_list *pages; unsigned int nr_pages; unsigned int nr_free_pages; @@ -99,11 +98,8 @@ static int kcopyd_get_pages(struct dm_kcopyd_client *kc, { struct page_list *pl; - spin_lock(&kc->lock); - if (kc->nr_free_pages < nr) { - spin_unlock(&kc->lock); + if (kc->nr_free_pages < nr) return -ENOMEM; - } kc->nr_free_pages -= nr; for (*pages = pl = kc->pages; --nr; pl = pl->next) @@ -112,8 +108,6 @@ static int kcopyd_get_pages(struct dm_kcopyd_client *kc, kc->pages = pl->next; pl->next = NULL; - spin_unlock(&kc->lock); - return 0; } @@ -121,14 +115,12 @@ static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) { struct page_list *cursor; - spin_lock(&kc->lock); for (cursor = pl; cursor->next; cursor = cursor->next) kc->nr_free_pages++; kc->nr_free_pages++; cursor->next = kc->pages; kc->pages = pl; - spin_unlock(&kc->lock); } /* @@ -625,7 +617,6 @@ int dm_kcopyd_client_create(unsigned int nr_pages, if (!kc) return -ENOMEM; - spin_lock_init(&kc->lock); spin_lock_init(&kc->job_lock); INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); -- cgit v1.2.3-70-g09d2 From f99b55eec795bd0fd577ab3ca06f3acfbe3b1ab1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:04 +0100 Subject: dm kcopyd: add gfp parm to alloc_pl Introduce a parameter for gfp flags to alloc_pl() for use in following patches. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index ed957791639..0270844c2a3 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -70,15 +70,15 @@ static void wake(struct dm_kcopyd_client *kc) queue_work(kc->kcopyd_wq, &kc->kcopyd_work); } -static struct page_list *alloc_pl(void) +static struct page_list *alloc_pl(gfp_t gfp) { struct page_list *pl; - pl = kmalloc(sizeof(*pl), GFP_KERNEL); + pl = kmalloc(sizeof(*pl), gfp); if (!pl) return NULL; - pl->page = alloc_page(GFP_KERNEL); + pl->page = alloc_page(gfp); if (!pl->page) { kfree(pl); return NULL; @@ -143,7 +143,7 @@ static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr) struct page_list *pl = NULL, *next; for (i = 0; i < nr; i++) { - next = alloc_pl(); + next = alloc_pl(GFP_KERNEL); if (!next) { if (pl) drop_pages(pl); -- cgit v1.2.3-70-g09d2 From d04714580f12379fcf7a0f799e86c92b96dd4e1f Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:07 +0100 Subject: dm kcopyd: alloc pages from the main page allocator This patch changes dm-kcopyd so that it allocates pages from the main page allocator with __GFP_NOWARN | __GFP_NORETRY flags (so that it can fail in case of memory pressure). If the allocation fails, dm-kcopyd allocates pages from its own reserve. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 91 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 31 deletions(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 0270844c2a3..5dfbdcb40a4 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -37,8 +37,8 @@ *---------------------------------------------------------------*/ struct dm_kcopyd_client { struct page_list *pages; - unsigned int nr_pages; - unsigned int nr_free_pages; + unsigned nr_reserved_pages; + unsigned nr_free_pages; struct dm_io_client *io_client; @@ -70,6 +70,9 @@ static void wake(struct dm_kcopyd_client *kc) queue_work(kc->kcopyd_wq, &kc->kcopyd_work); } +/* + * Obtain one page for the use of kcopyd. + */ static struct page_list *alloc_pl(gfp_t gfp) { struct page_list *pl; @@ -93,34 +96,56 @@ static void free_pl(struct page_list *pl) kfree(pl); } -static int kcopyd_get_pages(struct dm_kcopyd_client *kc, - unsigned int nr, struct page_list **pages) +/* + * Add the provided pages to a client's free page list, releasing + * back to the system any beyond the reserved_pages limit. + */ +static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) { - struct page_list *pl; - - if (kc->nr_free_pages < nr) - return -ENOMEM; + struct page_list *next; - kc->nr_free_pages -= nr; - for (*pages = pl = kc->pages; --nr; pl = pl->next) - ; + do { + next = pl->next; - kc->pages = pl->next; - pl->next = NULL; + if (kc->nr_free_pages >= kc->nr_reserved_pages) + free_pl(pl); + else { + pl->next = kc->pages; + kc->pages = pl; + kc->nr_free_pages++; + } - return 0; + pl = next; + } while (pl); } -static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) +static int kcopyd_get_pages(struct dm_kcopyd_client *kc, + unsigned int nr, struct page_list **pages) { - struct page_list *cursor; + struct page_list *pl; - for (cursor = pl; cursor->next; cursor = cursor->next) - kc->nr_free_pages++; + *pages = NULL; + + do { + pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY); + if (unlikely(!pl)) { + /* Use reserved pages */ + pl = kc->pages; + if (unlikely(!pl)) + goto out_of_memory; + kc->pages = pl->next; + kc->nr_free_pages--; + } + pl->next = *pages; + *pages = pl; + } while (--nr); + + return 0; - kc->nr_free_pages++; - cursor->next = kc->pages; - kc->pages = pl; +out_of_memory: + if (*pages) + kcopyd_put_pages(kc, *pages); + return -ENOMEM; } /* @@ -137,12 +162,15 @@ static void drop_pages(struct page_list *pl) } } -static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr) +/* + * Allocate and reserve nr_pages for the use of a specific client. + */ +static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned nr_pages) { - unsigned int i; + unsigned i; struct page_list *pl = NULL, *next; - for (i = 0; i < nr; i++) { + for (i = 0; i < nr_pages; i++) { next = alloc_pl(GFP_KERNEL); if (!next) { if (pl) @@ -153,17 +181,18 @@ static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr) pl = next; } + kc->nr_reserved_pages += nr_pages; kcopyd_put_pages(kc, pl); - kc->nr_pages += nr; + return 0; } static void client_free_pages(struct dm_kcopyd_client *kc) { - BUG_ON(kc->nr_free_pages != kc->nr_pages); + BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages); drop_pages(kc->pages); kc->pages = NULL; - kc->nr_free_pages = kc->nr_pages = 0; + kc->nr_free_pages = kc->nr_reserved_pages = 0; } /*----------------------------------------------------------------- @@ -607,7 +636,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -int dm_kcopyd_client_create(unsigned int nr_pages, +int dm_kcopyd_client_create(unsigned min_pages, struct dm_kcopyd_client **result) { int r = -ENOMEM; @@ -633,12 +662,12 @@ int dm_kcopyd_client_create(unsigned int nr_pages, goto bad_workqueue; kc->pages = NULL; - kc->nr_pages = kc->nr_free_pages = 0; - r = client_alloc_pages(kc, nr_pages); + kc->nr_reserved_pages = kc->nr_free_pages = 0; + r = client_reserve_pages(kc, min_pages); if (r) goto bad_client_pages; - kc->io_client = dm_io_client_create(nr_pages); + kc->io_client = dm_io_client_create(min_pages); if (IS_ERR(kc->io_client)) { r = PTR_ERR(kc->io_client); goto bad_io_client; -- cgit v1.2.3-70-g09d2 From bda8efec5c706a672e0714d341a342e811f0262a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:09 +0100 Subject: dm io: use fixed initial mempool size Replace the arbitrary calculation of an initial io struct mempool size with a constant. The code calculated the number of reserved structures based on the request size and used a "magic" multiplication constant of 4. This patch changes it to reserve a fixed number - itself still chosen quite arbitrarily. Further testing might show if there is a better number to choose. Note that if there is no memory pressure, we can still allocate an arbitrary number of "struct io" structures. One structure is enough to process the whole request. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-io.c | 27 +++++---------------------- drivers/md/dm-kcopyd.c | 2 +- drivers/md/dm-log.c | 3 +-- drivers/md/dm-raid1.c | 3 +-- drivers/md/dm-snap-persistent.c | 13 +------------ include/linux/dm-io.h | 3 +-- 6 files changed, 10 insertions(+), 41 deletions(-) diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 76a5af00a26..2067288f61f 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -19,6 +19,8 @@ #define DM_MSG_PREFIX "io" #define DM_IO_MAX_REGIONS BITS_PER_LONG +#define MIN_IOS 16 +#define MIN_BIOS 16 struct dm_io_client { mempool_t *pool; @@ -40,34 +42,22 @@ struct io { static struct kmem_cache *_dm_io_cache; -/* - * io contexts are only dynamically allocated for asynchronous - * io. Since async io is likely to be the majority of io we'll - * have the same number of io contexts as bios! (FIXME: must reduce this). - */ - -static unsigned int pages_to_ios(unsigned int pages) -{ - return 4 * pages; /* too many ? */ -} - /* * Create a client with mempool and bioset. */ -struct dm_io_client *dm_io_client_create(unsigned num_pages) +struct dm_io_client *dm_io_client_create(void) { - unsigned ios = pages_to_ios(num_pages); struct dm_io_client *client; client = kmalloc(sizeof(*client), GFP_KERNEL); if (!client) return ERR_PTR(-ENOMEM); - client->pool = mempool_create_slab_pool(ios, _dm_io_cache); + client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache); if (!client->pool) goto bad; - client->bios = bioset_create(16, 0); + client->bios = bioset_create(MIN_BIOS, 0); if (!client->bios) goto bad; @@ -81,13 +71,6 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages) } EXPORT_SYMBOL(dm_io_client_create); -int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client) -{ - return mempool_resize(client->pool, pages_to_ios(num_pages), - GFP_KERNEL); -} -EXPORT_SYMBOL(dm_io_client_resize); - void dm_io_client_destroy(struct dm_io_client *client) { mempool_destroy(client->pool); diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 5dfbdcb40a4..719693340d1 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -667,7 +667,7 @@ int dm_kcopyd_client_create(unsigned min_pages, if (r) goto bad_client_pages; - kc->io_client = dm_io_client_create(min_pages); + kc->io_client = dm_io_client_create(); if (IS_ERR(kc->io_client)) { r = PTR_ERR(kc->io_client); goto bad_io_client; diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index a1f32188967..948e3f4925b 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -449,8 +449,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, lc->io_req.mem.type = DM_IO_VMA; lc->io_req.notify.fn = NULL; - lc->io_req.client = dm_io_client_create(dm_div_up(buf_size, - PAGE_SIZE)); + lc->io_req.client = dm_io_client_create(); if (IS_ERR(lc->io_req.client)) { r = PTR_ERR(lc->io_req.client); DMWARN("couldn't allocate disk io client"); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 976ad4688af..53089aa1138 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -22,7 +22,6 @@ #define DM_MSG_PREFIX "raid1" #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ -#define DM_IO_PAGES 64 #define DM_KCOPYD_PAGES 64 #define DM_RAID1_HANDLE_ERRORS 0x01 @@ -887,7 +886,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, return NULL; } - ms->io_client = dm_io_client_create(DM_IO_PAGES); + ms->io_client = dm_io_client_create(); if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; mempool_destroy(ms->read_record_pool); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 95891dfcbca..135c2f1fdbf 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -154,11 +154,6 @@ struct pstore { struct workqueue_struct *metadata_wq; }; -static unsigned sectors_to_pages(unsigned sectors) -{ - return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); -} - static int alloc_area(struct pstore *ps) { int r = -ENOMEM; @@ -318,8 +313,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) chunk_size_supplied = 0; } - ps->io_client = dm_io_client_create(sectors_to_pages(ps->store-> - chunk_size)); + ps->io_client = dm_io_client_create(); if (IS_ERR(ps->io_client)) return PTR_ERR(ps->io_client); @@ -368,11 +362,6 @@ static int read_header(struct pstore *ps, int *new_snapshot) return r; } - r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size), - ps->io_client); - if (r) - return r; - r = alloc_area(ps); return r; diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 5c9186b93ff..f4b0aa3126f 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -69,8 +69,7 @@ struct dm_io_request { * * Create/destroy may block. */ -struct dm_io_client *dm_io_client_create(unsigned num_pages); -int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client); +struct dm_io_client *dm_io_client_create(void); void dm_io_client_destroy(struct dm_io_client *client); /* -- cgit v1.2.3-70-g09d2 From 5f43ba2950414dc0abf4ac44c397d88069056746 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:11 +0100 Subject: dm kcopyd: reserve fewer pages Reserve just the minimum of pages needed to process one job. Because we allocate pages from page allocator, we don't need to reserve a large number of pages. The maximum job size is SUB_JOB_SIZE and we calculate the number of reserved pages based on this. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 6 +++--- drivers/md/dm-raid1.c | 3 +-- drivers/md/dm-snap.c | 7 +------ include/linux/dm-kcopyd.h | 3 +-- 4 files changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 719693340d1..579647f8b4d 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -30,6 +30,7 @@ #define SUB_JOB_SIZE 128 #define SPLIT_COUNT 8 #define MIN_JOBS 8 +#define RESERVE_PAGES (DIV_ROUND_UP(SUB_JOB_SIZE << SECTOR_SHIFT, PAGE_SIZE)) /*----------------------------------------------------------------- * Each kcopyd client has its own little pool of preallocated @@ -636,8 +637,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -int dm_kcopyd_client_create(unsigned min_pages, - struct dm_kcopyd_client **result) +int dm_kcopyd_client_create(struct dm_kcopyd_client **result) { int r = -ENOMEM; struct dm_kcopyd_client *kc; @@ -663,7 +663,7 @@ int dm_kcopyd_client_create(unsigned min_pages, kc->pages = NULL; kc->nr_reserved_pages = kc->nr_free_pages = 0; - r = client_reserve_pages(kc, min_pages); + r = client_reserve_pages(kc, RESERVE_PAGES); if (r) goto bad_client_pages; diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 53089aa1138..9defad04541 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -22,7 +22,6 @@ #define DM_MSG_PREFIX "raid1" #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ -#define DM_KCOPYD_PAGES 64 #define DM_RAID1_HANDLE_ERRORS 0x01 #define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) @@ -1116,7 +1115,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - r = dm_kcopyd_client_create(DM_KCOPYD_PAGES, &ms->kcopyd_client); + r = dm_kcopyd_client_create(&ms->kcopyd_client); if (r) goto err_destroy_wq; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a2d330942cb..5a2296de84a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -39,11 +39,6 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; */ #define SNAPSHOT_COPY_PRIORITY 2 -/* - * Reserve 1MB for each snapshot initially (with minimum of 1 page). - */ -#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) - /* * The size of the mempool used to track chunks in use. */ @@ -1116,7 +1111,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); + r = dm_kcopyd_client_create(&s->kcopyd_client); if (r) { ti->error = "Could not create kcopyd client"; goto bad_kcopyd; diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 5db21631169..312513d4741 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -25,8 +25,7 @@ * To use kcopyd you must first create a dm_kcopyd_client object. */ struct dm_kcopyd_client; -int dm_kcopyd_client_create(unsigned num_pages, - struct dm_kcopyd_client **result); +int dm_kcopyd_client_create(struct dm_kcopyd_client **result); void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* -- cgit v1.2.3-70-g09d2 From fa34ce73072f90ecd90dcc43f29d82e70e5f8676 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:13 +0100 Subject: dm kcopyd: return client directly and not through a pointer Return client directly from dm_kcopyd_client_create, not through a parameter, making it consistent with dm_io_client_create. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 9 ++++----- drivers/md/dm-raid1.c | 6 ++++-- drivers/md/dm-snap.c | 5 +++-- include/linux/dm-kcopyd.h | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 579647f8b4d..819e37eaaeb 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -637,14 +637,14 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -int dm_kcopyd_client_create(struct dm_kcopyd_client **result) +struct dm_kcopyd_client *dm_kcopyd_client_create(void) { int r = -ENOMEM; struct dm_kcopyd_client *kc; kc = kmalloc(sizeof(*kc), GFP_KERNEL); if (!kc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); spin_lock_init(&kc->job_lock); INIT_LIST_HEAD(&kc->complete_jobs); @@ -676,8 +676,7 @@ int dm_kcopyd_client_create(struct dm_kcopyd_client **result) init_waitqueue_head(&kc->destroyq); atomic_set(&kc->nr_jobs, 0); - *result = kc; - return 0; + return kc; bad_io_client: client_free_pages(kc); @@ -688,7 +687,7 @@ bad_workqueue: bad_slab: kfree(kc); - return r; + return ERR_PTR(r); } EXPORT_SYMBOL(dm_kcopyd_client_create); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 9defad04541..9bfd057be68 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1115,9 +1115,11 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - r = dm_kcopyd_client_create(&ms->kcopyd_client); - if (r) + ms->kcopyd_client = dm_kcopyd_client_create(); + if (IS_ERR(ms->kcopyd_client)) { + r = PTR_ERR(ms->kcopyd_client); goto err_destroy_wq; + } wakeup_mirrord(ms); return 0; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 5a2296de84a..9ecff5f3023 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1111,8 +1111,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - r = dm_kcopyd_client_create(&s->kcopyd_client); - if (r) { + s->kcopyd_client = dm_kcopyd_client_create(); + if (IS_ERR(s->kcopyd_client)) { + r = PTR_ERR(s->kcopyd_client); ti->error = "Could not create kcopyd client"; goto bad_kcopyd; } diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 312513d4741..298d587e349 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -25,7 +25,7 @@ * To use kcopyd you must first create a dm_kcopyd_client object. */ struct dm_kcopyd_client; -int dm_kcopyd_client_create(struct dm_kcopyd_client **result); +struct dm_kcopyd_client *dm_kcopyd_client_create(void); void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* -- cgit v1.2.3-70-g09d2 From bc658c96037fc87463f0703ad2ea7c895344cb7e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 29 May 2011 10:33:44 +0200 Subject: mm, rmap: Add yet more comments to page_get_anon_vma/page_lock_anon_vma Inspired by an analysis from Hugh on why again all this doesn't explode in our face. Signed-off-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- mm/rmap.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/rmap.c b/mm/rmap.c index 6bada99cd61..0eb463ea88d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -352,6 +352,11 @@ void __init anon_vma_init(void) * The page might have been remapped to a different anon_vma or the anon_vma * returned may already be freed (and even reused). * + * In case it was remapped to a different anon_vma, the new anon_vma will be a + * child of the old anon_vma, and the anon_vma lifetime rules will therefore + * ensure that any anon_vma obtained from the page will still be valid for as + * long as we observe page_mapped() [ hence all those page_mapped() tests ]. + * * All users of this function must be very careful when walking the anon_vma * chain and verify that the page in question is indeed mapped in it * [ something equivalent to page_mapped_in_vma() ]. @@ -421,7 +426,7 @@ struct anon_vma *page_lock_anon_vma(struct page *page) /* * If the page is still mapped, then this anon_vma is still * its anon_vma, and holding the mutex ensures that it will - * not go away, see __put_anon_vma(). + * not go away, see anon_vma_free(). */ if (!page_mapped(page)) { mutex_unlock(&root_anon_vma->mutex); -- cgit v1.2.3-70-g09d2 From c4f790736ca8d7d86883c5aee2ba1caa15cd8da3 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 23 May 2011 21:18:20 -0500 Subject: eCryptfs: Consolidate inode functions into inode.c These functions should live in inode.c since their focus is on inodes and they're primarily used by functions in inode.c. Also does a simple cleanup of ecryptfs_inode_test() and rolls ecryptfs_init_inode() into ecryptfs_inode_set(). Signed-off-by: Tyler Hicks Tested-by: David --- fs/ecryptfs/ecryptfs_kernel.h | 9 +--- fs/ecryptfs/inode.c | 104 ++++++++++++++++++++++++++++++++++++------ fs/ecryptfs/main.c | 69 ---------------------------- fs/ecryptfs/super.c | 16 ------- 4 files changed, 91 insertions(+), 107 deletions(-) diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index e70282775e2..37224b5fb12 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -625,10 +625,8 @@ struct ecryptfs_open_req { struct list_head kthread_ctl_list; }; -#define ECRYPTFS_INTERPOSE_FLAG_D_ADD 0x00000001 -int ecryptfs_interpose(struct dentry *hidden_dentry, - struct dentry *this_dentry, struct super_block *sb, - u32 flags); +struct inode *ecryptfs_get_inode(struct inode *lower_inode, + struct super_block *sb); void ecryptfs_i_size_init(const char *page_virt, struct inode *inode); int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, struct dentry *lower_dentry, @@ -679,9 +677,6 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, unsigned char *src, struct dentry *ecryptfs_dentry); int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); -int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode); -int ecryptfs_inode_set(struct inode *inode, void *lower_inode); -void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode); ssize_t ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, void *value, size_t size); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 94ab3c06317..704a8c8fe19 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -51,6 +51,95 @@ static void unlock_dir(struct dentry *dir) dput(dir); } +static int ecryptfs_inode_test(struct inode *inode, void *lower_inode) +{ + if (ecryptfs_inode_to_lower(inode) == (struct inode *)lower_inode) + return 1; + return 0; +} + +static int ecryptfs_inode_set(struct inode *inode, void *lower_inode) +{ + ecryptfs_set_inode_lower(inode, (struct inode *)lower_inode); + inode->i_ino = ((struct inode *)lower_inode)->i_ino; + inode->i_version++; + inode->i_op = &ecryptfs_main_iops; + inode->i_fop = &ecryptfs_main_fops; + inode->i_mapping->a_ops = &ecryptfs_aops; + return 0; +} + +struct inode *ecryptfs_get_inode(struct inode *lower_inode, + struct super_block *sb) +{ + struct inode *inode; + int rc = 0; + + if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) { + rc = -EXDEV; + goto out; + } + if (!igrab(lower_inode)) { + rc = -ESTALE; + goto out; + } + inode = iget5_locked(sb, (unsigned long)lower_inode, + ecryptfs_inode_test, ecryptfs_inode_set, + lower_inode); + if (!inode) { + rc = -EACCES; + iput(lower_inode); + goto out; + } + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + else + iput(lower_inode); + if (S_ISLNK(lower_inode->i_mode)) + inode->i_op = &ecryptfs_symlink_iops; + else if (S_ISDIR(lower_inode->i_mode)) + inode->i_op = &ecryptfs_dir_iops; + if (S_ISDIR(lower_inode->i_mode)) + inode->i_fop = &ecryptfs_dir_fops; + if (special_file(lower_inode->i_mode)) + init_special_inode(inode, lower_inode->i_mode, + lower_inode->i_rdev); + fsstack_copy_attr_all(inode, lower_inode); + /* This size will be overwritten for real files w/ headers and + * other metadata */ + fsstack_copy_inode_size(inode, lower_inode); + return inode; +out: + return ERR_PTR(rc); +} + +#define ECRYPTFS_INTERPOSE_FLAG_D_ADD 0x00000001 +/** + * ecryptfs_interpose + * @lower_dentry: Existing dentry in the lower filesystem + * @dentry: ecryptfs' dentry + * @sb: ecryptfs's super_block + * @flags: flags to govern behavior of interpose procedure + * + * Interposes upper and lower dentries. + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_interpose(struct dentry *lower_dentry, + struct dentry *dentry, struct super_block *sb, + u32 flags) +{ + struct inode *lower_inode = lower_dentry->d_inode; + struct inode *inode = ecryptfs_get_inode(lower_inode, sb); + if (IS_ERR(inode)) + return PTR_ERR(inode); + if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) + d_add(dentry, inode); + else + d_instantiate(dentry, inode); + return 0; +} + /** * ecryptfs_create_underlying_file * @lower_dir_inode: inode of the parent in the lower fs of the new file @@ -1079,21 +1168,6 @@ out: return rc; } -int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode) -{ - if ((ecryptfs_inode_to_lower(inode) - == (struct inode *)candidate_lower_inode)) - return 1; - else - return 0; -} - -int ecryptfs_inode_set(struct inode *inode, void *lower_inode) -{ - ecryptfs_init_inode(inode, (struct inode *)lower_inode); - return 0; -} - const struct inode_operations ecryptfs_symlink_iops = { .readlink = ecryptfs_readlink, .follow_link = ecryptfs_follow_link, diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 89b93389af8..7c697abab39 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -168,75 +168,6 @@ void ecryptfs_put_lower_file(struct inode *inode) } } -static struct inode *ecryptfs_get_inode(struct inode *lower_inode, - struct super_block *sb) -{ - struct inode *inode; - int rc = 0; - - if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) { - rc = -EXDEV; - goto out; - } - if (!igrab(lower_inode)) { - rc = -ESTALE; - goto out; - } - inode = iget5_locked(sb, (unsigned long)lower_inode, - ecryptfs_inode_test, ecryptfs_inode_set, - lower_inode); - if (!inode) { - rc = -EACCES; - iput(lower_inode); - goto out; - } - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - else - iput(lower_inode); - if (S_ISLNK(lower_inode->i_mode)) - inode->i_op = &ecryptfs_symlink_iops; - else if (S_ISDIR(lower_inode->i_mode)) - inode->i_op = &ecryptfs_dir_iops; - if (S_ISDIR(lower_inode->i_mode)) - inode->i_fop = &ecryptfs_dir_fops; - if (special_file(lower_inode->i_mode)) - init_special_inode(inode, lower_inode->i_mode, - lower_inode->i_rdev); - fsstack_copy_attr_all(inode, lower_inode); - /* This size will be overwritten for real files w/ headers and - * other metadata */ - fsstack_copy_inode_size(inode, lower_inode); - return inode; -out: - return ERR_PTR(rc); -} - -/** - * ecryptfs_interpose - * @lower_dentry: Existing dentry in the lower filesystem - * @dentry: ecryptfs' dentry - * @sb: ecryptfs's super_block - * @flags: flags to govern behavior of interpose procedure - * - * Interposes upper and lower dentries. - * - * Returns zero on success; non-zero otherwise - */ -int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, - struct super_block *sb, u32 flags) -{ - struct inode *lower_inode = lower_dentry->d_inode; - struct inode *inode = ecryptfs_get_inode(lower_inode, sb); - if (IS_ERR(inode)) - return PTR_ERR(inode); - if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) - d_add(dentry, inode); - else - d_instantiate(dentry, inode); - return 0; -} - enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_cipher, ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes, diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 245b517bf1b..dbd52d40df4 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -92,22 +92,6 @@ static void ecryptfs_destroy_inode(struct inode *inode) call_rcu(&inode->i_rcu, ecryptfs_i_callback); } -/** - * ecryptfs_init_inode - * @inode: The ecryptfs inode - * - * Set up the ecryptfs inode. - */ -void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode) -{ - ecryptfs_set_inode_lower(inode, lower_inode); - inode->i_ino = lower_inode->i_ino; - inode->i_version++; - inode->i_op = &ecryptfs_main_iops; - inode->i_fop = &ecryptfs_main_fops; - inode->i_mapping->a_ops = &ecryptfs_aops; -} - /** * ecryptfs_statfs * @sb: The ecryptfs super block -- cgit v1.2.3-70-g09d2 From 1775bc342c6eacd6304493cbb2e0cda1a0182246 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 20 May 2011 13:47:33 +0200 Subject: NFSv4.1: purge deviceid cache on nfs_free_client Use the pnfs_layoutdriver_type both as a qualifier for the deviceid, distinguishing deviceid from different layout types on the server, and for freeing the layout-driver allocated structure containing the nfs4_deviceid_node. [BUG in _deviceid_purge_client] [layout_driver MUST set free_deviceid_node if using dev-cache] [let ver < 4.1 compile] Signed-off-by: Boaz Harrosh [removed EXPORT_SYMBOL_GPL(nfs4_deviceid_purge_client)] Signed-off-by: Benny Halevy --- fs/nfs/client.c | 2 ++ fs/nfs/nfs4filelayout.c | 7 +++++++ fs/nfs/nfs4filelayout.h | 1 + fs/nfs/nfs4filelayoutdev.c | 9 +++++---- fs/nfs/pnfs.h | 11 +++++++++++ fs/nfs/pnfs_dev.c | 47 +++++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 70 insertions(+), 7 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 139be9647d8..b3dc2b88b65 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -290,6 +290,8 @@ static void nfs_free_client(struct nfs_client *clp) if (clp->cl_machine_cred != NULL) put_rpccred(clp->cl_machine_cred); + nfs4_deviceid_purge_client(clp); + kfree(clp->cl_hostname); kfree(clp); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 4c67a6f6519..cd289d9b7de 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -862,6 +862,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, return -ENOMEM; } +static void +filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) +{ + nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); +} + static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", @@ -874,6 +880,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, + .free_deviceid_node = filelayout_free_deveiceid_node, }; static int __init nfs4filelayout_init(void) diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 0ace0a2d6d7..cebe01e3795 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -98,6 +98,7 @@ u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx); extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); +extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index eda4527a57e..5914659c8ec 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -156,7 +156,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds) kfree(ds); } -static void +void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) { struct nfs4_pnfs_ds *ds; @@ -386,7 +386,9 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) dsaddr->stripe_indices = stripe_indices; stripe_indices = NULL; dsaddr->ds_num = num; - nfs4_init_deviceid_node(&dsaddr->id_node, NFS_SERVER(ino)->nfs_client, + nfs4_init_deviceid_node(&dsaddr->id_node, + NFS_SERVER(ino)->pnfs_curr_ld, + NFS_SERVER(ino)->nfs_client, &pdev->dev_id); for (i = 0; i < dsaddr->ds_num; i++) { @@ -548,8 +550,7 @@ out_free: void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) { - if (nfs4_put_deviceid_node(&dsaddr->id_node)) - nfs4_fl_free_deviceid(dsaddr); + nfs4_put_deviceid_node(&dsaddr->id_node); } /* diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 3831ad04a23..80a5d0e2cc4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -65,6 +65,8 @@ enum { NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ }; +struct nfs4_deviceid_node; + /* Per-layout driver specific registration structure */ struct pnfs_layoutdriver_type { struct list_head pnfs_tblid; @@ -90,6 +92,8 @@ struct pnfs_layoutdriver_type { */ enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); + + void (*free_deviceid_node) (struct nfs4_deviceid_node *); }; struct pnfs_layout_hdr { @@ -160,6 +164,7 @@ int pnfs_layoutcommit_inode(struct inode *inode, bool sync); /* pnfs_dev.c */ struct nfs4_deviceid_node { struct hlist_node node; + const struct pnfs_layoutdriver_type *ld; const struct nfs_client *nfs_client; struct nfs4_deviceid deviceid; atomic_t ref; @@ -169,10 +174,12 @@ void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct nfs_client *, const struct nfs4_deviceid *); struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, + const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); +void nfs4_deviceid_purge_client(const struct nfs_client *); static inline int lo_fail_bit(u32 iomode) { @@ -349,6 +356,10 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; } + +static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl) +{ +} #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index bf05189a7cb..64a4b85c7db 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -96,11 +96,15 @@ EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, + const struct pnfs_layoutdriver_type *ld, const struct nfs_client *nfs_client, const struct nfs4_deviceid *id) { + INIT_HLIST_NODE(&d->node); + d->ld = ld; d->nfs_client = nfs_client; d->deviceid = *id; + atomic_set(&d->ref, 1); } EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); @@ -108,7 +112,10 @@ EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); * Uniquely initialize and insert a deviceid node into cache * * @new new deviceid node - * Note that the caller must set up new->nfs_client and new->deviceid + * Note that the caller must set up the following members: + * new->ld + * new->nfs_client + * new->deviceid * * @ret the inserted node, if none found, otherwise, the found entry. */ @@ -125,8 +132,6 @@ nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) return d; } - INIT_HLIST_NODE(&new->node); - atomic_set(&new->ref, 1); hash = nfs4_deviceid_hash(&new->deviceid); hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); spin_unlock(&nfs4_deviceid_lock); @@ -151,6 +156,42 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) hlist_del_init_rcu(&d->node); spin_unlock(&nfs4_deviceid_lock); synchronize_rcu(); + d->ld->free_deviceid_node(d); return true; } EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); + +static void +_deviceid_purge_client(const struct nfs_client *clp, long hash) +{ + struct nfs4_deviceid_node *d; + struct hlist_node *n, *next; + HLIST_HEAD(tmp); + + rcu_read_lock(); + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + if (d->nfs_client == clp && atomic_read(&d->ref)) { + hlist_del_init_rcu(&d->node); + hlist_add_head(&d->node, &tmp); + } + rcu_read_unlock(); + + if (hlist_empty(&tmp)) + return; + + synchronize_rcu(); + hlist_for_each_entry_safe(d, n, next, &tmp, node) + if (atomic_dec_and_test(&d->ref)) + d->ld->free_deviceid_node(d); +} + +void +nfs4_deviceid_purge_client(const struct nfs_client *clp) +{ + long h; + + spin_lock(&nfs4_deviceid_lock); + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) + _deviceid_purge_client(clp, h); + spin_unlock(&nfs4_deviceid_lock); +} -- cgit v1.2.3-70-g09d2 From 5ccf92037c7c6e6f28175fd245284923f939259f Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 24 May 2011 02:16:51 -0500 Subject: eCryptfs: Cleanup inode initialization code The eCryptfs inode get, initialization, and dentry interposition code has two separate paths. One is for when dentry interposition is needed after doing things like a mkdir in the lower filesystem and the other is needed after a lookup. Unlocking new inodes and doing a d_add() needs to happen at different times, depending on which type of dentry interposing is being done. This patch cleans up the inode get and initialization code paths and splits them up so that the locking and d_add() differences mentioned above can be handled appropriately in a later patch. Signed-off-by: Tyler Hicks Tested-by: David --- fs/ecryptfs/ecryptfs_kernel.h | 3 - fs/ecryptfs/inode.c | 134 ++++++++++++++++++++++-------------------- 2 files changed, 69 insertions(+), 68 deletions(-) diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 37224b5fb12..41a45323637 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -628,9 +628,6 @@ struct ecryptfs_open_req { struct inode *ecryptfs_get_inode(struct inode *lower_inode, struct super_block *sb); void ecryptfs_i_size_init(const char *page_virt, struct inode *inode); -int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, - struct dentry *lower_dentry, - struct inode *ecryptfs_dir_inode); int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, size_t *decrypted_name_size, struct dentry *ecryptfs_dentry, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 704a8c8fe19..fc7d2b74850 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -58,85 +58,87 @@ static int ecryptfs_inode_test(struct inode *inode, void *lower_inode) return 0; } -static int ecryptfs_inode_set(struct inode *inode, void *lower_inode) +static int ecryptfs_inode_set(struct inode *inode, void *opaque) { - ecryptfs_set_inode_lower(inode, (struct inode *)lower_inode); - inode->i_ino = ((struct inode *)lower_inode)->i_ino; + struct inode *lower_inode = opaque; + + ecryptfs_set_inode_lower(inode, lower_inode); + fsstack_copy_attr_all(inode, lower_inode); + /* i_size will be overwritten for encrypted regular files */ + fsstack_copy_inode_size(inode, lower_inode); + inode->i_ino = lower_inode->i_ino; inode->i_version++; - inode->i_op = &ecryptfs_main_iops; - inode->i_fop = &ecryptfs_main_fops; inode->i_mapping->a_ops = &ecryptfs_aops; + + if (S_ISLNK(inode->i_mode)) + inode->i_op = &ecryptfs_symlink_iops; + else if (S_ISDIR(inode->i_mode)) + inode->i_op = &ecryptfs_dir_iops; + else + inode->i_op = &ecryptfs_main_iops; + + if (S_ISDIR(inode->i_mode)) + inode->i_fop = &ecryptfs_dir_fops; + else if (special_file(inode->i_mode)) + init_special_inode(inode, inode->i_mode, inode->i_rdev); + else + inode->i_fop = &ecryptfs_main_fops; + return 0; } -struct inode *ecryptfs_get_inode(struct inode *lower_inode, - struct super_block *sb) +static struct inode *__ecryptfs_get_inode(struct inode *lower_inode, + struct super_block *sb) { struct inode *inode; - int rc = 0; - if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) { - rc = -EXDEV; - goto out; - } - if (!igrab(lower_inode)) { - rc = -ESTALE; - goto out; - } + if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) + return ERR_PTR(-EXDEV); + if (!igrab(lower_inode)) + return ERR_PTR(-ESTALE); inode = iget5_locked(sb, (unsigned long)lower_inode, ecryptfs_inode_test, ecryptfs_inode_set, lower_inode); if (!inode) { - rc = -EACCES; iput(lower_inode); - goto out; + return ERR_PTR(-EACCES); } - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - else + if (!(inode->i_state & I_NEW)) iput(lower_inode); - if (S_ISLNK(lower_inode->i_mode)) - inode->i_op = &ecryptfs_symlink_iops; - else if (S_ISDIR(lower_inode->i_mode)) - inode->i_op = &ecryptfs_dir_iops; - if (S_ISDIR(lower_inode->i_mode)) - inode->i_fop = &ecryptfs_dir_fops; - if (special_file(lower_inode->i_mode)) - init_special_inode(inode, lower_inode->i_mode, - lower_inode->i_rdev); - fsstack_copy_attr_all(inode, lower_inode); - /* This size will be overwritten for real files w/ headers and - * other metadata */ - fsstack_copy_inode_size(inode, lower_inode); + + return inode; +} + +struct inode *ecryptfs_get_inode(struct inode *lower_inode, + struct super_block *sb) +{ + struct inode *inode = __ecryptfs_get_inode(lower_inode, sb); + + if (!IS_ERR(inode) && (inode->i_state & I_NEW)) + unlock_new_inode(inode); + return inode; -out: - return ERR_PTR(rc); } -#define ECRYPTFS_INTERPOSE_FLAG_D_ADD 0x00000001 /** * ecryptfs_interpose * @lower_dentry: Existing dentry in the lower filesystem * @dentry: ecryptfs' dentry * @sb: ecryptfs's super_block - * @flags: flags to govern behavior of interpose procedure * * Interposes upper and lower dentries. * * Returns zero on success; non-zero otherwise */ static int ecryptfs_interpose(struct dentry *lower_dentry, - struct dentry *dentry, struct super_block *sb, - u32 flags) + struct dentry *dentry, struct super_block *sb) { - struct inode *lower_inode = lower_dentry->d_inode; - struct inode *inode = ecryptfs_get_inode(lower_inode, sb); + struct inode *inode = ecryptfs_get_inode(lower_dentry->d_inode, sb); + if (IS_ERR(inode)) return PTR_ERR(inode); - if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) - d_add(dentry, inode); - else - d_instantiate(dentry, inode); + d_instantiate(dentry, inode); + return 0; } @@ -218,7 +220,7 @@ ecryptfs_do_create(struct inode *directory_inode, goto out_lock; } rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, - directory_inode->i_sb, 0); + directory_inode->i_sb); if (rc) { ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n"); goto out_lock; @@ -305,15 +307,15 @@ out: } /** - * ecryptfs_lookup_and_interpose_lower - Perform a lookup + * ecryptfs_lookup_interpose - Dentry interposition for a lookup */ -int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, - struct dentry *lower_dentry, - struct inode *ecryptfs_dir_inode) +static int ecryptfs_lookup_interpose(struct dentry *ecryptfs_dentry, + struct dentry *lower_dentry, + struct inode *ecryptfs_dir_inode) { struct dentry *lower_dir_dentry; struct vfsmount *lower_mnt; - struct inode *lower_inode; + struct inode *inode, *lower_inode; struct ecryptfs_crypt_stat *crypt_stat; char *page_virt = NULL; int put_lower = 0, rc = 0; @@ -341,14 +343,16 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, d_add(ecryptfs_dentry, NULL); goto out; } - rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, - ecryptfs_dir_inode->i_sb, - ECRYPTFS_INTERPOSE_FLAG_D_ADD); - if (rc) { + inode = __ecryptfs_get_inode(lower_inode, ecryptfs_dir_inode->i_sb); + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); printk(KERN_ERR "%s: Error interposing; rc = [%d]\n", __func__, rc); goto out; } + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + d_add(ecryptfs_dentry, inode); if (S_ISDIR(lower_inode->i_mode)) goto out; if (S_ISLNK(lower_inode->i_mode)) @@ -442,12 +446,12 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, goto out_d_drop; } if (lower_dentry->d_inode) - goto lookup_and_interpose; + goto interpose; mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; if (!(mount_crypt_stat && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES))) - goto lookup_and_interpose; + goto interpose; dput(lower_dentry); rc = ecryptfs_encrypt_and_encode_filename( &encrypted_and_encoded_name, &encrypted_and_encoded_name_size, @@ -470,9 +474,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, encrypted_and_encoded_name); goto out_d_drop; } -lookup_and_interpose: - rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry, - ecryptfs_dir_inode); +interpose: + rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry, + ecryptfs_dir_inode); goto out; out_d_drop: d_drop(ecryptfs_dentry); @@ -500,7 +504,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, lower_new_dentry); if (rc || !lower_new_dentry->d_inode) goto out_lock; - rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); + rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb); if (rc) goto out_lock; fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); @@ -567,7 +571,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, kfree(encoded_symname); if (rc || !lower_dentry->d_inode) goto out_lock; - rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); if (rc) goto out_lock; fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); @@ -591,7 +595,7 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode); if (rc || !lower_dentry->d_inode) goto out; - rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); if (rc) goto out; fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); @@ -639,7 +643,7 @@ ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev); if (rc || !lower_dentry->d_inode) goto out; - rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); if (rc) goto out; fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); -- cgit v1.2.3-70-g09d2 From 1be5683b03a766670b3b629bf6bfeab3ca9239d8 Mon Sep 17 00:00:00 2001 From: Marc Eshel Date: Sun, 22 May 2011 19:47:09 +0300 Subject: pnfs: CB_NOTIFY_DEVICEID Note: This functionlaity is incomplete as all layout segments referring to the 'to be removed device id' need to be reaped, and all in flight I/O drained. [use be32 res in nfs4_callback_devicenotify] [use nfs_client to qualify deviceid for cb_notify_deviceid] [use global deviceid cache for CB_NOTIFY_DEVICEID] [refactor device cache _lookup_deviceid] [refactor device cache _find_get_deviceid] Signed-off-by: Benny Halevy [Bug in new global-device-cache code] [layout_driver MUST set free_deviceid_node if using dev-cache] Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- fs/nfs/callback.h | 17 +++++++++ fs/nfs/callback_proc.c | 47 ++++++++++++++++++++++++ fs/nfs/callback_xdr.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/pnfs.h | 1 + fs/nfs/pnfs_dev.c | 95 +++++++++++++++++++++++++++++++++++++++++-------- 5 files changed, 241 insertions(+), 15 deletions(-) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 46d93ce7311..b257383bb56 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -167,6 +167,23 @@ extern unsigned nfs4_callback_layoutrecall( extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); extern void nfs4_cb_take_slot(struct nfs_client *clp); + +struct cb_devicenotifyitem { + uint32_t cbd_notify_type; + uint32_t cbd_layout_type; + struct nfs4_deviceid cbd_dev_id; + uint32_t cbd_immediate; +}; + +struct cb_devicenotifyargs { + int ndevs; + struct cb_devicenotifyitem *devs; +}; + +extern __be32 nfs4_callback_devicenotify( + struct cb_devicenotifyargs *args, + void *dummy, struct cb_process_state *cps); + #endif /* CONFIG_NFS_V4_1 */ extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 2f41dccea18..fb5e5b9a97a 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -241,6 +241,53 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp) do_callback_layoutrecall(clp, &args); } +__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, + void *dummy, struct cb_process_state *cps) +{ + int i; + __be32 res = 0; + struct nfs_client *clp = cps->clp; + struct nfs_server *server = NULL; + + dprintk("%s: -->\n", __func__); + + if (!clp) { + res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); + goto out; + } + + for (i = 0; i < args->ndevs; i++) { + struct cb_devicenotifyitem *dev = &args->devs[i]; + + if (!server || + server->pnfs_curr_ld->id != dev->cbd_layout_type) { + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + if (server->pnfs_curr_ld && + server->pnfs_curr_ld->id == dev->cbd_layout_type) { + rcu_read_unlock(); + goto found; + } + rcu_read_unlock(); + dprintk("%s: layout type %u not found\n", + __func__, dev->cbd_layout_type); + continue; + } + + found: + if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) + dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, " + "deleting instead\n", __func__); + nfs4_delete_deviceid(clp, &dev->cbd_dev_id); + } + +out: + kfree(args->devs); + dprintk("%s: exit with status = %u\n", + __func__, be32_to_cpu(res)); + return res; +} + int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) { if (delegation == NULL) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 00ecf62ce7c..c6c86a77e04 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -25,6 +25,7 @@ #if defined(CONFIG_NFS_V4_1) #define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#define CB_OP_DEVICENOTIFY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ 4 + 1 + 3) #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) @@ -284,6 +285,93 @@ out: return status; } +static +__be32 decode_devicenotify_args(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct cb_devicenotifyargs *args) +{ + __be32 *p; + __be32 status = 0; + u32 tmp; + int n, i; + args->ndevs = 0; + + /* Num of device notifications */ + p = read_buf(xdr, sizeof(uint32_t)); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_BADXDR); + goto out; + } + n = ntohl(*p++); + if (n <= 0) + goto out; + + args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL); + if (!args->devs) { + status = htonl(NFS4ERR_DELAY); + goto out; + } + + /* Decode each dev notification */ + for (i = 0; i < n; i++) { + struct cb_devicenotifyitem *dev = &args->devs[i]; + + p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_BADXDR); + goto err; + } + + tmp = ntohl(*p++); /* bitmap size */ + if (tmp != 1) { + status = htonl(NFS4ERR_INVAL); + goto err; + } + dev->cbd_notify_type = ntohl(*p++); + if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE && + dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) { + status = htonl(NFS4ERR_INVAL); + goto err; + } + + tmp = ntohl(*p++); /* opaque size */ + if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) && + (tmp != NFS4_DEVICEID4_SIZE + 8)) || + ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) && + (tmp != NFS4_DEVICEID4_SIZE + 4))) { + status = htonl(NFS4ERR_INVAL); + goto err; + } + dev->cbd_layout_type = ntohl(*p++); + memcpy(dev->cbd_dev_id.data, p, NFS4_DEVICEID4_SIZE); + p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); + + if (dev->cbd_layout_type == NOTIFY_DEVICEID4_CHANGE) { + p = read_buf(xdr, sizeof(uint32_t)); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_BADXDR); + goto err; + } + dev->cbd_immediate = ntohl(*p++); + } else { + dev->cbd_immediate = 0; + } + + args->ndevs++; + + dprintk("%s: type %d layout 0x%x immediate %d\n", + __func__, dev->cbd_notify_type, dev->cbd_layout_type, + dev->cbd_immediate); + } +out: + dprintk("%s: status %d ndevs %d\n", + __func__, ntohl(status), args->ndevs); + return status; +err: + kfree(args->devs); + goto out; +} + static __be32 decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) { @@ -639,10 +727,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_RECALL_ANY: case OP_CB_RECALL_SLOT: case OP_CB_LAYOUTRECALL: + case OP_CB_NOTIFY_DEVICEID: *op = &callback_ops[op_nr]; break; - case OP_CB_NOTIFY_DEVICEID: case OP_CB_NOTIFY: case OP_CB_PUSH_DELEG: case OP_CB_RECALLABLE_OBJ_AVAIL: @@ -849,6 +937,12 @@ static struct callback_op callback_ops[] = { (callback_decode_arg_t)decode_layoutrecall_args, .res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ, }, + [OP_CB_NOTIFY_DEVICEID] = { + .process_op = (callback_process_op_t)nfs4_callback_devicenotify, + .decode_args = + (callback_decode_arg_t)decode_devicenotify_args, + .res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ, + }, [OP_CB_SEQUENCE] = { .process_op = (callback_process_op_t)nfs4_callback_sequence, .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 80a5d0e2cc4..fbd3f7cd9e7 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -173,6 +173,7 @@ struct nfs4_deviceid_node { void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct nfs_client *, const struct nfs4_deviceid *); struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct nfs_client *, const struct nfs4_deviceid *); +void nfs4_delete_deviceid(const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, const struct pnfs_layoutdriver_type *, const struct nfs_client *, diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 64a4b85c7db..8fd3839df29 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -66,33 +66,100 @@ nfs4_deviceid_hash(const struct nfs4_deviceid *id) return x & NFS4_DEVICE_ID_HASH_MASK; } +static struct nfs4_deviceid_node * +_lookup_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id, + long hash) +{ + struct nfs4_deviceid_node *d; + struct hlist_node *n; + + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + if (d->nfs_client == clp && !memcmp(&d->deviceid, id, sizeof(*id))) { + if (atomic_read(&d->ref)) + return d; + else + continue; + } + return NULL; +} + /* * Lookup a deviceid in cache and get a reference count on it if found * * @clp nfs_client associated with deviceid * @id deviceid to look up */ +struct nfs4_deviceid_node * +_find_get_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id, + long hash) +{ + struct nfs4_deviceid_node *d; + + rcu_read_lock(); + d = _lookup_deviceid(clp, id, hash); + if (d && !atomic_inc_not_zero(&d->ref)) + d = NULL; + rcu_read_unlock(); + return d; +} + struct nfs4_deviceid_node * nfs4_find_get_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id) +{ + return _find_get_deviceid(clp, id, nfs4_deviceid_hash(id)); +} +EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); + +/* + * Unhash and put deviceid + * + * @clp nfs_client associated with deviceid + * @id the deviceid to unhash + * + * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. + */ +struct nfs4_deviceid_node * +nfs4_unhash_put_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id) { struct nfs4_deviceid_node *d; - struct hlist_node *n; - long hash = nfs4_deviceid_hash(id); + spin_lock(&nfs4_deviceid_lock); rcu_read_lock(); - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) { - if (d->nfs_client == clp && !memcmp(&d->deviceid, id, sizeof(*id))) { - if (!atomic_inc_not_zero(&d->ref)) - goto fail; - rcu_read_unlock(); - return d; - } - } -fail: + d = _lookup_deviceid(clp, id, nfs4_deviceid_hash(id)); rcu_read_unlock(); + if (!d) { + spin_unlock(&nfs4_deviceid_lock); + return NULL; + } + hlist_del_init_rcu(&d->node); + spin_unlock(&nfs4_deviceid_lock); + synchronize_rcu(); + + /* balance the initial ref set in pnfs_insert_deviceid */ + if (atomic_dec_and_test(&d->ref)) + return d; + return NULL; } -EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); +EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); + +/* + * Delete a deviceid from cache + * + * @clp struct nfs_client qualifying the deviceid + * @id deviceid to delete + */ +void +nfs4_delete_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id) +{ + struct nfs4_deviceid_node *d; + + d = nfs4_unhash_put_deviceid(clp, id); + if (!d) + return; + d->ld->free_deviceid_node(d); +} +EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, @@ -126,13 +193,13 @@ nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) long hash; spin_lock(&nfs4_deviceid_lock); - d = nfs4_find_get_deviceid(new->nfs_client, &new->deviceid); + hash = nfs4_deviceid_hash(&new->deviceid); + d = _find_get_deviceid(new->nfs_client, &new->deviceid, hash); if (d) { spin_unlock(&nfs4_deviceid_lock); return d; } - hash = nfs4_deviceid_hash(&new->deviceid); hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); spin_unlock(&nfs4_deviceid_lock); -- cgit v1.2.3-70-g09d2 From 35c8bb543c9e83197e6375142d1d1c2ee3cf017d Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 24 May 2011 18:04:02 +0300 Subject: NFSv4.1: use layout driver in global device cache pnfs deviceids are unique per server, per layout type. struct nfs_client is currently used to distinguish deviceids from different nfs servers, yet these may clash between different layout types on the same server. Therefore, use the layout driver associated with each deviceid at insertion time to look it up, unhash, or delete it. Signed-off-by: Benny Halevy --- fs/nfs/callback_proc.c | 2 +- fs/nfs/nfs4filelayout.c | 3 ++- fs/nfs/pnfs.h | 6 +++--- fs/nfs/pnfs_dev.c | 28 +++++++++++++++++----------- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index fb5e5b9a97a..c73e7b2fb8e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -278,7 +278,7 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, " "deleting instead\n", __func__); - nfs4_delete_deviceid(clp, &dev->cbd_dev_id); + nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); } out: diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index cd289d9b7de..501a9b86b31 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -441,7 +441,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, } /* find and reference the deviceid */ - d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->nfs_client, id); + d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, + NFS_SERVER(lo->plh_inode)->nfs_client, id); if (d == NULL) { dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); if (dsaddr == NULL) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index fbd3f7cd9e7..5b083d29533 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -171,9 +171,9 @@ struct nfs4_deviceid_node { }; void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); -struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct nfs_client *, const struct nfs4_deviceid *); -struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct nfs_client *, const struct nfs4_deviceid *); -void nfs4_delete_deviceid(const struct nfs_client *, const struct nfs4_deviceid *); +struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); +struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); +void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, const struct pnfs_layoutdriver_type *, const struct nfs_client *, diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 8fd3839df29..c65e133ce9c 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -67,14 +67,16 @@ nfs4_deviceid_hash(const struct nfs4_deviceid *id) } static struct nfs4_deviceid_node * -_lookup_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id, +_lookup_deviceid(const struct pnfs_layoutdriver_type *ld, + const struct nfs_client *clp, const struct nfs4_deviceid *id, long hash) { struct nfs4_deviceid_node *d; struct hlist_node *n; hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) - if (d->nfs_client == clp && !memcmp(&d->deviceid, id, sizeof(*id))) { + if (d->ld == ld && d->nfs_client == clp && + !memcmp(&d->deviceid, id, sizeof(*id))) { if (atomic_read(&d->ref)) return d; else @@ -90,13 +92,14 @@ _lookup_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id, * @id deviceid to look up */ struct nfs4_deviceid_node * -_find_get_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id, +_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, + const struct nfs_client *clp, const struct nfs4_deviceid *id, long hash) { struct nfs4_deviceid_node *d; rcu_read_lock(); - d = _lookup_deviceid(clp, id, hash); + d = _lookup_deviceid(ld, clp, id, hash); if (d && !atomic_inc_not_zero(&d->ref)) d = NULL; rcu_read_unlock(); @@ -104,9 +107,10 @@ _find_get_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id, } struct nfs4_deviceid_node * -nfs4_find_get_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id) +nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, + const struct nfs_client *clp, const struct nfs4_deviceid *id) { - return _find_get_deviceid(clp, id, nfs4_deviceid_hash(id)); + return _find_get_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); } EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); @@ -119,13 +123,14 @@ EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. */ struct nfs4_deviceid_node * -nfs4_unhash_put_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id) +nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, + const struct nfs_client *clp, const struct nfs4_deviceid *id) { struct nfs4_deviceid_node *d; spin_lock(&nfs4_deviceid_lock); rcu_read_lock(); - d = _lookup_deviceid(clp, id, nfs4_deviceid_hash(id)); + d = _lookup_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); rcu_read_unlock(); if (!d) { spin_unlock(&nfs4_deviceid_lock); @@ -150,11 +155,12 @@ EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); * @id deviceid to delete */ void -nfs4_delete_deviceid(const struct nfs_client *clp, const struct nfs4_deviceid *id) +nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, + const struct nfs_client *clp, const struct nfs4_deviceid *id) { struct nfs4_deviceid_node *d; - d = nfs4_unhash_put_deviceid(clp, id); + d = nfs4_unhash_put_deviceid(ld, clp, id); if (!d) return; d->ld->free_deviceid_node(d); @@ -194,7 +200,7 @@ nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) spin_lock(&nfs4_deviceid_lock); hash = nfs4_deviceid_hash(&new->deviceid); - d = _find_get_deviceid(new->nfs_client, &new->deviceid, hash); + d = _find_get_deviceid(new->ld, new->nfs_client, &new->deviceid, hash); if (d) { spin_unlock(&nfs4_deviceid_lock); return d; -- cgit v1.2.3-70-g09d2 From f7da7a129d57bfe0f74573dc03531c63e1360fae Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 19 May 2011 14:16:47 -0400 Subject: SUNRPC: introduce xdr_init_decode_pages Initialize xdr_stream and xdr_buf using an array of page pointers and length of buffer. Signed-off-by: Benny Halevy --- fs/nfs/dir.c | 9 ++------- fs/nfs/nfs4filelayout.c | 9 ++------- fs/nfs/nfs4filelayoutdev.c | 9 ++------- include/linux/sunrpc/xdr.h | 2 ++ net/sunrpc/xdr.c | 19 +++++++++++++++++++ 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7237672216c..f673a9e1d95 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -512,12 +512,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en struct page **xdr_pages, struct page *page, unsigned int buflen) { struct xdr_stream stream; - struct xdr_buf buf = { - .pages = xdr_pages, - .page_len = buflen, - .buflen = buflen, - .len = buflen, - }; + struct xdr_buf buf; struct page *scratch; struct nfs_cache_array *array; unsigned int count = 0; @@ -527,7 +522,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en if (scratch == NULL) return -ENOMEM; - xdr_init_decode(&stream, &buf, NULL); + xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); do { diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 501a9b86b31..33bda24e8cd 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -510,12 +510,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, gfp_t gfp_flags) { struct xdr_stream stream; - struct xdr_buf buf = { - .pages = lgr->layoutp->pages, - .page_len = lgr->layoutp->len, - .buflen = lgr->layoutp->len, - .len = lgr->layoutp->len, - }; + struct xdr_buf buf; struct page *scratch; __be32 *p; uint32_t nfl_util; @@ -527,7 +522,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, if (!scratch) return -ENOMEM; - xdr_init_decode(&stream, &buf, NULL); + xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 5914659c8ec..3b7bf137726 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -308,12 +308,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) u8 max_stripe_index; struct nfs4_file_layout_dsaddr *dsaddr = NULL; struct xdr_stream stream; - struct xdr_buf buf = { - .pages = pdev->pages, - .page_len = pdev->pglen, - .buflen = pdev->pglen, - .len = pdev->pglen, - }; + struct xdr_buf buf; struct page *scratch; /* set up xdr stream */ @@ -321,7 +316,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) if (!scratch) goto out_err; - xdr_init_decode(&stream, &buf, NULL); + xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); /* Get the stripe count (number of stripe index) */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index fc84b7a19ca..a20970ef9e4 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -216,6 +216,8 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, unsigned int len); extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 679cd674b81..f008c14ad34 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -638,6 +638,25 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) } EXPORT_SYMBOL_GPL(xdr_init_decode); +/** + * xdr_init_decode - Initialize an xdr_stream for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to XDR buffer from which to decode data + * @pages: list of pages to decode into + * @len: length in bytes of buffer in pages + */ +void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, unsigned int len) +{ + memset(buf, 0, sizeof(*buf)); + buf->pages = pages; + buf->page_len = len; + buf->buflen = len; + buf->len = len; + xdr_init_decode(xdr, buf, NULL); +} +EXPORT_SYMBOL_GPL(xdr_init_decode_pages); + static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { __be32 *p = xdr->p; -- cgit v1.2.3-70-g09d2 From fb3296eb4636763918edef2d22e45b85b15d4518 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:47:26 +0300 Subject: pnfs: Use byte-range for layoutget Add offset and count parameters to pnfs_update_layout and use them to get the layout in the pageio path. Order cache layout segments in the following order: * offset (ascending) * length (descending) * iomode (RW before READ) Test byte range against the layout segment in use in pnfs_{read,write}_pg_test so not to coalesce pages not using the same layout segment. [fix lseg ordering] [clean up pnfs_find_lseg lseg arg] [remove unnecessary FIXME] [fix ordering in pnfs_insert_layout] [clean up pnfs_insert_layout] Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 165 +++++++++++++++++++++++++++++++++++++++++++-------------- fs/nfs/pnfs.h | 6 ++- fs/nfs/read.c | 8 ++- fs/nfs/write.c | 8 ++- 4 files changed, 142 insertions(+), 45 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f57f5281a52..c2f09e9b670 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -261,6 +261,65 @@ put_lseg(struct pnfs_layout_segment *lseg) } EXPORT_SYMBOL_GPL(put_lseg); +static inline u64 +end_offset(u64 start, u64 len) +{ + u64 end; + + end = start + len; + return end >= start ? end : NFS4_MAX_UINT64; +} + +/* last octet in a range */ +static inline u64 +last_byte_offset(u64 start, u64 len) +{ + u64 end; + + BUG_ON(!len); + end = start + len; + return end > start ? end - 1 : NFS4_MAX_UINT64; +} + +/* + * is l2 fully contained in l1? + * start1 end1 + * [----------------------------------) + * start2 end2 + * [----------------) + */ +static inline int +lo_seg_contained(struct pnfs_layout_range *l1, + struct pnfs_layout_range *l2) +{ + u64 start1 = l1->offset; + u64 end1 = end_offset(start1, l1->length); + u64 start2 = l2->offset; + u64 end2 = end_offset(start2, l2->length); + + return (start1 <= start2) && (end1 >= end2); +} + +/* + * is l1 and l2 intersecting? + * start1 end1 + * [----------------------------------) + * start2 end2 + * [----------------) + */ +static inline int +lo_seg_intersecting(struct pnfs_layout_range *l1, + struct pnfs_layout_range *l2) +{ + u64 start1 = l1->offset; + u64 end1 = end_offset(start1, l1->length); + u64 start2 = l2->offset; + u64 end2 = end_offset(start2, l2->length); + + return (end1 == NFS4_MAX_UINT64 || end1 > start2) && + (end2 == NFS4_MAX_UINT64 || end2 > start1); +} + static bool should_free_lseg(u32 lseg_iomode, u32 recall_iomode) { @@ -467,7 +526,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, static struct pnfs_layout_segment * send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, - u32 iomode, + struct pnfs_layout_range *range, gfp_t gfp_flags) { struct inode *ino = lo->plh_inode; @@ -499,11 +558,11 @@ send_layoutget(struct pnfs_layout_hdr *lo, goto out_err_free; } - lgp->args.minlength = NFS4_MAX_UINT64; + lgp->args.minlength = PAGE_CACHE_SIZE; + if (lgp->args.minlength > range->length) + lgp->args.minlength = range->length; lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; - lgp->args.range.iomode = iomode; - lgp->args.range.offset = 0; - lgp->args.range.length = NFS4_MAX_UINT64; + lgp->args.range = *range; lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); @@ -518,7 +577,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, nfs4_proc_layoutget(lgp); if (!lseg) { /* remember that LAYOUTGET failed and suspend trying */ - set_bit(lo_fail_bit(iomode), &lo->plh_flags); + set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); } /* free xdr pages */ @@ -625,10 +684,23 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier) * are seen first. */ static s64 -cmp_layout(u32 iomode1, u32 iomode2) +cmp_layout(struct pnfs_layout_range *l1, + struct pnfs_layout_range *l2) { + s64 d; + + /* high offset > low offset */ + d = l1->offset - l2->offset; + if (d) + return d; + + /* short length > long length */ + d = l2->length - l1->length; + if (d) + return d; + /* read > read/write */ - return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); + return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); } static void @@ -636,13 +708,12 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { struct pnfs_layout_segment *lp; - int found = 0; dprintk("%s:Begin\n", __func__); assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lp, &lo->plh_segs, pls_list) { - if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0) + if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) continue; list_add_tail(&lseg->pls_list, &lp->pls_list); dprintk("%s: inserted lseg %p " @@ -652,16 +723,14 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, lseg->pls_range.offset, lseg->pls_range.length, lp, lp->pls_range.iomode, lp->pls_range.offset, lp->pls_range.length); - found = 1; - break; - } - if (!found) { - list_add_tail(&lseg->pls_list, &lo->plh_segs); - dprintk("%s: inserted lseg %p " - "iomode %d offset %llu length %llu at tail\n", - __func__, lseg, lseg->pls_range.iomode, - lseg->pls_range.offset, lseg->pls_range.length); + goto out; } + list_add_tail(&lseg->pls_list, &lo->plh_segs); + dprintk("%s: inserted lseg %p " + "iomode %d offset %llu length %llu at tail\n", + __func__, lseg, lseg->pls_range.iomode, + lseg->pls_range.offset, lseg->pls_range.length); +out: get_layout_hdr(lo); dprintk("%s:Return\n", __func__); @@ -721,16 +790,28 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) * READ RW true */ static int -is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) +is_matching_lseg(struct pnfs_layout_range *ls_range, + struct pnfs_layout_range *range) { - return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW); + struct pnfs_layout_range range1; + + if ((range->iomode == IOMODE_RW && + ls_range->iomode != IOMODE_RW) || + !lo_seg_intersecting(ls_range, range)) + return 0; + + /* range1 covers only the first byte in the range */ + range1 = *range; + range1.length = 1; + return lo_seg_contained(ls_range, &range1); } /* * lookup range in layout */ static struct pnfs_layout_segment * -pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) +pnfs_find_lseg(struct pnfs_layout_hdr *lo, + struct pnfs_layout_range *range) { struct pnfs_layout_segment *lseg, *ret = NULL; @@ -739,11 +820,11 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && - is_matching_lseg(lseg, iomode)) { + is_matching_lseg(&lseg->pls_range, range)) { ret = get_lseg(lseg); break; } - if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) + if (cmp_layout(range, &lseg->pls_range) > 0) break; } @@ -759,9 +840,16 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, + loff_t pos, + u64 count, enum pnfs_iomode iomode, gfp_t gfp_flags) { + struct pnfs_layout_range arg = { + .iomode = iomode, + .offset = pos, + .length = count, + }; struct nfs_inode *nfsi = NFS_I(ino); struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; struct pnfs_layout_hdr *lo; @@ -789,7 +877,7 @@ pnfs_update_layout(struct inode *ino, goto out_unlock; /* Check to see if the layout for the given range already exists */ - lseg = pnfs_find_lseg(lo, iomode); + lseg = pnfs_find_lseg(lo, &arg); if (lseg) goto out_unlock; @@ -811,7 +899,7 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } - lseg = send_layoutget(lo, ctx, iomode, gfp_flags); + lseg = send_layoutget(lo, ctx, &arg, gfp_flags); if (!lseg && first) { spin_lock(&clp->cl_lock); list_del_init(&lo->plh_layouts); @@ -838,17 +926,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; int status = 0; - /* Verify we got what we asked for. - * Note that because the xdr parsing only accepts a single - * element array, this can fail even if the server is behaving - * correctly. - */ - if (lgp->args.range.iomode > res->range.iomode || - res->range.offset != 0 || - res->range.length != NFS4_MAX_UINT64) { - status = -EINVAL; - goto out; - } /* Inject layout blob into I/O device driver */ lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); if (!lseg || IS_ERR(lseg)) { @@ -903,9 +980,14 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, + req_offset(req), + pgio->pg_count, IOMODE_READ, GFP_KERNEL); - } + } else if (pgio->pg_lseg && + req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length)) + return 0; return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } @@ -926,9 +1008,14 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, + req_offset(req), + pgio->pg_count, IOMODE_RW, GFP_NOFS); - } + } else if (pgio->pg_lseg && + req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length)) + return 0; return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 5b083d29533..78f8a4a171b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -131,7 +131,8 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type, gfp_t gfp_flags); + loff_t pos, u64 count, enum pnfs_iomode access_type, + gfp_t gfp_flags); void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, @@ -271,7 +272,8 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) static inline struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type, gfp_t gfp_flags) + loff_t pos, u64 count, enum pnfs_iomode access_type, + gfp_t gfp_flags) { return NULL; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2bcf0dc306a..540c8bc93f9 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -288,7 +288,9 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg != NULL); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, + req_offset(req), desc->pg_count, + IOMODE_READ, GFP_KERNEL); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -351,7 +353,9 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, + req_offset(req), desc->pg_count, + IOMODE_READ, GFP_KERNEL); ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 0, lseg); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 49c715b4ac9..7edb72f27c2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -939,7 +939,9 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, + req_offset(req), desc->pg_count, + IOMODE_RW, GFP_NOFS); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -1013,7 +1015,9 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, + req_offset(req), desc->pg_count, + IOMODE_RW, GFP_NOFS); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) -- cgit v1.2.3-70-g09d2 From 707ed5fdb587c71fdb7ad224ba1d80231f33c974 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:47:46 +0300 Subject: pnfs: align layoutget requests on page boundaries Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c2f09e9b670..2357ee343f4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -850,6 +850,7 @@ pnfs_update_layout(struct inode *ino, .offset = pos, .length = count, }; + unsigned pg_offset; struct nfs_inode *nfsi = NFS_I(ino); struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; struct pnfs_layout_hdr *lo; @@ -899,6 +900,13 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } + pg_offset = arg.offset & ~PAGE_CACHE_MASK; + if (pg_offset) { + arg.offset -= pg_offset; + arg.length += pg_offset; + } + arg.length = PAGE_CACHE_ALIGN(arg.length); + lseg = send_layoutget(lo, ctx, &arg, gfp_flags); if (!lseg && first) { spin_lock(&clp->cl_lock); -- cgit v1.2.3-70-g09d2 From 778b5502fdba5b183553f3f2ef1672ba78ac58b6 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:48:02 +0300 Subject: pnfs: Use byte-range for cb_layoutrecall Use recalled range to invalidate particular layout segments in the layout cache. Signed-off-by: Benny Halevy --- fs/nfs/callback_proc.c | 4 ++-- fs/nfs/pnfs.c | 15 +++++++++------ fs/nfs/pnfs.h | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c73e7b2fb8e..d4d1954e9bb 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -139,7 +139,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, spin_lock(&ino->i_lock); if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || mark_matching_lsegs_invalid(lo, &free_me_list, - args->cbl_range.iomode)) + &args->cbl_range)) rv = NFS4ERR_DELAY; else rv = NFS4ERR_NOMATCHING_LAYOUT; @@ -184,7 +184,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, ino = lo->plh_inode; spin_lock(&ino->i_lock); set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); - if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode)) + if (mark_matching_lsegs_invalid(lo, &free_me_list, &range)) rv = NFS4ERR_DELAY; list_del_init(&lo->plh_bulk_recall); spin_unlock(&ino->i_lock); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2357ee343f4..20436a5e76c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -321,10 +321,12 @@ lo_seg_intersecting(struct pnfs_layout_range *l1, } static bool -should_free_lseg(u32 lseg_iomode, u32 recall_iomode) +should_free_lseg(struct pnfs_layout_range *lseg_range, + struct pnfs_layout_range *recall_range) { - return (recall_iomode == IOMODE_ANY || - lseg_iomode == recall_iomode); + return (recall_range->iomode == IOMODE_ANY || + lseg_range->iomode == recall_range->iomode) && + lo_seg_intersecting(lseg_range, recall_range); } /* Returns 1 if lseg is removed from list, 0 otherwise */ @@ -355,7 +357,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, - u32 iomode) + struct pnfs_layout_range *recall_range) { struct pnfs_layout_segment *lseg, *next; int invalid = 0, removed = 0; @@ -368,7 +370,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return 0; } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) - if (should_free_lseg(lseg->pls_range.iomode, iomode)) { + if (!recall_range || + should_free_lseg(&lseg->pls_range, recall_range)) { dprintk("%s: freeing lseg %p iomode %d " "offset %llu length %llu\n", __func__, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, @@ -417,7 +420,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) lo = nfsi->layout; if (lo) { lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ - mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); + mark_matching_lsegs_invalid(lo, &tmp_list, NULL); } spin_unlock(&nfsi->vfs_inode.i_lock); pnfs_free_lseg_list(&tmp_list); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 78f8a4a171b..f37ab3539cb 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -154,7 +154,7 @@ int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct nfs4_state *open_state); int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, - u32 iomode); + struct pnfs_layout_range *recall_range); bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); -- cgit v1.2.3-70-g09d2 From ae50c0b5c6f6fa340f1fe2d244b358145f7e5a15 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 22 May 2011 19:48:21 +0300 Subject: pnfs: client stats A pNFS client auto-negotiates a lot of features (minorversion level, pNFS layout type, etc.). This is convenient, but makes certain kinds of failures hard for a user to detect. For example, if the client falls back on 4.0, or falls back to MDS IO because the user didn't connect to the right iscsi disks before mounting, the only symptoms may be reduced performance, which may not be noticed till long after the actual failure, and may be difficult for a user to diagnose. However, such "failures" may also be perfectly normal in some cases, so we don't want to spam the system logs with them. One approach would be to put some more information into /proc/self/mountstats. Signed-off-by: J. Bruce Fields Signed-off-by: Benny Halevy [pnfs: add commit client stats] [fixup data types for "ret" variables in pnfs_try_to* inline funcs.] Signed-off-by: Benny Halevy [fix definition of show_pnfs for !CONFIG_PNFS] Signed-off-by: Benny Halevy [nfs41: Fix show_sessions in the not CONFIG_NFS_V4_1 case] There is a build error when CONFIG_NFS_V4 is set but CONFIG_NFS_V4_1 is *not* set. show_sessions() prototype was unbalanced between the two cases. Signed-off-by: Boaz Harrosh [pnfs: super.c remove CONFIG_PNFS] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy --- fs/nfs/super.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e288f06d3fa..ce40e5c568b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -63,6 +63,7 @@ #include "iostat.h" #include "internal.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -732,6 +733,28 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } +#ifdef CONFIG_NFS_V4_1 +void show_sessions(struct seq_file *m, struct nfs_server *server) +{ + if (nfs4_has_session(server->nfs_client)) + seq_printf(m, ",sessions"); +} +#else +void show_sessions(struct seq_file *m, struct nfs_server *server) {} +#endif + +#ifdef CONFIG_NFS_V4_1 +void show_pnfs(struct seq_file *m, struct nfs_server *server) +{ + seq_printf(m, ",pnfs="); + if (server->pnfs_curr_ld) + seq_printf(m, "%s", server->pnfs_curr_ld->name); + else + seq_printf(m, "not configured"); +} +#else /* CONFIG_NFS_V4_1 */ +void show_pnfs(struct seq_file *m, struct nfs_server *server) {} +#endif /* CONFIG_NFS_V4_1 */ static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) { @@ -792,6 +815,8 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + show_sessions(m, nfss); + show_pnfs(m, nfss); } #endif -- cgit v1.2.3-70-g09d2 From c93407d03c3ccf60b33a309e5fcd41cd98969901 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:49:06 +0300 Subject: pnfs-obj: objlayoutdriver module skeleton * Define the PNFS_OBJLAYOUT Kconfig option in the nfs master Kconfig file. * Add the objlayout driver to the Kernel's Kbuild system. * Add the fs/nfs/objlayout/Kbuild file for building the objlayoutdriver.ko driver * Define fs/nfs/objlayout/objio_osd.c, register the driver on module initialization and unregister on exit. [pnfs-obj: remove of CONFIG_PNFS fallout] Signed-off-by: Boaz Harrosh [added "unsure" clause] [depend on NFS_V4_1] Signed-off-by: Benny Halevy --- fs/nfs/Kconfig | 10 ++++++ fs/nfs/Makefile | 2 ++ fs/nfs/objlayout/Kbuild | 5 +++ fs/nfs/objlayout/objio_osd.c | 76 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 93 insertions(+) create mode 100644 fs/nfs/objlayout/Kbuild create mode 100644 fs/nfs/objlayout/objio_osd.c diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index ba306658a6d..81515545ba7 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -87,6 +87,16 @@ config NFS_V4_1 config PNFS_FILE_LAYOUT tristate +config PNFS_OBJLAYOUT + tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)" + depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD + help + Say M here if you want your pNFS client to support the Objects Layout Driver. + Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and + upper level driver (SCSI_OSD_ULD). + + If unsure, say N. + config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 7516a8aaa42..6a34f7dd0e6 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -21,3 +21,5 @@ nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o + +obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild new file mode 100644 index 00000000000..2e5b9a44773 --- /dev/null +++ b/fs/nfs/objlayout/Kbuild @@ -0,0 +1,5 @@ +# +# Makefile for the pNFS Objects Layout Driver kernel module +# +objlayoutdriver-y := objio_osd.o +obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c new file mode 100644 index 00000000000..379595f2d1c --- /dev/null +++ b/fs/nfs/objlayout/objio_osd.c @@ -0,0 +1,76 @@ +/* + * pNFS Objects layout implementation over open-osd initiator library + * + * Copyright (C) 2009 Panasas Inc. [year of first publication] + * All rights reserved. + * + * Benny Halevy + * Boaz Harrosh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "../pnfs.h" + +static struct pnfs_layoutdriver_type objlayout_type = { + .id = LAYOUT_OSD2_OBJECTS, + .name = "LAYOUT_OSD2_OBJECTS", +}; + +MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); +MODULE_AUTHOR("Benny Halevy "); +MODULE_LICENSE("GPL"); + +static int __init +objlayout_init(void) +{ + int ret = pnfs_register_layoutdriver(&objlayout_type); + + if (ret) + printk(KERN_INFO + "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", + __func__, ret); + else + printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", + __func__); + return ret; +} + +static void __exit +objlayout_exit(void) +{ + pnfs_unregister_layoutdriver(&objlayout_type); + printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", + __func__); +} + +module_init(objlayout_init); +module_exit(objlayout_exit); -- cgit v1.2.3-70-g09d2 From 38b7c401f6ade50543f246c4bc2c971edf2b19dd Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:49:32 +0300 Subject: pnfs-obj: pnfs_osd XDR definitions * Add the pnfs_osd_xdr.h header * defintions the pnfs_osd_layout structure including all it's sub-types and constants. * Declare the pnfs_osd_xdr_decode_layout API + all needed inline helpers. * Define the pnfs_osd_deviceaddr structure and all its subtypes and constants. * Declare API for decoding of a pnfs_osd_deviceaddr from XDR stream. * Define the pnfs_osd_ioerr structure, its substructures and constants. * Declare API for encoding of a pnfs_osd_ioerr into XDR stream. * Define the pnfs_osd_layoutupdate structure and its substructures. * Declare API for encoding of a pnfs_osd_layoutupdate into XDR stream. [Remove server definitions] Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- include/linux/pnfs_osd_xdr.h | 345 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 345 insertions(+) create mode 100644 include/linux/pnfs_osd_xdr.h diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h new file mode 100644 index 00000000000..76efbdd0162 --- /dev/null +++ b/include/linux/pnfs_osd_xdr.h @@ -0,0 +1,345 @@ +/* + * pNFS-osd on-the-wire data structures + * + * Copyright (C) 2007 Panasas Inc. [year of first publication] + * All rights reserved. + * + * Benny Halevy + * Boaz Harrosh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __PNFS_OSD_XDR_H__ +#define __PNFS_OSD_XDR_H__ + +#include +#include +#include + +#define PNFS_OSD_OSDNAME_MAXSIZE 256 + +/* + * draft-ietf-nfsv4-minorversion-22 + * draft-ietf-nfsv4-pnfs-obj-12 + */ + +/* Layout Structure */ + +enum pnfs_osd_raid_algorithm4 { + PNFS_OSD_RAID_0 = 1, + PNFS_OSD_RAID_4 = 2, + PNFS_OSD_RAID_5 = 3, + PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ +}; + +/* struct pnfs_osd_data_map4 { + * uint32_t odm_num_comps; + * length4 odm_stripe_unit; + * uint32_t odm_group_width; + * uint32_t odm_group_depth; + * uint32_t odm_mirror_cnt; + * pnfs_osd_raid_algorithm4 odm_raid_algorithm; + * }; + */ +struct pnfs_osd_data_map { + u32 odm_num_comps; + u64 odm_stripe_unit; + u32 odm_group_width; + u32 odm_group_depth; + u32 odm_mirror_cnt; + u32 odm_raid_algorithm; +}; + +/* struct pnfs_osd_objid4 { + * deviceid4 oid_device_id; + * uint64_t oid_partition_id; + * uint64_t oid_object_id; + * }; + */ +struct pnfs_osd_objid { + struct nfs4_deviceid oid_device_id; + u64 oid_partition_id; + u64 oid_object_id; +}; + +/* For printout. I use: + * kprint("dev(%llx:%llx)", _DEVID_LO(pointer), _DEVID_HI(pointer)); + * BE style + */ +#define _DEVID_LO(oid_device_id) \ + (unsigned long long)be64_to_cpup((__be64 *)(oid_device_id)->data) + +#define _DEVID_HI(oid_device_id) \ + (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) + +static inline int +pnfs_osd_objid_xdr_sz(void) +{ + return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2; +} + +enum pnfs_osd_version { + PNFS_OSD_MISSING = 0, + PNFS_OSD_VERSION_1 = 1, + PNFS_OSD_VERSION_2 = 2 +}; + +struct pnfs_osd_opaque_cred { + u32 cred_len; + void *cred; +}; + +enum pnfs_osd_cap_key_sec { + PNFS_OSD_CAP_KEY_SEC_NONE = 0, + PNFS_OSD_CAP_KEY_SEC_SSV = 1, +}; + +/* struct pnfs_osd_object_cred4 { + * pnfs_osd_objid4 oc_object_id; + * pnfs_osd_version4 oc_osd_version; + * pnfs_osd_cap_key_sec4 oc_cap_key_sec; + * opaque oc_capability_key<>; + * opaque oc_capability<>; + * }; + */ +struct pnfs_osd_object_cred { + struct pnfs_osd_objid oc_object_id; + u32 oc_osd_version; + u32 oc_cap_key_sec; + struct pnfs_osd_opaque_cred oc_cap_key; + struct pnfs_osd_opaque_cred oc_cap; +}; + +/* struct pnfs_osd_layout4 { + * pnfs_osd_data_map4 olo_map; + * uint32_t olo_comps_index; + * pnfs_osd_object_cred4 olo_components<>; + * }; + */ +struct pnfs_osd_layout { + struct pnfs_osd_data_map olo_map; + u32 olo_comps_index; + u32 olo_num_comps; + struct pnfs_osd_object_cred *olo_comps; +}; + +/* Device Address */ +enum pnfs_osd_targetid_type { + OBJ_TARGET_ANON = 1, + OBJ_TARGET_SCSI_NAME = 2, + OBJ_TARGET_SCSI_DEVICE_ID = 3, +}; + +/* union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) { + * case OBJ_TARGET_SCSI_NAME: + * string oti_scsi_name<>; + * + * case OBJ_TARGET_SCSI_DEVICE_ID: + * opaque oti_scsi_device_id<>; + * + * default: + * void; + * }; + * + * union pnfs_osd_targetaddr4 switch (bool ota_available) { + * case TRUE: + * netaddr4 ota_netaddr; + * case FALSE: + * void; + * }; + * + * struct pnfs_osd_deviceaddr4 { + * pnfs_osd_targetid4 oda_targetid; + * pnfs_osd_targetaddr4 oda_targetaddr; + * uint64_t oda_lun; + * opaque oda_systemid<>; + * pnfs_osd_object_cred4 oda_root_obj_cred; + * opaque oda_osdname<>; + * }; + */ +struct pnfs_osd_targetid { + u32 oti_type; + struct nfs4_string oti_scsi_device_id; +}; + +enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 }; + +/* struct netaddr4 { + * // see struct rpcb in RFC1833 + * string r_netid<>; // network id + * string r_addr<>; // universal address + * }; + */ +struct pnfs_osd_net_addr { + struct nfs4_string r_netid; + struct nfs4_string r_addr; +}; + +struct pnfs_osd_targetaddr { + u32 ota_available; + struct pnfs_osd_net_addr ota_netaddr; +}; + +enum { + NETWORK_ID_MAX = 16 / 4, + UNIVERSAL_ADDRESS_MAX = 64 / 4, + PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX, +}; + +struct pnfs_osd_deviceaddr { + struct pnfs_osd_targetid oda_targetid; + struct pnfs_osd_targetaddr oda_targetaddr; + u8 oda_lun[8]; + struct nfs4_string oda_systemid; + struct pnfs_osd_object_cred oda_root_obj_cred; + struct nfs4_string oda_osdname; +}; + +enum { + ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4, + PNFS_OSD_DEVICEADDR_MAX = + PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX + + 2 /*oda_lun*/ + + 1 + OSD_SYSTEMID_LEN + + 1 + ODA_OSDNAME_MAX, +}; + +/* LAYOUTCOMMIT: layoutupdate */ + +/* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) { + * case TRUE: + * int64_t dsu_delta; + * case FALSE: + * void; + * }; + * + * struct pnfs_osd_layoutupdate4 { + * pnfs_osd_deltaspaceused4 olu_delta_space_used; + * bool olu_ioerr_flag; + * }; + */ +struct pnfs_osd_layoutupdate { + u32 dsu_valid; + s64 dsu_delta; + u32 olu_ioerr_flag; +}; + +/* LAYOUTRETURN: I/O Rrror Report */ + +enum pnfs_osd_errno { + PNFS_OSD_ERR_EIO = 1, + PNFS_OSD_ERR_NOT_FOUND = 2, + PNFS_OSD_ERR_NO_SPACE = 3, + PNFS_OSD_ERR_BAD_CRED = 4, + PNFS_OSD_ERR_NO_ACCESS = 5, + PNFS_OSD_ERR_UNREACHABLE = 6, + PNFS_OSD_ERR_RESOURCE = 7 +}; + +/* struct pnfs_osd_ioerr4 { + * pnfs_osd_objid4 oer_component; + * length4 oer_comp_offset; + * length4 oer_comp_length; + * bool oer_iswrite; + * pnfs_osd_errno4 oer_errno; + * }; + */ +struct pnfs_osd_ioerr { + struct pnfs_osd_objid oer_component; + u64 oer_comp_offset; + u64 oer_comp_length; + u32 oer_iswrite; + u32 oer_errno; +}; + +/* OSD XDR API */ +/* Layout helpers */ +/* Layout decoding is done in two parts: + * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part + * of the layout. @iter members need not be initialized. + * Returned: + * @layout members are set. (@layout->olo_comps set to NULL). + * + * Zero on success, or negative error if passed xdr is broken. + * + * 2. 2nd Call pnfs_osd_xdr_decode_layout_comp() in a loop until it returns + * false, to decode the next component. + * Returned: + * true if there is more to decode or false if we are done or error. + * + * Example: + * struct pnfs_osd_xdr_decode_layout_iter iter; + * struct pnfs_osd_layout layout; + * struct pnfs_osd_object_cred comp; + * int status; + * + * status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); + * if (unlikely(status)) + * goto err; + * while(pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status)) { + * // All of @comp strings point to inside the xdr_buffer + * // or scrach buffer. Copy them out to user memory eg. + * copy_single_comp(dest_comp++, &comp); + * } + * if (unlikely(status)) + * goto err; + */ + +struct pnfs_osd_xdr_decode_layout_iter { + unsigned total_comps; + unsigned decoded_comps; +}; + +extern int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr); + +extern bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, + int *err); + +/* Device Info helpers */ + +/* Note: All strings inside @deviceaddr point to space inside @p. + * @p should stay valid while @deviceaddr is in use. + */ +extern void pnfs_osd_xdr_decode_deviceaddr( + struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p); + +/* layoutupdate (layout_commit) xdr helpers */ +extern int +pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, + struct pnfs_osd_layoutupdate *lou); + +/* osd_ioerror encoding/decoding (layout_return) */ +/* Client */ +extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); +extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); + +#endif /* __PNFS_OSD_XDR_H__ */ -- cgit v1.2.3-70-g09d2 From f1bc893a89d012649e4e7f43575b2c290e08ee42 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 22 May 2011 19:49:57 +0300 Subject: pnfs-obj: pnfs_osd XDR client implementation * Add the fs/nfs/objlayout/pnfs_osd_xdr_cli.c file, which will include the XDR encode/decode implementations for the pNFS client objlayout driver. [Wrong type in comments] Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- fs/nfs/objlayout/Kbuild | 2 +- fs/nfs/objlayout/pnfs_osd_xdr_cli.c | 412 ++++++++++++++++++++++++++++++++++++ 2 files changed, 413 insertions(+), 1 deletion(-) create mode 100644 fs/nfs/objlayout/pnfs_osd_xdr_cli.c diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild index 2e5b9a44773..7b2a5a24065 100644 --- a/fs/nfs/objlayout/Kbuild +++ b/fs/nfs/objlayout/Kbuild @@ -1,5 +1,5 @@ # # Makefile for the pNFS Objects Layout Driver kernel module # -objlayoutdriver-y := objio_osd.o +objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c new file mode 100644 index 00000000000..16fc758e912 --- /dev/null +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c @@ -0,0 +1,412 @@ +/* + * Object-Based pNFS Layout XDR layer + * + * Copyright (C) 2007 Panasas Inc. [year of first publication] + * All rights reserved. + * + * Benny Halevy + * Boaz Harrosh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +/* + * The following implementation is based on RFC5664 + */ + +/* + * struct pnfs_osd_objid { + * struct nfs4_deviceid oid_device_id; + * u64 oid_partition_id; + * u64 oid_object_id; + * }; // xdr size 32 bytes + */ +static __be32 * +_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid) +{ + p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data, + sizeof(objid->oid_device_id.data)); + + p = xdr_decode_hyper(p, &objid->oid_partition_id); + p = xdr_decode_hyper(p, &objid->oid_object_id); + return p; +} +/* + * struct pnfs_osd_opaque_cred { + * u32 cred_len; + * void *cred; + * }; // xdr size [variable] + * The return pointers are from the xdr buffer + */ +static int +_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred, + struct xdr_stream *xdr) +{ + __be32 *p = xdr_inline_decode(xdr, 1); + + if (!p) + return -EINVAL; + + opaque_cred->cred_len = be32_to_cpu(*p++); + + p = xdr_inline_decode(xdr, opaque_cred->cred_len); + if (!p) + return -EINVAL; + + opaque_cred->cred = p; + return 0; +} + +/* + * struct pnfs_osd_object_cred { + * struct pnfs_osd_objid oc_object_id; + * u32 oc_osd_version; + * u32 oc_cap_key_sec; + * struct pnfs_osd_opaque_cred oc_cap_key + * struct pnfs_osd_opaque_cred oc_cap; + * }; // xdr size 32 + 4 + 4 + [variable] + [variable] + */ +static int +_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp, + struct xdr_stream *xdr) +{ + __be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4); + int ret; + + if (!p) + return -EIO; + + p = _osd_xdr_decode_objid(p, &comp->oc_object_id); + comp->oc_osd_version = be32_to_cpup(p++); + comp->oc_cap_key_sec = be32_to_cpup(p); + + ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr); + if (unlikely(ret)) + return ret; + + ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr); + return ret; +} + +/* + * struct pnfs_osd_data_map { + * u32 odm_num_comps; + * u64 odm_stripe_unit; + * u32 odm_group_width; + * u32 odm_group_depth; + * u32 odm_mirror_cnt; + * u32 odm_raid_algorithm; + * }; // xdr size 4 + 8 + 4 + 4 + 4 + 4 + */ +static inline int +_osd_data_map_xdr_sz(void) +{ + return 4 + 8 + 4 + 4 + 4 + 4; +} + +static __be32 * +_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map) +{ + data_map->odm_num_comps = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &data_map->odm_stripe_unit); + data_map->odm_group_width = be32_to_cpup(p++); + data_map->odm_group_depth = be32_to_cpup(p++); + data_map->odm_mirror_cnt = be32_to_cpup(p++); + data_map->odm_raid_algorithm = be32_to_cpup(p++); + dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u " + "odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n", + __func__, + data_map->odm_num_comps, + (unsigned long long)data_map->odm_stripe_unit, + data_map->odm_group_width, + data_map->odm_group_depth, + data_map->odm_mirror_cnt, + data_map->odm_raid_algorithm); + return p; +} + +int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr) +{ + __be32 *p; + + memset(iter, 0, sizeof(*iter)); + + p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4); + if (unlikely(!p)) + return -EINVAL; + + p = _osd_xdr_decode_data_map(p, &layout->olo_map); + layout->olo_comps_index = be32_to_cpup(p++); + layout->olo_num_comps = be32_to_cpup(p++); + iter->total_comps = layout->olo_num_comps; + return 0; +} + +bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, + int *err) +{ + BUG_ON(iter->decoded_comps > iter->total_comps); + if (iter->decoded_comps == iter->total_comps) + return false; + + *err = _osd_xdr_decode_object_cred(comp, xdr); + if (unlikely(*err)) { + dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d " + "total_comps=%d\n", __func__, *err, + iter->decoded_comps, iter->total_comps); + return false; /* stop the loop */ + } + dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx " + "key_len=%u cap_len=%u\n", + __func__, + _DEVID_LO(&comp->oc_object_id.oid_device_id), + _DEVID_HI(&comp->oc_object_id.oid_device_id), + comp->oc_object_id.oid_partition_id, + comp->oc_object_id.oid_object_id, + comp->oc_cap_key.cred_len, comp->oc_cap.cred_len); + + iter->decoded_comps++; + return true; +} + +/* + * Get Device Information Decoding + * + * Note: since Device Information is currently done synchronously, all + * variable strings fields are left inside the rpc buffer and are only + * pointed to by the pnfs_osd_deviceaddr members. So the read buffer + * should not be freed while the returned information is in use. + */ +/* + *struct nfs4_string { + * unsigned int len; + * char *data; + *}; // size [variable] + * NOTE: Returned string points to inside the XDR buffer + */ +static __be32 * +__read_u8_opaque(__be32 *p, struct nfs4_string *str) +{ + str->len = be32_to_cpup(p++); + str->data = (char *)p; + + p += XDR_QUADLEN(str->len); + return p; +} + +/* + * struct pnfs_osd_targetid { + * u32 oti_type; + * struct nfs4_string oti_scsi_device_id; + * };// size 4 + [variable] + */ +static __be32 * +__read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid) +{ + u32 oti_type; + + oti_type = be32_to_cpup(p++); + targetid->oti_type = oti_type; + + switch (oti_type) { + case OBJ_TARGET_SCSI_NAME: + case OBJ_TARGET_SCSI_DEVICE_ID: + p = __read_u8_opaque(p, &targetid->oti_scsi_device_id); + } + + return p; +} + +/* + * struct pnfs_osd_net_addr { + * struct nfs4_string r_netid; + * struct nfs4_string r_addr; + * }; + */ +static __be32 * +__read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr) +{ + p = __read_u8_opaque(p, &netaddr->r_netid); + p = __read_u8_opaque(p, &netaddr->r_addr); + + return p; +} + +/* + * struct pnfs_osd_targetaddr { + * u32 ota_available; + * struct pnfs_osd_net_addr ota_netaddr; + * }; + */ +static __be32 * +__read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr) +{ + u32 ota_available; + + ota_available = be32_to_cpup(p++); + targetaddr->ota_available = ota_available; + + if (ota_available) + p = __read_net_addr(p, &targetaddr->ota_netaddr); + + + return p; +} + +/* + * struct pnfs_osd_deviceaddr { + * struct pnfs_osd_targetid oda_targetid; + * struct pnfs_osd_targetaddr oda_targetaddr; + * u8 oda_lun[8]; + * struct nfs4_string oda_systemid; + * struct pnfs_osd_object_cred oda_root_obj_cred; + * struct nfs4_string oda_osdname; + * }; + */ + +/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does + * not have an xdr_stream + */ +static __be32 * +__read_opaque_cred(__be32 *p, + struct pnfs_osd_opaque_cred *opaque_cred) +{ + opaque_cred->cred_len = be32_to_cpu(*p++); + opaque_cred->cred = p; + return p + XDR_QUADLEN(opaque_cred->cred_len); +} + +static __be32 * +__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp) +{ + p = _osd_xdr_decode_objid(p, &comp->oc_object_id); + comp->oc_osd_version = be32_to_cpup(p++); + comp->oc_cap_key_sec = be32_to_cpup(p++); + + p = __read_opaque_cred(p, &comp->oc_cap_key); + p = __read_opaque_cred(p, &comp->oc_cap); + return p; +} + +void pnfs_osd_xdr_decode_deviceaddr( + struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p) +{ + p = __read_targetid(p, &deviceaddr->oda_targetid); + + p = __read_targetaddr(p, &deviceaddr->oda_targetaddr); + + p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun, + sizeof(deviceaddr->oda_lun)); + + p = __read_u8_opaque(p, &deviceaddr->oda_systemid); + + p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred); + + p = __read_u8_opaque(p, &deviceaddr->oda_osdname); + + /* libosd likes this terminated in dbg. It's last, so no problems */ + deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0; +} + +/* + * struct pnfs_osd_layoutupdate { + * u32 dsu_valid; + * s64 dsu_delta; + * u32 olu_ioerr_flag; + * }; xdr size 4 + 8 + 4 + */ +int +pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, + struct pnfs_osd_layoutupdate *lou) +{ + __be32 *p = xdr_reserve_space(xdr, 4 + 8 + 4); + + if (!p) + return -E2BIG; + + *p++ = cpu_to_be32(lou->dsu_valid); + if (lou->dsu_valid) + p = xdr_encode_hyper(p, lou->dsu_delta); + *p++ = cpu_to_be32(lou->olu_ioerr_flag); + return 0; +} + +/* + * struct pnfs_osd_objid { + * struct nfs4_deviceid oid_device_id; + * u64 oid_partition_id; + * u64 oid_object_id; + * }; // xdr size 32 bytes + */ +static inline __be32 * +pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id) +{ + p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data, + sizeof(object_id->oid_device_id.data)); + p = xdr_encode_hyper(p, object_id->oid_partition_id); + p = xdr_encode_hyper(p, object_id->oid_object_id); + + return p; +} + +/* + * struct pnfs_osd_ioerr { + * struct pnfs_osd_objid oer_component; + * u64 oer_comp_offset; + * u64 oer_comp_length; + * u32 oer_iswrite; + * u32 oer_errno; + * }; // xdr size 32 + 24 bytes + */ +void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr) +{ + p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component); + p = xdr_encode_hyper(p, ioerr->oer_comp_offset); + p = xdr_encode_hyper(p, ioerr->oer_comp_length); + *p++ = cpu_to_be32(ioerr->oer_iswrite); + *p = cpu_to_be32(ioerr->oer_errno); +} + +__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 32 + 24); + if (unlikely(!p)) + dprintk("%s: out of xdr space\n", __func__); + + return p; +} -- cgit v1.2.3-70-g09d2 From 09f5bf4e6d0607399c16ec7a2d8d166f31086686 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 22 May 2011 19:50:20 +0300 Subject: pnfs-obj: decode layout, alloc/free lseg objlayout_alloc_lseg prepares an xdr_stream and calls the raid engins objio_alloc_lseg() to allocate a private pnfs_layout_segment. objio_osd.c::objio_alloc_lseg() uses passed xdr_stream to decode and store the layout_segment information in an objio_segment struct, using the pnfs_osd_xdr.h API for the actual parsing the layout xdr. objlayout_free_lseg calls objio_free_lseg() to free the allocated space. Signed-off-by: Boaz Harrosh [gfp_flags] [removed "extern" from function definitions] Signed-off-by: Benny Halevy --- fs/nfs/objlayout/Kbuild | 2 +- fs/nfs/objlayout/objio_osd.c | 170 ++++++++++++++++++++++++++++++++++++++++++- fs/nfs/objlayout/objlayout.c | 104 ++++++++++++++++++++++++++ fs/nfs/objlayout/objlayout.h | 67 +++++++++++++++++ 4 files changed, 341 insertions(+), 2 deletions(-) create mode 100644 fs/nfs/objlayout/objlayout.c create mode 100644 fs/nfs/objlayout/objlayout.h diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild index 7b2a5a24065..ed30ea072bb 100644 --- a/fs/nfs/objlayout/Kbuild +++ b/fs/nfs/objlayout/Kbuild @@ -1,5 +1,5 @@ # # Makefile for the pNFS Objects Layout Driver kernel module # -objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o +objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 379595f2d1c..08f1d90b4ce 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -38,11 +38,179 @@ */ #include -#include "../pnfs.h" +#include + +#include "objlayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +#define _LLU(x) ((unsigned long long)x) + +struct caps_buffers { + u8 caps_key[OSD_CRYPTO_KEYID_SIZE]; + u8 creds[OSD_CAP_LEN]; +}; + +struct objio_segment { + struct pnfs_layout_segment lseg; + + struct pnfs_osd_object_cred *comps; + + unsigned mirrors_p1; + unsigned stripe_unit; + unsigned group_width; /* Data stripe_units without integrity comps */ + u64 group_depth; + unsigned group_count; + + unsigned comps_index; + unsigned num_comps; + /* variable length */ + struct objio_dev_ent *ods[]; +}; + +static inline struct objio_segment * +OBJIO_LSEG(struct pnfs_layout_segment *lseg) +{ + return container_of(lseg, struct objio_segment, lseg); +} + +static int _verify_data_map(struct pnfs_osd_layout *layout) +{ + struct pnfs_osd_data_map *data_map = &layout->olo_map; + u64 stripe_length; + u32 group_width; + +/* FIXME: Only raid0 for now. if not go through MDS */ + if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) { + printk(KERN_ERR "Only RAID_0 for now\n"); + return -ENOTSUPP; + } + if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) { + printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n", + data_map->odm_num_comps, data_map->odm_mirror_cnt); + return -EINVAL; + } + + if (data_map->odm_group_width) + group_width = data_map->odm_group_width; + else + group_width = data_map->odm_num_comps / + (data_map->odm_mirror_cnt + 1); + + stripe_length = (u64)data_map->odm_stripe_unit * group_width; + if (stripe_length >= (1ULL << 32)) { + printk(KERN_ERR "Total Stripe length(0x%llx)" + " >= 32bit is not supported\n", _LLU(stripe_length)); + return -ENOTSUPP; + } + + if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) { + printk(KERN_ERR "Stripe Unit(0x%llx)" + " must be Multples of PAGE_SIZE(0x%lx)\n", + _LLU(data_map->odm_stripe_unit), PAGE_SIZE); + return -ENOTSUPP; + } + + return 0; +} + +static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp, + struct pnfs_osd_object_cred *src_comp, + struct caps_buffers *caps_p) +{ + WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key)); + WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds)); + + *cur_comp = *src_comp; + + memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred, + sizeof(caps_p->caps_key)); + cur_comp->oc_cap_key.cred = caps_p->caps_key; + + memcpy(caps_p->creds, src_comp->oc_cap.cred, + sizeof(caps_p->creds)); + cur_comp->oc_cap.cred = caps_p->creds; +} + +int objio_alloc_lseg(struct pnfs_layout_segment **outp, + struct pnfs_layout_hdr *pnfslay, + struct pnfs_layout_range *range, + struct xdr_stream *xdr, + gfp_t gfp_flags) +{ + struct objio_segment *objio_seg; + struct pnfs_osd_xdr_decode_layout_iter iter; + struct pnfs_osd_layout layout; + struct pnfs_osd_object_cred *cur_comp, src_comp; + struct caps_buffers *caps_p; + int err; + + err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); + if (unlikely(err)) + return err; + + err = _verify_data_map(&layout); + if (unlikely(err)) + return err; + + objio_seg = kzalloc(sizeof(*objio_seg) + + sizeof(objio_seg->ods[0]) * layout.olo_num_comps + + sizeof(*objio_seg->comps) * layout.olo_num_comps + + sizeof(struct caps_buffers) * layout.olo_num_comps, + gfp_flags); + if (!objio_seg) + return -ENOMEM; + + objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps); + cur_comp = objio_seg->comps; + caps_p = (void *)(cur_comp + layout.olo_num_comps); + while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) + copy_single_comp(cur_comp++, &src_comp, caps_p++); + if (unlikely(err)) + goto err; + + objio_seg->num_comps = layout.olo_num_comps; + objio_seg->comps_index = layout.olo_comps_index; + + objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; + objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit; + if (layout.olo_map.odm_group_width) { + objio_seg->group_width = layout.olo_map.odm_group_width; + objio_seg->group_depth = layout.olo_map.odm_group_depth; + objio_seg->group_count = layout.olo_map.odm_num_comps / + objio_seg->mirrors_p1 / + objio_seg->group_width; + } else { + objio_seg->group_width = layout.olo_map.odm_num_comps / + objio_seg->mirrors_p1; + objio_seg->group_depth = -1; + objio_seg->group_count = 1; + } + + *outp = &objio_seg->lseg; + return 0; + +err: + kfree(objio_seg); + dprintk("%s: Error: return %d\n", __func__, err); + *outp = NULL; + return err; +} + +void objio_free_lseg(struct pnfs_layout_segment *lseg) +{ + struct objio_segment *objio_seg = OBJIO_LSEG(lseg); + + kfree(objio_seg); +} + static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", + + .alloc_lseg = objlayout_alloc_lseg, + .free_lseg = objlayout_free_lseg, }; MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c new file mode 100644 index 00000000000..946526763d3 --- /dev/null +++ b/fs/nfs/objlayout/objlayout.c @@ -0,0 +1,104 @@ +/* + * pNFS Objects layout driver high level definitions + * + * Copyright (C) 2007 Panasas Inc. [year of first publication] + * All rights reserved. + * + * Benny Halevy + * Boaz Harrosh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "objlayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD +/* + * Unmarshall layout and store it in pnfslay. + */ +struct pnfs_layout_segment * +objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay, + struct nfs4_layoutget_res *lgr, + gfp_t gfp_flags) +{ + int status = -ENOMEM; + struct xdr_stream stream; + struct xdr_buf buf = { + .pages = lgr->layoutp->pages, + .page_len = lgr->layoutp->len, + .buflen = lgr->layoutp->len, + .len = lgr->layoutp->len, + }; + struct page *scratch; + struct pnfs_layout_segment *lseg; + + dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay); + + scratch = alloc_page(gfp_flags); + if (!scratch) + goto err_nofree; + + xdr_init_decode(&stream, &buf, NULL); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + + status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags); + if (unlikely(status)) { + dprintk("%s: objio_alloc_lseg Return err %d\n", __func__, + status); + goto err; + } + + __free_page(scratch); + + dprintk("%s: Return %p\n", __func__, lseg); + return lseg; + +err: + __free_page(scratch); +err_nofree: + dprintk("%s: Err Return=>%d\n", __func__, status); + return ERR_PTR(status); +} + +/* + * Free a layout segement + */ +void +objlayout_free_lseg(struct pnfs_layout_segment *lseg) +{ + dprintk("%s: freeing layout segment %p\n", __func__, lseg); + + if (unlikely(!lseg)) + return; + + objio_free_lseg(lseg); +} + diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h new file mode 100644 index 00000000000..066280a07fa --- /dev/null +++ b/fs/nfs/objlayout/objlayout.h @@ -0,0 +1,67 @@ +/* + * Data types and function declerations for interfacing with the + * pNFS standard object layout driver. + * + * Copyright (C) 2007 Panasas Inc. [year of first publication] + * All rights reserved. + * + * Benny Halevy + * Boaz Harrosh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _OBJLAYOUT_H +#define _OBJLAYOUT_H + +#include +#include +#include "../pnfs.h" + +/* + * Raid engine I/O API + */ +extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, + struct pnfs_layout_hdr *pnfslay, + struct pnfs_layout_range *range, + struct xdr_stream *xdr, + gfp_t gfp_flags); +extern void objio_free_lseg(struct pnfs_layout_segment *lseg); + +/* + * exported generic objects function vectors + */ +extern struct pnfs_layout_segment *objlayout_alloc_lseg( + struct pnfs_layout_hdr *, + struct nfs4_layoutget_res *, + gfp_t gfp_flags); +extern void objlayout_free_lseg(struct pnfs_layout_segment *); + +#endif /* _OBJLAYOUT_H */ -- cgit v1.2.3-70-g09d2 From b6c05f1693115164c7b797152ac7ea3ef8e5d296 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 26 May 2011 21:45:34 +0300 Subject: pnfs-obj: objio_osd device information retrieval and caching When a new layout is received in objio_alloc_lseg all device_ids referenced are retrieved. The device information is queried for from MDS and then the osd_device is looked-up from the osd-initiator library. The devices are cached in a per-mount-point list, for later use. At unmount all devices are "put" back to the library. objlayout_get_deviceinfo(), objlayout_put_deviceinfo() middleware API for retrieving device information given a device_id. TODO: The device cache can get big. Cap its size. Keep an LRU and start to return devices which were not used, when list gets to big, or when new entries allocation fail. [pnfs-obj: Bugs in new global-device-cache code] Signed-off-by: Boaz Harrosh [gfp_flags] [use global device cache] [use layout driver in global device cache] Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objio_osd.c | 157 +++++++++++++++++++++++++++++++++++++++++++ fs/nfs/objlayout/objlayout.c | 68 +++++++++++++++++++ fs/nfs/objlayout/objlayout.h | 8 +++ 3 files changed, 233 insertions(+) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 08f1d90b4ce..2255e2d22d0 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -46,6 +46,68 @@ #define _LLU(x) ((unsigned long long)x) +struct objio_dev_ent { + struct nfs4_deviceid_node id_node; + struct osd_dev *od; +}; + +static void +objio_free_deviceid_node(struct nfs4_deviceid_node *d) +{ + struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); + + dprintk("%s: free od=%p\n", __func__, de->od); + osduld_put_device(de->od); + kfree(de); +} + +static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss, + const struct nfs4_deviceid *d_id) +{ + struct nfs4_deviceid_node *d; + struct objio_dev_ent *de; + + d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id); + if (!d) + return NULL; + + de = container_of(d, struct objio_dev_ent, id_node); + return de; +} + +static struct objio_dev_ent * +_dev_list_add(const struct nfs_server *nfss, + const struct nfs4_deviceid *d_id, struct osd_dev *od, + gfp_t gfp_flags) +{ + struct nfs4_deviceid_node *d; + struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags); + struct objio_dev_ent *n; + + if (!de) { + dprintk("%s: -ENOMEM od=%p\n", __func__, od); + return NULL; + } + + dprintk("%s: Adding od=%p\n", __func__, od); + nfs4_init_deviceid_node(&de->id_node, + nfss->pnfs_curr_ld, + nfss->nfs_client, + d_id); + de->od = od; + + d = nfs4_insert_deviceid_node(&de->id_node); + n = container_of(d, struct objio_dev_ent, id_node); + if (n != de) { + dprintk("%s: Race with other n->od=%p\n", __func__, n->od); + objio_free_deviceid_node(&de->id_node); + de = n; + } + + atomic_inc(&de->id_node.ref); + return de; +} + struct caps_buffers { u8 caps_key[OSD_CRYPTO_KEYID_SIZE]; u8 creds[OSD_CAP_LEN]; @@ -74,6 +136,90 @@ OBJIO_LSEG(struct pnfs_layout_segment *lseg) return container_of(lseg, struct objio_segment, lseg); } +/* Send and wait for a get_device_info of devices in the layout, + then look them up with the osd_initiator library */ +static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, + struct objio_segment *objio_seg, unsigned comp, + gfp_t gfp_flags) +{ + struct pnfs_osd_deviceaddr *deviceaddr; + struct nfs4_deviceid *d_id; + struct objio_dev_ent *ode; + struct osd_dev *od; + struct osd_dev_info odi; + int err; + + d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id; + + ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); + if (ode) + return ode; + + err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); + if (unlikely(err)) { + dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", + __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); + return ERR_PTR(err); + } + + odi.systemid_len = deviceaddr->oda_systemid.len; + if (odi.systemid_len > sizeof(odi.systemid)) { + err = -EINVAL; + goto out; + } else if (odi.systemid_len) + memcpy(odi.systemid, deviceaddr->oda_systemid.data, + odi.systemid_len); + odi.osdname_len = deviceaddr->oda_osdname.len; + odi.osdname = (u8 *)deviceaddr->oda_osdname.data; + + if (!odi.osdname_len && !odi.systemid_len) { + dprintk("%s: !odi.osdname_len && !odi.systemid_len\n", + __func__); + err = -ENODEV; + goto out; + } + + od = osduld_info_lookup(&odi); + if (unlikely(IS_ERR(od))) { + err = PTR_ERR(od); + dprintk("%s: osduld_info_lookup => %d\n", __func__, err); + goto out; + } + + ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, + gfp_flags); + +out: + dprintk("%s: return=%d\n", __func__, err); + objlayout_put_deviceinfo(deviceaddr); + return err ? ERR_PTR(err) : ode; +} + +static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, + struct objio_segment *objio_seg, + gfp_t gfp_flags) +{ + unsigned i; + int err; + + /* lookup all devices */ + for (i = 0; i < objio_seg->num_comps; i++) { + struct objio_dev_ent *ode; + + ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags); + if (unlikely(IS_ERR(ode))) { + err = PTR_ERR(ode); + goto out; + } + objio_seg->ods[i] = ode; + } + err = 0; + +out: + dprintk("%s: return=%d\n", __func__, err); + return err; +} + static int _verify_data_map(struct pnfs_osd_layout *layout) { struct pnfs_osd_data_map *data_map = &layout->olo_map; @@ -171,6 +317,9 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp, objio_seg->num_comps = layout.olo_num_comps; objio_seg->comps_index = layout.olo_comps_index; + err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags); + if (err) + goto err; objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit; @@ -199,8 +348,14 @@ err: void objio_free_lseg(struct pnfs_layout_segment *lseg) { + int i; struct objio_segment *objio_seg = OBJIO_LSEG(lseg); + for (i = 0; i < objio_seg->num_comps; i++) { + if (!objio_seg->ods[i]) + break; + nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node); + } kfree(objio_seg); } @@ -211,6 +366,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { .alloc_lseg = objlayout_alloc_lseg, .free_lseg = objlayout_free_lseg, + + .free_deviceid_node = objio_free_deviceid_node, }; MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 946526763d3..10e5fca3b7f 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -102,3 +102,71 @@ objlayout_free_lseg(struct pnfs_layout_segment *lseg) objio_free_lseg(lseg); } +/* + * Get Device Info API for io engines + */ +struct objlayout_deviceinfo { + struct page *page; + struct pnfs_osd_deviceaddr da; /* This must be last */ +}; + +/* Initialize and call nfs_getdeviceinfo, then decode and return a + * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo() + * should be called. + */ +int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, + struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, + gfp_t gfp_flags) +{ + struct objlayout_deviceinfo *odi; + struct pnfs_device pd; + struct super_block *sb; + struct page *page, **pages; + u32 *p; + int err; + + page = alloc_page(gfp_flags); + if (!page) + return -ENOMEM; + + pages = &page; + pd.pages = pages; + + memcpy(&pd.dev_id, d_id, sizeof(*d_id)); + pd.layout_type = LAYOUT_OSD2_OBJECTS; + pd.pages = &page; + pd.pgbase = 0; + pd.pglen = PAGE_SIZE; + pd.mincount = 0; + + sb = pnfslay->plh_inode->i_sb; + err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd); + dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); + if (err) + goto err_out; + + p = page_address(page); + odi = kzalloc(sizeof(*odi), gfp_flags); + if (!odi) { + err = -ENOMEM; + goto err_out; + } + pnfs_osd_xdr_decode_deviceaddr(&odi->da, p); + odi->page = page; + *deviceaddr = &odi->da; + return 0; + +err_out: + __free_page(page); + return err; +} + +void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) +{ + struct objlayout_deviceinfo *odi = container_of(deviceaddr, + struct objlayout_deviceinfo, + da); + + __free_page(odi->page); + kfree(odi); +} diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 066280a07fa..0814271bb9b 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -55,6 +55,14 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, gfp_t gfp_flags); extern void objio_free_lseg(struct pnfs_layout_segment *lseg); +/* + * callback API + */ +extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, + struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, + gfp_t gfp_flags); +extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr); + /* * exported generic objects function vectors */ -- cgit v1.2.3-70-g09d2 From 636fb9c89d7e216aac3d406e458864420057e981 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:51:33 +0300 Subject: pnfs: alloc and free layout_hdr layoutdriver methods [gfp_flags] Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 21 ++++++++++++++++++--- fs/nfs/pnfs.h | 4 ++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 20436a5e76c..ef535f2a2c7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -177,13 +177,28 @@ get_layout_hdr(struct pnfs_layout_hdr *lo) atomic_inc(&lo->plh_refcount); } +static struct pnfs_layout_hdr * +pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; + return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : + kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); +} + +static void +pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; + return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); +} + static void destroy_layout_hdr(struct pnfs_layout_hdr *lo) { dprintk("%s: freeing layout cache %p\n", __func__, lo); BUG_ON(!list_empty(&lo->plh_layouts)); NFS_I(lo->plh_inode)->layout = NULL; - kfree(lo); + pnfs_free_layout_hdr(lo); } static void @@ -744,7 +759,7 @@ alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) { struct pnfs_layout_hdr *lo; - lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); + lo = pnfs_alloc_layout_hdr(ino, gfp_flags); if (!lo) return NULL; atomic_set(&lo->plh_refcount, 1); @@ -777,7 +792,7 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) if (likely(nfsi->layout == NULL)) /* Won the race? */ nfsi->layout = new; else - kfree(new); + pnfs_free_layout_hdr(new); return nfsi->layout; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f37ab3539cb..925d6ef8ba7 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -73,6 +73,10 @@ struct pnfs_layoutdriver_type { const u32 id; const char *name; struct module *owner; + + struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags); + void (*free_layout_hdr) (struct pnfs_layout_hdr *); + struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); void (*free_lseg) (struct pnfs_layout_segment *lseg); -- cgit v1.2.3-70-g09d2 From e51b841dd0be9ff53f740c44c32c32679edcb7c8 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:51:48 +0300 Subject: pnfs-obj: define per-inode private structure allocate and deallocate per-inode private pnfs_layout_hdr in preparation for I/O implementation. Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objio_osd.c | 3 +++ fs/nfs/objlayout/objlayout.c | 26 ++++++++++++++++++++++++++ fs/nfs/objlayout/objlayout.h | 17 +++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 2255e2d22d0..353821f7937 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -364,6 +364,9 @@ static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", + .alloc_layout_hdr = objlayout_alloc_layout_hdr, + .free_layout_hdr = objlayout_free_layout_hdr, + .alloc_lseg = objlayout_alloc_lseg, .free_lseg = objlayout_free_lseg, diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 10e5fca3b7f..f14b4da3405 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -41,6 +41,32 @@ #include "objlayout.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD +/* + * Create a objlayout layout structure for the given inode and return it. + */ +struct pnfs_layout_hdr * +objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) +{ + struct objlayout *objlay; + + objlay = kzalloc(sizeof(struct objlayout), gfp_flags); + dprintk("%s: Return %p\n", __func__, objlay); + return &objlay->pnfs_layout; +} + +/* + * Free an objlayout layout structure + */ +void +objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo) +{ + struct objlayout *objlay = OBJLAYOUT(lo); + + dprintk("%s: objlay %p\n", __func__, objlay); + + kfree(objlay); +} + /* * Unmarshall layout and store it in pnfslay. */ diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 0814271bb9b..fa0262149f5 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -45,6 +45,19 @@ #include #include "../pnfs.h" +/* + * per-inode layout + */ +struct objlayout { + struct pnfs_layout_hdr pnfs_layout; +}; + +static inline struct objlayout * +OBJLAYOUT(struct pnfs_layout_hdr *lo) +{ + return container_of(lo, struct objlayout, pnfs_layout); +} + /* * Raid engine I/O API */ @@ -66,6 +79,10 @@ extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr); /* * exported generic objects function vectors */ + +extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags); +extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *); + extern struct pnfs_layout_segment *objlayout_alloc_lseg( struct pnfs_layout_hdr *, struct nfs4_layoutget_res *, -- cgit v1.2.3-70-g09d2 From d20581aa4be11407c9eeeb75992df5ef176bba0f Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:52:03 +0300 Subject: pnfs: support for non-rpc layout drivers Non-rpc layout driver such as for objects and blocks implement their own I/O path and error handling logic. Therefore bypass NFS-based error handling for these layout drivers. [fix lseg ref-count bugs, and null de-refs] [Fall out from: non-rpc layout drivers] Signed-off-by: Boaz Harrosh [get rid of PNFS_USE_RPC_CODE] [get rid of __nfs4_write_done_cb] [revert useless change in nfs4_write_done_cb] Signed-off-by: Benny Halevy --- fs/nfs/internal.h | 1 + fs/nfs/nfs4proc.c | 13 ++++++++++--- fs/nfs/pnfs.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/pnfs.h | 2 ++ include/linux/nfs_xdr.h | 2 ++ 5 files changed, 62 insertions(+), 4 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ce118ce885d..bcf0f0ff5ee 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -310,6 +310,7 @@ extern int nfs_migrate_page(struct address_space *, #endif /* nfs4proc.c */ +extern void __nfs4_read_done_cb(struct nfs_read_data *); extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); extern int nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf1b339c393..92c8bc4b5f9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3175,6 +3175,11 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } +void __nfs4_read_done_cb(struct nfs_read_data *data) +{ + nfs_invalidate_atime(data->inode); +} + static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_server *server = NFS_SERVER(data->inode); @@ -3184,7 +3189,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) return -EAGAIN; } - nfs_invalidate_atime(data->inode); + __nfs4_read_done_cb(data); if (task->tk_status > 0) renew_lease(server, data->timestamp); return 0; @@ -3198,7 +3203,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - return data->read_done_cb(task, data); + return data->read_done_cb ? data->read_done_cb(task, data) : + nfs4_read_done_cb(task, data); } static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) @@ -3243,7 +3249,8 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - return data->write_done_cb(task, data); + return data->write_done_cb ? data->write_done_cb(task, data) : + nfs4_write_done_cb(task, data); } /* Reset the the nfs_write_data to send the write to the MDS. */ diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ef535f2a2c7..171662114fd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -243,7 +243,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg) { struct inode *inode = lseg->pls_layout->plh_inode; - BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); if (list_empty(&lseg->pls_layout->plh_segs)) { set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); @@ -1054,6 +1054,29 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode) pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL; } +/* + * Called by non rpc-based layout drivers + */ +int +pnfs_ld_write_done(struct nfs_write_data *data) +{ + int status; + + if (!data->pnfs_error) { + pnfs_set_layoutcommit(data); + data->mds_ops->rpc_call_done(&data->task, data); + data->mds_ops->rpc_release(data); + return 0; + } + + dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, + data->pnfs_error); + status = nfs_initiate_write(data, NFS_CLIENT(data->inode), + data->mds_ops, NFS_FILE_SYNC); + return status ? : -EAGAIN; +} +EXPORT_SYMBOL_GPL(pnfs_ld_write_done); + enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *wdata, const struct rpc_call_ops *call_ops, int how) @@ -1078,6 +1101,29 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, return trypnfs; } +/* + * Called by non rpc-based layout drivers + */ +int +pnfs_ld_read_done(struct nfs_read_data *data) +{ + int status; + + if (!data->pnfs_error) { + __nfs4_read_done_cb(data); + data->mds_ops->rpc_call_done(&data->task, data); + data->mds_ops->rpc_release(data); + return 0; + } + + dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, + data->pnfs_error); + status = nfs_initiate_read(data, NFS_CLIENT(data->inode), + data->mds_ops); + return status ? : -EAGAIN; +} +EXPORT_SYMBOL_GPL(pnfs_ld_read_done); + /* * Call the appropriate parallel I/O subsystem read function. */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 925d6ef8ba7..0383e66e71f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -165,6 +165,8 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier); void pnfs_set_layoutcommit(struct nfs_write_data *wdata); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); +int pnfs_ld_write_done(struct nfs_write_data *); +int pnfs_ld_read_done(struct nfs_read_data *); /* pnfs_dev.c */ struct nfs4_deviceid_node { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7e371f7df9c..7c8ff0984a8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1087,6 +1087,7 @@ struct nfs_read_data { const struct rpc_call_ops *mds_ops; int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 mds_offset; + int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; }; @@ -1112,6 +1113,7 @@ struct nfs_write_data { unsigned long timestamp; /* For lease renewal */ #endif __u64 mds_offset; /* Filelayout dense stripe */ + int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- cgit v1.2.3-70-g09d2 From 04f83450388e87d86b387cf4a27b81eb7e69de7d Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 22 May 2011 19:52:19 +0300 Subject: pnfs-obj: osd raid engine read/write implementation With the use of the in-kernel osd library. Implement read/write of data from/to osd-objects according to information specified in the objects-layout. Support for stripping over mirrors with a received stripe_unit. There are however a few constrains which are not supported: 1. Stripe Unit must be a multiple of PAGE_SIZE 2. stripe length (stripe_unit * number_of_stripes) can not be bigger then 32bit. Also support raid-groups and partial-layout. Partial-layout is when not all the groups are received on the line, addressing only a partial range of the file. TODO: Only raid0! raid 4/5/6 support will come at later stage A none supported layout will send IO through the MDS [Important fallout from the last rebase] Signed-off-by: Boaz Harrosh [gfp_flags] Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objio_osd.c | 605 +++++++++++++++++++++++++++++++++++++++++++ fs/nfs/objlayout/objlayout.c | 254 ++++++++++++++++++ fs/nfs/objlayout/objlayout.h | 42 +++ 3 files changed, 901 insertions(+) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 353821f7937..cc92d3b3dc3 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -46,6 +46,10 @@ #define _LLU(x) ((unsigned long long)x) +enum { BIO_MAX_PAGES_KMALLOC = + (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), +}; + struct objio_dev_ent { struct nfs4_deviceid_node id_node; struct osd_dev *od; @@ -136,6 +140,31 @@ OBJIO_LSEG(struct pnfs_layout_segment *lseg) return container_of(lseg, struct objio_segment, lseg); } +struct objio_state; +typedef ssize_t (*objio_done_fn)(struct objio_state *ios); + +struct objio_state { + /* Generic layer */ + struct objlayout_io_state ol_state; + + struct objio_segment *layout; + + struct kref kref; + objio_done_fn done; + void *private; + + unsigned long length; + unsigned numdevs; /* Actually used devs in this IO */ + /* A per-device variable array of size numdevs */ + struct _objio_per_comp { + struct bio *bio; + struct osd_request *or; + unsigned long length; + u64 offset; + unsigned dev; + } per_dev[]; +}; + /* Send and wait for a get_device_info of devices in the layout, then look them up with the osd_initiator library */ static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, @@ -359,6 +388,578 @@ void objio_free_lseg(struct pnfs_layout_segment *lseg) kfree(objio_seg); } +int objio_alloc_io_state(struct pnfs_layout_segment *lseg, + struct objlayout_io_state **outp, + gfp_t gfp_flags) +{ + struct objio_segment *objio_seg = OBJIO_LSEG(lseg); + struct objio_state *ios; + const unsigned first_size = sizeof(*ios) + + objio_seg->num_comps * sizeof(ios->per_dev[0]); + + ios = kzalloc(first_size, gfp_flags); + if (unlikely(!ios)) + return -ENOMEM; + + ios->layout = objio_seg; + + *outp = &ios->ol_state; + return 0; +} + +void objio_free_io_state(struct objlayout_io_state *ol_state) +{ + struct objio_state *ios = container_of(ol_state, struct objio_state, + ol_state); + + kfree(ios); +} + +static void _clear_bio(struct bio *bio) +{ + struct bio_vec *bv; + unsigned i; + + __bio_for_each_segment(bv, bio, i, 0) { + unsigned this_count = bv->bv_len; + + if (likely(PAGE_SIZE == this_count)) + clear_highpage(bv->bv_page); + else + zero_user(bv->bv_page, bv->bv_offset, this_count); + } +} + +static int _io_check(struct objio_state *ios, bool is_write) +{ + enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR; + int lin_ret = 0; + int i; + + for (i = 0; i < ios->numdevs; i++) { + struct osd_sense_info osi; + struct osd_request *or = ios->per_dev[i].or; + unsigned dev; + int ret; + + if (!or) + continue; + + ret = osd_req_decode_sense(or, &osi); + if (likely(!ret)) + continue; + + if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { + /* start read offset passed endof file */ + BUG_ON(is_write); + _clear_bio(ios->per_dev[i].bio); + dprintk("%s: start read offset passed end of file " + "offset=0x%llx, length=0x%lx\n", __func__, + _LLU(ios->per_dev[i].offset), + ios->per_dev[i].length); + + continue; /* we recovered */ + } + dev = ios->per_dev[i].dev; + + if (osi.osd_err_pri >= oep) { + oep = osi.osd_err_pri; + lin_ret = ret; + } + } + + return lin_ret; +} + +/* + * Common IO state helpers. + */ +static void _io_free(struct objio_state *ios) +{ + unsigned i; + + for (i = 0; i < ios->numdevs; i++) { + struct _objio_per_comp *per_dev = &ios->per_dev[i]; + + if (per_dev->or) { + osd_end_request(per_dev->or); + per_dev->or = NULL; + } + + if (per_dev->bio) { + bio_put(per_dev->bio); + per_dev->bio = NULL; + } + } +} + +struct osd_dev *_io_od(struct objio_state *ios, unsigned dev) +{ + unsigned min_dev = ios->layout->comps_index; + unsigned max_dev = min_dev + ios->layout->num_comps; + + BUG_ON(dev < min_dev || max_dev <= dev); + return ios->layout->ods[dev - min_dev]->od; +} + +struct _striping_info { + u64 obj_offset; + u64 group_length; + unsigned dev; + unsigned unit_off; +}; + +static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, + struct _striping_info *si) +{ + u32 stripe_unit = ios->layout->stripe_unit; + u32 group_width = ios->layout->group_width; + u64 group_depth = ios->layout->group_depth; + u32 U = stripe_unit * group_width; + + u64 T = U * group_depth; + u64 S = T * ios->layout->group_count; + u64 M = div64_u64(file_offset, S); + + /* + G = (L - (M * S)) / T + H = (L - (M * S)) % T + */ + u64 LmodU = file_offset - M * S; + u32 G = div64_u64(LmodU, T); + u64 H = LmodU - G * T; + + u32 N = div_u64(H, U); + + div_u64_rem(file_offset, stripe_unit, &si->unit_off); + si->obj_offset = si->unit_off + (N * stripe_unit) + + (M * group_depth * stripe_unit); + + /* "H - (N * U)" is just "H % U" so it's bound to u32 */ + si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; + si->dev *= ios->layout->mirrors_p1; + + si->group_length = T - H; +} + +static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, + unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, + gfp_t gfp_flags) +{ + unsigned pg = *cur_pg; + struct request_queue *q = + osd_request_queue(_io_od(ios, per_dev->dev)); + + per_dev->length += cur_len; + + if (per_dev->bio == NULL) { + unsigned stripes = ios->layout->num_comps / + ios->layout->mirrors_p1; + unsigned pages_in_stripe = stripes * + (ios->layout->stripe_unit / PAGE_SIZE); + unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / + stripes; + + if (BIO_MAX_PAGES_KMALLOC < bio_size) + bio_size = BIO_MAX_PAGES_KMALLOC; + + per_dev->bio = bio_kmalloc(gfp_flags, bio_size); + if (unlikely(!per_dev->bio)) { + dprintk("Faild to allocate BIO size=%u\n", bio_size); + return -ENOMEM; + } + } + + while (cur_len > 0) { + unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); + unsigned added_len; + + BUG_ON(ios->ol_state.nr_pages <= pg); + cur_len -= pglen; + + added_len = bio_add_pc_page(q, per_dev->bio, + ios->ol_state.pages[pg], pglen, pgbase); + if (unlikely(pglen != added_len)) + return -ENOMEM; + pgbase = 0; + ++pg; + } + BUG_ON(cur_len); + + *cur_pg = pg; + return 0; +} + +static int _prepare_one_group(struct objio_state *ios, u64 length, + struct _striping_info *si, unsigned *last_pg, + gfp_t gfp_flags) +{ + unsigned stripe_unit = ios->layout->stripe_unit; + unsigned mirrors_p1 = ios->layout->mirrors_p1; + unsigned devs_in_group = ios->layout->group_width * mirrors_p1; + unsigned dev = si->dev; + unsigned first_dev = dev - (dev % devs_in_group); + unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; + unsigned cur_pg = *last_pg; + int ret = 0; + + while (length) { + struct _objio_per_comp *per_dev = &ios->per_dev[dev]; + unsigned cur_len, page_off = 0; + + if (!per_dev->length) { + per_dev->dev = dev; + if (dev < si->dev) { + per_dev->offset = si->obj_offset + stripe_unit - + si->unit_off; + cur_len = stripe_unit; + } else if (dev == si->dev) { + per_dev->offset = si->obj_offset; + cur_len = stripe_unit - si->unit_off; + page_off = si->unit_off & ~PAGE_MASK; + BUG_ON(page_off && + (page_off != ios->ol_state.pgbase)); + } else { /* dev > si->dev */ + per_dev->offset = si->obj_offset - si->unit_off; + cur_len = stripe_unit; + } + + if (max_comp < dev) + max_comp = dev; + } else { + cur_len = stripe_unit; + } + if (cur_len >= length) + cur_len = length; + + ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, + cur_len, gfp_flags); + if (unlikely(ret)) + goto out; + + dev += mirrors_p1; + dev = (dev % devs_in_group) + first_dev; + + length -= cur_len; + ios->length += cur_len; + } +out: + ios->numdevs = max_comp + mirrors_p1; + *last_pg = cur_pg; + return ret; +} + +static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags) +{ + u64 length = ios->ol_state.count; + u64 offset = ios->ol_state.offset; + struct _striping_info si; + unsigned last_pg = 0; + int ret = 0; + + while (length) { + _calc_stripe_info(ios, offset, &si); + + if (length < si.group_length) + si.group_length = length; + + ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags); + if (unlikely(ret)) + goto out; + + offset += si.group_length; + length -= si.group_length; + } + +out: + if (!ios->length) + return ret; + + return 0; +} + +static ssize_t _sync_done(struct objio_state *ios) +{ + struct completion *waiting = ios->private; + + complete(waiting); + return 0; +} + +static void _last_io(struct kref *kref) +{ + struct objio_state *ios = container_of(kref, struct objio_state, kref); + + ios->done(ios); +} + +static void _done_io(struct osd_request *or, void *p) +{ + struct objio_state *ios = p; + + kref_put(&ios->kref, _last_io); +} + +static ssize_t _io_exec(struct objio_state *ios) +{ + DECLARE_COMPLETION_ONSTACK(wait); + ssize_t status = 0; /* sync status */ + unsigned i; + objio_done_fn saved_done_fn = ios->done; + bool sync = ios->ol_state.sync; + + if (sync) { + ios->done = _sync_done; + ios->private = &wait; + } + + kref_init(&ios->kref); + + for (i = 0; i < ios->numdevs; i++) { + struct osd_request *or = ios->per_dev[i].or; + + if (!or) + continue; + + kref_get(&ios->kref); + osd_execute_request_async(or, _done_io, ios); + } + + kref_put(&ios->kref, _last_io); + + if (sync) { + wait_for_completion(&wait); + status = saved_done_fn(ios); + } + + return status; +} + +/* + * read + */ +static ssize_t _read_done(struct objio_state *ios) +{ + ssize_t status; + int ret = _io_check(ios, false); + + _io_free(ios); + + if (likely(!ret)) + status = ios->length; + else + status = ret; + + objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync); + return status; +} + +static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) +{ + struct osd_request *or = NULL; + struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; + unsigned dev = per_dev->dev; + struct pnfs_osd_object_cred *cred = + &ios->layout->comps[dev]; + struct osd_obj_id obj = { + .partition = cred->oc_object_id.oid_partition_id, + .id = cred->oc_object_id.oid_object_id, + }; + int ret; + + or = osd_start_request(_io_od(ios, dev), GFP_KERNEL); + if (unlikely(!or)) { + ret = -ENOMEM; + goto err; + } + per_dev->or = or; + + osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length); + + ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); + if (ret) { + dprintk("%s: Faild to osd_finalize_request() => %d\n", + __func__, ret); + goto err; + } + + dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", + __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), + per_dev->length); + +err: + return ret; +} + +static ssize_t _read_exec(struct objio_state *ios) +{ + unsigned i; + int ret; + + for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { + if (!ios->per_dev[i].length) + continue; + ret = _read_mirrors(ios, i); + if (unlikely(ret)) + goto err; + } + + ios->done = _read_done; + return _io_exec(ios); /* In sync mode exec returns the io status */ + +err: + _io_free(ios); + return ret; +} + +ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state) +{ + struct objio_state *ios = container_of(ol_state, struct objio_state, + ol_state); + int ret; + + ret = _io_rw_pagelist(ios, GFP_KERNEL); + if (unlikely(ret)) + return ret; + + return _read_exec(ios); +} + +/* + * write + */ +static ssize_t _write_done(struct objio_state *ios) +{ + ssize_t status; + int ret = _io_check(ios, true); + + _io_free(ios); + + if (likely(!ret)) { + /* FIXME: should be based on the OSD's persistence model + * See OSD2r05 Section 4.13 Data persistence model */ + ios->ol_state.committed = NFS_FILE_SYNC; + status = ios->length; + } else { + status = ret; + } + + objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync); + return status; +} + +static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) +{ + struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; + unsigned dev = ios->per_dev[cur_comp].dev; + unsigned last_comp = cur_comp + ios->layout->mirrors_p1; + int ret; + + for (; cur_comp < last_comp; ++cur_comp, ++dev) { + struct osd_request *or = NULL; + struct pnfs_osd_object_cred *cred = + &ios->layout->comps[dev]; + struct osd_obj_id obj = { + .partition = cred->oc_object_id.oid_partition_id, + .id = cred->oc_object_id.oid_object_id, + }; + struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; + struct bio *bio; + + or = osd_start_request(_io_od(ios, dev), GFP_NOFS); + if (unlikely(!or)) { + ret = -ENOMEM; + goto err; + } + per_dev->or = or; + + if (per_dev != master_dev) { + bio = bio_kmalloc(GFP_NOFS, + master_dev->bio->bi_max_vecs); + if (unlikely(!bio)) { + dprintk("Faild to allocate BIO size=%u\n", + master_dev->bio->bi_max_vecs); + ret = -ENOMEM; + goto err; + } + + __bio_clone(bio, master_dev->bio); + bio->bi_bdev = NULL; + bio->bi_next = NULL; + per_dev->bio = bio; + per_dev->dev = dev; + per_dev->length = master_dev->length; + per_dev->offset = master_dev->offset; + } else { + bio = master_dev->bio; + bio->bi_rw |= REQ_WRITE; + } + + osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length); + + ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); + if (ret) { + dprintk("%s: Faild to osd_finalize_request() => %d\n", + __func__, ret); + goto err; + } + + dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", + __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), + per_dev->length); + } + +err: + return ret; +} + +static ssize_t _write_exec(struct objio_state *ios) +{ + unsigned i; + int ret; + + for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { + if (!ios->per_dev[i].length) + continue; + ret = _write_mirrors(ios, i); + if (unlikely(ret)) + goto err; + } + + ios->done = _write_done; + return _io_exec(ios); /* In sync mode exec returns the io->status */ + +err: + _io_free(ios); + return ret; +} + +ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) +{ + struct objio_state *ios = container_of(ol_state, struct objio_state, + ol_state); + int ret; + + /* TODO: ios->stable = stable; */ + ret = _io_rw_pagelist(ios, GFP_NOFS); + if (unlikely(ret)) + return ret; + + return _write_exec(ios); +} + +/* + * objlayout_pg_test(). Called by nfs_can_coalesce_requests() + * + * return 1 : coalesce page + * return 0 : don't coalesce page + */ +int +objlayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + return 1; +} static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, @@ -370,6 +971,10 @@ static struct pnfs_layoutdriver_type objlayout_type = { .alloc_lseg = objlayout_alloc_lseg, .free_lseg = objlayout_free_lseg, + .read_pagelist = objlayout_read_pagelist, + .write_pagelist = objlayout_write_pagelist, + .pg_test = objlayout_pg_test, + .free_deviceid_node = objio_free_deviceid_node, }; diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index f14b4da3405..5157ef6d004 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -128,6 +128,260 @@ objlayout_free_lseg(struct pnfs_layout_segment *lseg) objio_free_lseg(lseg); } +/* + * I/O Operations + */ +static inline u64 +end_offset(u64 start, u64 len) +{ + u64 end; + + end = start + len; + return end >= start ? end : NFS4_MAX_UINT64; +} + +/* last octet in a range */ +static inline u64 +last_byte_offset(u64 start, u64 len) +{ + u64 end; + + BUG_ON(!len); + end = start + len; + return end > start ? end - 1 : NFS4_MAX_UINT64; +} + +static struct objlayout_io_state * +objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, + struct page **pages, + unsigned pgbase, + loff_t offset, + size_t count, + struct pnfs_layout_segment *lseg, + void *rpcdata, + gfp_t gfp_flags) +{ + struct objlayout_io_state *state; + u64 lseg_end_offset; + + dprintk("%s: allocating io_state\n", __func__); + if (objio_alloc_io_state(lseg, &state, gfp_flags)) + return NULL; + + BUG_ON(offset < lseg->pls_range.offset); + lseg_end_offset = end_offset(lseg->pls_range.offset, + lseg->pls_range.length); + BUG_ON(offset >= lseg_end_offset); + if (offset + count > lseg_end_offset) { + count = lseg->pls_range.length - + (offset - lseg->pls_range.offset); + dprintk("%s: truncated count %Zd\n", __func__, count); + } + + if (pgbase > PAGE_SIZE) { + pages += pgbase >> PAGE_SHIFT; + pgbase &= ~PAGE_MASK; + } + + state->lseg = lseg; + state->rpcdata = rpcdata; + state->pages = pages; + state->pgbase = pgbase; + state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; + state->offset = offset; + state->count = count; + state->sync = 0; + + return state; +} + +static void +objlayout_free_io_state(struct objlayout_io_state *state) +{ + dprintk("%s: freeing io_state\n", __func__); + if (unlikely(!state)) + return; + + objio_free_io_state(state); +} + +/* + * I/O done common code + */ +static void +objlayout_iodone(struct objlayout_io_state *state) +{ + dprintk("%s: state %p status\n", __func__, state); + + objlayout_free_io_state(state); +} + +/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete(). + * This is because the osd completion is called with ints-off from + * the block layer + */ +static void _rpc_read_complete(struct work_struct *work) +{ + struct rpc_task *task; + struct nfs_read_data *rdata; + + dprintk("%s enter\n", __func__); + task = container_of(work, struct rpc_task, u.tk_work); + rdata = container_of(task, struct nfs_read_data, task); + + pnfs_ld_read_done(rdata); +} + +void +objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) +{ + int eof = state->eof; + struct nfs_read_data *rdata; + + state->status = status; + dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof); + rdata = state->rpcdata; + rdata->task.tk_status = status; + if (status >= 0) { + rdata->res.count = status; + rdata->res.eof = eof; + } + objlayout_iodone(state); + /* must not use state after this point */ + + if (sync) + pnfs_ld_read_done(rdata); + else { + INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); + schedule_work(&rdata->task.u.tk_work); + } +} + +/* + * Perform sync or async reads. + */ +enum pnfs_try_status +objlayout_read_pagelist(struct nfs_read_data *rdata) +{ + loff_t offset = rdata->args.offset; + size_t count = rdata->args.count; + struct objlayout_io_state *state; + ssize_t status = 0; + loff_t eof; + + dprintk("%s: Begin inode %p offset %llu count %d\n", + __func__, rdata->inode, offset, (int)count); + + eof = i_size_read(rdata->inode); + if (unlikely(offset + count > eof)) { + if (offset >= eof) { + status = 0; + rdata->res.count = 0; + rdata->res.eof = 1; + goto out; + } + count = eof - offset; + } + + state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout, + rdata->args.pages, rdata->args.pgbase, + offset, count, + rdata->lseg, rdata, + GFP_KERNEL); + if (unlikely(!state)) { + status = -ENOMEM; + goto out; + } + + state->eof = state->offset + state->count >= eof; + + status = objio_read_pagelist(state); + out: + dprintk("%s: Return status %Zd\n", __func__, status); + rdata->pnfs_error = status; + return PNFS_ATTEMPTED; +} + +/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete(). + * This is because the osd completion is called with ints-off from + * the block layer + */ +static void _rpc_write_complete(struct work_struct *work) +{ + struct rpc_task *task; + struct nfs_write_data *wdata; + + dprintk("%s enter\n", __func__); + task = container_of(work, struct rpc_task, u.tk_work); + wdata = container_of(task, struct nfs_write_data, task); + + pnfs_ld_write_done(wdata); +} + +void +objlayout_write_done(struct objlayout_io_state *state, ssize_t status, + bool sync) +{ + struct nfs_write_data *wdata; + + dprintk("%s: Begin\n", __func__); + wdata = state->rpcdata; + state->status = status; + wdata->task.tk_status = status; + if (status >= 0) { + wdata->res.count = status; + wdata->verf.committed = state->committed; + dprintk("%s: Return status %d committed %d\n", + __func__, wdata->task.tk_status, + wdata->verf.committed); + } else + dprintk("%s: Return status %d\n", + __func__, wdata->task.tk_status); + objlayout_iodone(state); + /* must not use state after this point */ + + if (sync) + pnfs_ld_write_done(wdata); + else { + INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); + schedule_work(&wdata->task.u.tk_work); + } +} + +/* + * Perform sync or async writes. + */ +enum pnfs_try_status +objlayout_write_pagelist(struct nfs_write_data *wdata, + int how) +{ + struct objlayout_io_state *state; + ssize_t status; + + dprintk("%s: Begin inode %p offset %llu count %u\n", + __func__, wdata->inode, wdata->args.offset, wdata->args.count); + + state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout, + wdata->args.pages, + wdata->args.pgbase, + wdata->args.offset, + wdata->args.count, + wdata->lseg, wdata, + GFP_NOFS); + if (unlikely(!state)) { + status = -ENOMEM; + goto out; + } + + state->sync = how & FLUSH_SYNC; + + status = objio_write_pagelist(state, how & FLUSH_STABLE); + out: + dprintk("%s: Return status %Zd\n", __func__, status); + wdata->pnfs_error = status; + return PNFS_ATTEMPTED; +} + /* * Get Device Info API for io engines */ diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index fa0262149f5..9a405e8069f 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -58,6 +58,26 @@ OBJLAYOUT(struct pnfs_layout_hdr *lo) return container_of(lo, struct objlayout, pnfs_layout); } +/* + * per-I/O operation state + * embedded in objects provider io_state data structure + */ +struct objlayout_io_state { + struct pnfs_layout_segment *lseg; + + struct page **pages; + unsigned pgbase; + unsigned nr_pages; + unsigned long count; + loff_t offset; + bool sync; + + void *rpcdata; + int status; /* res */ + int eof; /* res */ + int committed; /* res */ +}; + /* * Raid engine I/O API */ @@ -68,9 +88,24 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, gfp_t gfp_flags); extern void objio_free_lseg(struct pnfs_layout_segment *lseg); +extern int objio_alloc_io_state( + struct pnfs_layout_segment *lseg, + struct objlayout_io_state **outp, + gfp_t gfp_flags); +extern void objio_free_io_state(struct objlayout_io_state *state); + +extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state); +extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, + bool stable); + /* * callback API */ +extern void objlayout_read_done(struct objlayout_io_state *state, + ssize_t status, bool sync); +extern void objlayout_write_done(struct objlayout_io_state *state, + ssize_t status, bool sync); + extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, gfp_t gfp_flags); @@ -89,4 +124,11 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg( gfp_t gfp_flags); extern void objlayout_free_lseg(struct pnfs_layout_segment *); +extern enum pnfs_try_status objlayout_read_pagelist( + struct nfs_read_data *); + +extern enum pnfs_try_status objlayout_write_pagelist( + struct nfs_write_data *, + int how); + #endif /* _OBJLAYOUT_H */ -- cgit v1.2.3-70-g09d2 From cbe8260369c9f88eafa035cd327dc3e02fad528c Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:52:37 +0300 Subject: pnfs: layoutreturn NFSv4.1 LAYOUTRETURN implementation Currently, does not support layout-type payload encoding. Signed-off-by: Alexandros Batsakis Signed-off-by: Andy Adamson Signed-off-by: Andy Adamson Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Fred Isaman Signed-off-by: Marc Eshel Signed-off-by: Zhang Jingwang [call pnfs_return_layout right before pnfs_destroy_layout] [remove assert_spin_locked from pnfs_clear_lseg_list] [remove wait parameter from the layoutreturn path.] [remove return_type field from nfs4_layoutreturn_args] [remove range from nfs4_layoutreturn_args] [no need to send layoutcommit from _pnfs_return_layout] [don't wait on sync layoutreturn] [fix layout stateid in layoutreturn args] [fixed NULL deref in _pnfs_return_layout] [removed recaim member of nfs4_layoutreturn_args] Signed-off-by: Benny Halevy --- fs/nfs/inode.c | 3 +- fs/nfs/nfs4proc.c | 82 +++++++++++++++++++++++++++++++++++ fs/nfs/nfs4xdr.c | 111 +++++++++++++++++++++++++++++++++++++++++++++--- fs/nfs/pnfs.c | 45 ++++++++++++++++++++ fs/nfs/pnfs.h | 18 ++++++++ include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 21 +++++++++ 7 files changed, 274 insertions(+), 7 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 57bb31ad7a5..e9c6d9f8f7e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1424,9 +1424,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ void nfs4_evict_inode(struct inode *inode) { - pnfs_destroy_layout(NFS_I(inode)); truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); + pnfs_return_layout(inode); + pnfs_destroy_layout(NFS_I(inode)); /* If we are holding a delegation, return it! */ nfs_inode_return_delegation_noreclaim(inode); /* First call standard NFS clear_inode() code */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92c8bc4b5f9..5b4124e4c22 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5673,6 +5673,88 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) return status; } +static void +nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutreturn *lrp = calldata; + + dprintk("--> %s\n", __func__); + if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, + &lrp->res.seq_res, 0, task)) + return; + rpc_call_start(task); +} + +static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutreturn *lrp = calldata; + struct nfs_server *server; + + dprintk("--> %s\n", __func__); + + if (!nfs4_sequence_done(task, &lrp->res.seq_res)) + return; + + server = NFS_SERVER(lrp->args.inode); + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + nfs_restart_rpc(task, lrp->clp); + return; + } + if (task->tk_status == 0) { + struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; + + if (lrp->res.lrs_present) { + spin_lock(&lo->plh_inode->i_lock); + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + spin_unlock(&lo->plh_inode->i_lock); + } else + BUG_ON(!list_empty(&lo->plh_segs)); + } + dprintk("<-- %s\n", __func__); +} + +static void nfs4_layoutreturn_release(void *calldata) +{ + struct nfs4_layoutreturn *lrp = calldata; + + dprintk("--> %s\n", __func__); + put_layout_hdr(NFS_I(lrp->args.inode)->layout); + kfree(calldata); + dprintk("<-- %s\n", __func__); +} + +static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { + .rpc_call_prepare = nfs4_layoutreturn_prepare, + .rpc_call_done = nfs4_layoutreturn_done, + .rpc_release = nfs4_layoutreturn_release, +}; + +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) +{ + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], + .rpc_argp = &lrp->args, + .rpc_resp = &lrp->res, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = lrp->clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_layoutreturn_call_ops, + .callback_data = lrp, + }; + int status; + + dprintk("--> %s\n", __func__); + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = task->tk_status; + dprintk("<-- %s status=%d\n", __func__, status); + rpc_put_task(task); + return status; +} + static int _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c3ccd2c4683..f2421206435 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -338,7 +338,11 @@ static int nfs4_stat_to_errno(int); 1 /* layoutupdate4 layout type */ + \ 1 /* NULL filelayout layoutupdate4 payload */) #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) - +#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ + encode_stateid_maxsz + \ + 1 /* FIXME: opaque lrf_body always empty at the moment */) +#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ + 1 + decode_stateid_maxsz) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -760,7 +764,14 @@ static int nfs4_stat_to_errno(int); decode_putfh_maxsz + \ decode_layoutcommit_maxsz + \ decode_getattr_maxsz) - +#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_layoutreturn_maxsz) +#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_layoutreturn_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1889,6 +1900,31 @@ encode_layoutcommit(struct xdr_stream *xdr, hdr->replen += decode_layoutcommit_maxsz; return 0; } + +static void +encode_layoutreturn(struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 20); + *p++ = cpu_to_be32(OP_LAYOUTRETURN); + *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ + *p++ = cpu_to_be32(args->layout_type); + *p++ = cpu_to_be32(IOMODE_ANY); + *p = cpu_to_be32(RETURN_FILE); + p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); + p = xdr_encode_hyper(p, 0); + p = xdr_encode_hyper(p, NFS4_MAX_UINT64); + spin_lock(&args->inode->i_lock); + xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); + spin_unlock(&args->inode->i_lock); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(0); + hdr->nops++; + hdr->replen += decode_layoutreturn_maxsz; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2706,9 +2742,9 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, /* * Encode LAYOUTCOMMIT request */ -static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, - struct xdr_stream *xdr, - struct nfs4_layoutcommit_args *args) +static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_layoutcommit_args *args) { struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), @@ -2720,7 +2756,24 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, encode_layoutcommit(xdr, args, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; +} + +/* + * Encode LAYOUTRETURN request + */ +static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_layoutreturn_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, NFS_FH(args->inode), &hdr); + encode_layoutreturn(xdr, args, &hdr); + encode_nops(&hdr); } #endif /* CONFIG_NFS_V4_1 */ @@ -5203,6 +5256,27 @@ out_overflow: return -EIO; } +static int decode_layoutreturn(struct xdr_stream *xdr, + struct nfs4_layoutreturn_res *res) +{ + __be32 *p; + int status; + + status = decode_op_hdr(xdr, OP_LAYOUTRETURN); + if (status) + return status; + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->lrs_present = be32_to_cpup(p); + if (res->lrs_present) + status = decode_stateid(xdr, &res->stateid); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_layoutcommit(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_layoutcommit_res *res) @@ -6319,6 +6393,30 @@ out: return status; } +/* + * Decode LAYOUTRETURN response + */ +static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_layoutreturn_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_layoutreturn(xdr, res); +out: + return status; +} + /* * Decode LAYOUTCOMMIT response */ @@ -6547,6 +6645,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), PROC(LAYOUTGET, enc_layoutget, dec_layoutget), PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), + PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 171662114fd..00b12824174 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -619,6 +619,51 @@ out_err_free: return NULL; } +/* Initiates a LAYOUTRETURN(FILE) */ +int +_pnfs_return_layout(struct inode *ino) +{ + struct pnfs_layout_hdr *lo = NULL; + struct nfs_inode *nfsi = NFS_I(ino); + LIST_HEAD(tmp_list); + struct nfs4_layoutreturn *lrp; + nfs4_stateid stateid; + int status = 0; + + dprintk("--> %s\n", __func__); + + spin_lock(&ino->i_lock); + lo = nfsi->layout; + if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { + spin_unlock(&ino->i_lock); + dprintk("%s: no layout segments to return\n", __func__); + goto out; + } + stateid = nfsi->layout->plh_stateid; + /* Reference matched in nfs4_layoutreturn_release */ + get_layout_hdr(lo); + spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&tmp_list); + + WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); + + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); + if (unlikely(lrp == NULL)) { + status = -ENOMEM; + goto out; + } + + lrp->args.stateid = stateid; + lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; + lrp->args.inode = ino; + lrp->clp = NFS_SERVER(ino)->nfs_client; + + status = nfs4_proc_layoutreturn(lrp); +out: + dprintk("<-- %s status: %d\n", __func__, status); + return status; +} + bool pnfs_roc(struct inode *ino) { struct pnfs_layout_hdr *lo; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0383e66e71f..c34f7a0e3bc 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -129,6 +129,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void get_layout_hdr(struct pnfs_layout_hdr *lo); @@ -165,6 +166,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier); void pnfs_set_layoutcommit(struct nfs_write_data *wdata); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); +int _pnfs_return_layout(struct inode *); int pnfs_ld_write_done(struct nfs_write_data *); int pnfs_ld_read_done(struct nfs_read_data *); @@ -256,6 +258,17 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) put_lseg(req->wb_commit_lseg); } +static inline int pnfs_return_layout(struct inode *ino) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct nfs_server *nfss = NFS_SERVER(ino); + + if (pnfs_enabled_sb(nfss) && nfsi->layout) + return _pnfs_return_layout(ino); + + return 0; +} + #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -298,6 +311,11 @@ pnfs_try_to_write_data(struct nfs_write_data *data, return PNFS_NOT_ATTEMPTED; } +static inline int pnfs_return_layout(struct inode *ino) +{ + return 0; +} + static inline bool pnfs_roc(struct inode *ino) { diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 178fafe0ff9..9376eaf26e1 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -562,6 +562,7 @@ enum { NFSPROC4_CLNT_LAYOUTGET, NFSPROC4_CLNT_GETDEVICEINFO, NFSPROC4_CLNT_LAYOUTCOMMIT, + NFSPROC4_CLNT_LAYOUTRETURN, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7c8ff0984a8..5e8444a11ad 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -269,6 +269,27 @@ struct nfs4_layoutcommit_data { struct nfs4_layoutcommit_res res; }; +struct nfs4_layoutreturn_args { + __u32 layout_type; + struct inode *inode; + nfs4_stateid stateid; + struct nfs4_sequence_args seq_args; +}; + +struct nfs4_layoutreturn_res { + struct nfs4_sequence_res seq_res; + u32 lrs_present; + nfs4_stateid stateid; +}; + +struct nfs4_layoutreturn { + struct nfs4_layoutreturn_args args; + struct nfs4_layoutreturn_res res; + struct rpc_cred *cred; + struct nfs_client *clp; + int rpc_status; +}; + /* * Arguments to the open call. */ -- cgit v1.2.3-70-g09d2 From 8a1636c459cb7a4b32ba4024cd1b2ba21fba6aed Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 14 Jul 2010 15:43:57 -0400 Subject: pnfs: layoutret_on_setattr With the objects layout security model, we have object capabilities that are associated with the layout and we anticipate that the server will issue a cb_layoutrecall for any setattr that changes security related attributes (user/group/mode/acl) or truncates the file. Therefore, the layout is returned before issuing the setattr to avoid the anticipated cb_layoutrecall. Signed-off-by: Benny Halevy --- fs/nfs/nfs4proc.c | 3 +++ fs/nfs/objlayout/objio_osd.c | 1 + fs/nfs/pnfs.h | 22 ++++++++++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5b4124e4c22..57340096c73 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2361,6 +2361,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct nfs4_state *state = NULL; int status; + if (pnfs_ld_layoutret_on_setattr(inode)) + pnfs_return_layout(inode); + nfs_fattr_init(fattr); /* Search for an existing open(O_WRITE) file */ diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index cc92d3b3dc3..4e8de3ec9a6 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -964,6 +964,7 @@ objlayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", + .flags = PNFS_LAYOUTRET_ON_SETATTR, .alloc_layout_hdr = objlayout_alloc_layout_hdr, .free_layout_hdr = objlayout_free_layout_hdr, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index c34f7a0e3bc..af3967a893a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -65,6 +65,11 @@ enum { NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ }; +enum layoutdriver_policy_flags { + /* Should the pNFS client commit and return the layout upon a setattr */ + PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, +}; + struct nfs4_deviceid_node; /* Per-layout driver specific registration structure */ @@ -73,6 +78,7 @@ struct pnfs_layoutdriver_type { const u32 id; const char *name; struct module *owner; + unsigned flags; struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags); void (*free_layout_hdr) (struct pnfs_layout_hdr *); @@ -258,6 +264,16 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) put_lseg(req->wb_commit_lseg); } +/* Should the pNFS client commit and return the layout upon a setattr */ +static inline bool +pnfs_ld_layoutret_on_setattr(struct inode *inode) +{ + if (!pnfs_enabled_sb(NFS_SERVER(inode))) + return false; + return NFS_SERVER(inode)->pnfs_curr_ld->flags & + PNFS_LAYOUTRET_ON_SETATTR; +} + static inline int pnfs_return_layout(struct inode *ino) { struct nfs_inode *nfsi = NFS_I(ino); @@ -316,6 +332,12 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } +static inline bool +pnfs_ld_layoutret_on_setattr(struct inode *inode) +{ + return false; +} + static inline bool pnfs_roc(struct inode *ino) { -- cgit v1.2.3-70-g09d2 From 04a555498e03b3804e2dec916a4669f5f560e503 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Sun, 22 May 2011 19:53:10 +0300 Subject: pnfs: encode_layoutreturn Add a layout driver method to encode the layout type specific opaque part of layout return in-line in the xdr stream. Currently the pnfs-objects layout driver uses it to encode i/o error information on LAYOUTRETURN. Signed-off-by: Andy Adamson [fixup layout header pointer for encode_layoutreturn] Signed-off-by: Benny Halevy --- fs/nfs/nfs4xdr.c | 9 +++++++-- fs/nfs/pnfs.h | 4 ++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f2421206435..d464badc006 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1920,8 +1920,13 @@ encode_layoutreturn(struct xdr_stream *xdr, spin_lock(&args->inode->i_lock); xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); spin_unlock(&args->inode->i_lock); - p = reserve_space(xdr, 4); - *p = cpu_to_be32(0); + if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { + NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( + NFS_I(args->inode)->layout, xdr, args); + } else { + p = reserve_space(xdr, 4); + *p = cpu_to_be32(0); + } hdr->nops++; hdr->replen += decode_layoutreturn_maxsz; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index af3967a893a..1b6b207a880 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -104,6 +104,10 @@ struct pnfs_layoutdriver_type { enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); void (*free_deviceid_node) (struct nfs4_deviceid_node *); + + void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, + struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args); }; struct pnfs_layout_hdr { -- cgit v1.2.3-70-g09d2 From adb58535e604a564495a7d50dfb0afa0ddc21bcb Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 26 May 2011 21:49:46 +0300 Subject: pnfs-obj: report errors and .encode_layoutreturn Implementation. An io_state pre-allocates an error information structure for each possible osd-device that might error during IO. When IO is done if all was well the io_state is freed. (as today). If the I/O has ended with an error, the io_state is queued on a per-layout err_list. When eventually encode_layoutreturn() is called, each error is properly encoded on the XDR buffer and only then the io_state is removed from err_list and de-allocated. It is up to the io_engine to fill in the segment that fault and the type of osd_error that occurred. By calling objlayout_io_set_result() for each failing device. In objio_osd: * Allocate io-error descriptors space as part of io_state * Use generic objlayout error reporting at end of io. Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objio_osd.c | 44 +++++++- fs/nfs/objlayout/objlayout.c | 232 ++++++++++++++++++++++++++++++++++++++++++- fs/nfs/objlayout/objlayout.h | 23 +++++ 3 files changed, 297 insertions(+), 2 deletions(-) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 4e8de3ec9a6..8bca5e13f3e 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -396,12 +396,16 @@ int objio_alloc_io_state(struct pnfs_layout_segment *lseg, struct objio_state *ios; const unsigned first_size = sizeof(*ios) + objio_seg->num_comps * sizeof(ios->per_dev[0]); + const unsigned sec_size = objio_seg->num_comps * + sizeof(ios->ol_state.ioerrs[0]); - ios = kzalloc(first_size, gfp_flags); + ios = kzalloc(first_size + sec_size, gfp_flags); if (unlikely(!ios)) return -ENOMEM; ios->layout = objio_seg; + ios->ol_state.ioerrs = ((void *)ios) + first_size; + ios->ol_state.num_comps = objio_seg->num_comps; *outp = &ios->ol_state; return 0; @@ -415,6 +419,36 @@ void objio_free_io_state(struct objlayout_io_state *ol_state) kfree(ios); } +enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) +{ + switch (oep) { + case OSD_ERR_PRI_NO_ERROR: + return (enum pnfs_osd_errno)0; + + case OSD_ERR_PRI_CLEAR_PAGES: + BUG_ON(1); + return 0; + + case OSD_ERR_PRI_RESOURCE: + return PNFS_OSD_ERR_RESOURCE; + case OSD_ERR_PRI_BAD_CRED: + return PNFS_OSD_ERR_BAD_CRED; + case OSD_ERR_PRI_NO_ACCESS: + return PNFS_OSD_ERR_NO_ACCESS; + case OSD_ERR_PRI_UNREACHABLE: + return PNFS_OSD_ERR_UNREACHABLE; + case OSD_ERR_PRI_NOT_FOUND: + return PNFS_OSD_ERR_NOT_FOUND; + case OSD_ERR_PRI_NO_SPACE: + return PNFS_OSD_ERR_NO_SPACE; + default: + WARN_ON(1); + /* fallthrough */ + case OSD_ERR_PRI_EIO: + return PNFS_OSD_ERR_EIO; + } +} + static void _clear_bio(struct bio *bio) { struct bio_vec *bv; @@ -461,6 +495,12 @@ static int _io_check(struct objio_state *ios, bool is_write) continue; /* we recovered */ } dev = ios->per_dev[i].dev; + objlayout_io_set_result(&ios->ol_state, dev, + &ios->layout->comps[dev].oc_object_id, + osd_pri_2_pnfs_err(osi.osd_err_pri), + ios->per_dev[i].offset, + ios->per_dev[i].length, + is_write); if (osi.osd_err_pri >= oep) { oep = osi.osd_err_pri; @@ -977,6 +1017,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { .pg_test = objlayout_pg_test, .free_deviceid_node = objio_free_deviceid_node, + + .encode_layoutreturn = objlayout_encode_layoutreturn, }; MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 5157ef6d004..f7caecff6b4 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -50,6 +50,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) struct objlayout *objlay; objlay = kzalloc(sizeof(struct objlayout), gfp_flags); + if (objlay) { + spin_lock_init(&objlay->lock); + INIT_LIST_HEAD(&objlay->err_list); + } dprintk("%s: Return %p\n", __func__, objlay); return &objlay->pnfs_layout; } @@ -64,6 +68,7 @@ objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo) dprintk("%s: objlay %p\n", __func__, objlay); + WARN_ON(!list_empty(&objlay->err_list)); kfree(objlay); } @@ -183,6 +188,7 @@ objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, pgbase &= ~PAGE_MASK; } + INIT_LIST_HEAD(&state->err_list); state->lseg = lseg; state->rpcdata = rpcdata; state->pages = pages; @@ -213,7 +219,52 @@ objlayout_iodone(struct objlayout_io_state *state) { dprintk("%s: state %p status\n", __func__, state); - objlayout_free_io_state(state); + if (likely(state->status >= 0)) { + objlayout_free_io_state(state); + } else { + struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); + + spin_lock(&objlay->lock); + list_add(&objlay->err_list, &state->err_list); + spin_unlock(&objlay->lock); + } +} + +/* + * objlayout_io_set_result - Set an osd_error code on a specific osd comp. + * + * The @index component IO failed (error returned from target). Register + * the error for later reporting at layout-return. + */ +void +objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, + struct pnfs_osd_objid *pooid, int osd_error, + u64 offset, u64 length, bool is_write) +{ + struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index]; + + BUG_ON(index >= state->num_comps); + if (osd_error) { + ioerr->oer_component = *pooid; + ioerr->oer_comp_offset = offset; + ioerr->oer_comp_length = length; + ioerr->oer_iswrite = is_write; + ioerr->oer_errno = osd_error; + + dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) " + "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n", + __func__, index, ioerr->oer_errno, + ioerr->oer_iswrite, + _DEVID_LO(&ioerr->oer_component.oid_device_id), + _DEVID_HI(&ioerr->oer_component.oid_device_id), + ioerr->oer_component.oid_partition_id, + ioerr->oer_component.oid_object_id, + ioerr->oer_comp_offset, + ioerr->oer_comp_length); + } else { + /* User need not call if no error is reported */ + ioerr->oer_errno = 0; + } } /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete(). @@ -382,6 +433,185 @@ objlayout_write_pagelist(struct nfs_write_data *wdata, return PNFS_ATTEMPTED; } +static int +err_prio(u32 oer_errno) +{ + switch (oer_errno) { + case 0: + return 0; + + case PNFS_OSD_ERR_RESOURCE: + return OSD_ERR_PRI_RESOURCE; + case PNFS_OSD_ERR_BAD_CRED: + return OSD_ERR_PRI_BAD_CRED; + case PNFS_OSD_ERR_NO_ACCESS: + return OSD_ERR_PRI_NO_ACCESS; + case PNFS_OSD_ERR_UNREACHABLE: + return OSD_ERR_PRI_UNREACHABLE; + case PNFS_OSD_ERR_NOT_FOUND: + return OSD_ERR_PRI_NOT_FOUND; + case PNFS_OSD_ERR_NO_SPACE: + return OSD_ERR_PRI_NO_SPACE; + default: + WARN_ON(1); + /* fallthrough */ + case PNFS_OSD_ERR_EIO: + return OSD_ERR_PRI_EIO; + } +} + +static void +merge_ioerr(struct pnfs_osd_ioerr *dest_err, + const struct pnfs_osd_ioerr *src_err) +{ + u64 dest_end, src_end; + + if (!dest_err->oer_errno) { + *dest_err = *src_err; + /* accumulated device must be blank */ + memset(&dest_err->oer_component.oid_device_id, 0, + sizeof(dest_err->oer_component.oid_device_id)); + + return; + } + + if (dest_err->oer_component.oid_partition_id != + src_err->oer_component.oid_partition_id) + dest_err->oer_component.oid_partition_id = 0; + + if (dest_err->oer_component.oid_object_id != + src_err->oer_component.oid_object_id) + dest_err->oer_component.oid_object_id = 0; + + if (dest_err->oer_comp_offset > src_err->oer_comp_offset) + dest_err->oer_comp_offset = src_err->oer_comp_offset; + + dest_end = end_offset(dest_err->oer_comp_offset, + dest_err->oer_comp_length); + src_end = end_offset(src_err->oer_comp_offset, + src_err->oer_comp_length); + if (dest_end < src_end) + dest_end = src_end; + + dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset; + + if ((src_err->oer_iswrite == dest_err->oer_iswrite) && + (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) { + dest_err->oer_errno = src_err->oer_errno; + } else if (src_err->oer_iswrite) { + dest_err->oer_iswrite = true; + dest_err->oer_errno = src_err->oer_errno; + } +} + +static void +encode_accumulated_error(struct objlayout *objlay, __be32 *p) +{ + struct objlayout_io_state *state, *tmp; + struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; + + list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { + unsigned i; + + for (i = 0; i < state->num_comps; i++) { + struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; + + if (!ioerr->oer_errno) + continue; + + printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " + "dev(%llx:%llx) par=0x%llx obj=0x%llx " + "offset=0x%llx length=0x%llx\n", + __func__, i, ioerr->oer_errno, + ioerr->oer_iswrite, + _DEVID_LO(&ioerr->oer_component.oid_device_id), + _DEVID_HI(&ioerr->oer_component.oid_device_id), + ioerr->oer_component.oid_partition_id, + ioerr->oer_component.oid_object_id, + ioerr->oer_comp_offset, + ioerr->oer_comp_length); + + merge_ioerr(&accumulated_err, ioerr); + } + list_del(&state->err_list); + objlayout_free_io_state(state); + } + + pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); +} + +void +objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, + struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args) +{ + struct objlayout *objlay = OBJLAYOUT(pnfslay); + struct objlayout_io_state *state, *tmp; + __be32 *start; + + dprintk("%s: Begin\n", __func__); + start = xdr_reserve_space(xdr, 4); + BUG_ON(!start); + + spin_lock(&objlay->lock); + + list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { + __be32 *last_xdr = NULL, *p; + unsigned i; + int res = 0; + + for (i = 0; i < state->num_comps; i++) { + struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; + + if (!ioerr->oer_errno) + continue; + + dprintk("%s: err[%d]: errno=%d is_write=%d " + "dev(%llx:%llx) par=0x%llx obj=0x%llx " + "offset=0x%llx length=0x%llx\n", + __func__, i, ioerr->oer_errno, + ioerr->oer_iswrite, + _DEVID_LO(&ioerr->oer_component.oid_device_id), + _DEVID_HI(&ioerr->oer_component.oid_device_id), + ioerr->oer_component.oid_partition_id, + ioerr->oer_component.oid_object_id, + ioerr->oer_comp_offset, + ioerr->oer_comp_length); + + p = pnfs_osd_xdr_ioerr_reserve_space(xdr); + if (unlikely(!p)) { + res = -E2BIG; + break; /* accumulated_error */ + } + + last_xdr = p; + pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]); + } + + /* TODO: use xdr_write_pages */ + if (unlikely(res)) { + /* no space for even one error descriptor */ + BUG_ON(!last_xdr); + + /* we've encountered a situation with lots and lots of + * errors and no space to encode them all. Use the last + * available slot to report the union of all the + * remaining errors. + */ + encode_accumulated_error(objlay, last_xdr); + goto loop_done; + } + list_del(&state->err_list); + objlayout_free_io_state(state); + } +loop_done: + spin_unlock(&objlay->lock); + + *start = cpu_to_be32((xdr->p - start - 1) * 4); + dprintk("%s: Return\n", __func__); +} + + /* * Get Device Info API for io engines */ diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 9a405e8069f..b0bb975058e 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -50,6 +50,10 @@ */ struct objlayout { struct pnfs_layout_hdr pnfs_layout; + + /* for layout_return */ + spinlock_t lock; + struct list_head err_list; }; static inline struct objlayout * @@ -76,6 +80,16 @@ struct objlayout_io_state { int status; /* res */ int eof; /* res */ int committed; /* res */ + + /* Error reporting (layout_return) */ + struct list_head err_list; + unsigned num_comps; + /* Pointer to array of error descriptors of size num_comps. + * It should contain as many entries as devices in the osd_layout + * that participate in the I/O. It is up to the io_engine to allocate + * needed space and set num_comps. + */ + struct pnfs_osd_ioerr *ioerrs; }; /* @@ -101,6 +115,10 @@ extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, /* * callback API */ +extern void objlayout_io_set_result(struct objlayout_io_state *state, + unsigned index, struct pnfs_osd_objid *pooid, + int osd_error, u64 offset, u64 length, bool is_write); + extern void objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync); extern void objlayout_write_done(struct objlayout_io_state *state, @@ -131,4 +149,9 @@ extern enum pnfs_try_status objlayout_write_pagelist( struct nfs_write_data *, int how); +extern void objlayout_encode_layoutreturn( + struct pnfs_layout_hdr *, + struct xdr_stream *, + const struct nfs4_layoutreturn_args *); + #endif /* _OBJLAYOUT_H */ -- cgit v1.2.3-70-g09d2 From ac7db7264ac3314cae09893bc838fcb7e83267a4 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:53:48 +0300 Subject: pnfs: encode_layoutcommit Add a layout driver method to encode the layout type specific opaque part of layout commit in-line in the xdr stream. Currently, the pnfs-objects layout driver uses it to encode metadata hints to the MDS and the blocks layout driver to commit provisionally allocated extents to the file. Signed-off-by: Benny Halevy --- fs/nfs/nfs4xdr.c | 16 +++++++++++++--- fs/nfs/pnfs.h | 4 ++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d464badc006..d869a5e5464 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1875,6 +1875,7 @@ encode_layoutget(struct xdr_stream *xdr, static int encode_layoutcommit(struct xdr_stream *xdr, + struct inode *inode, const struct nfs4_layoutcommit_args *args, struct compound_hdr *hdr) { @@ -1883,7 +1884,7 @@ encode_layoutcommit(struct xdr_stream *xdr, dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, NFS_SERVER(args->inode)->pnfs_curr_ld->id); - p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE); + p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); /* Only whole file layouts */ p = xdr_encode_hyper(p, 0); /* offset */ @@ -1894,7 +1895,14 @@ encode_layoutcommit(struct xdr_stream *xdr, p = xdr_encode_hyper(p, args->lastbytewritten); *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ - *p++ = cpu_to_be32(0); /* no file layout payload */ + + if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) + NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( + NFS_I(inode)->layout, xdr, args); + else { + p = reserve_space(xdr, 4); + *p = cpu_to_be32(0); /* no layout-type payload */ + } hdr->nops++; hdr->replen += decode_layoutcommit_maxsz; @@ -2751,6 +2759,8 @@ static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, struct xdr_stream *xdr, struct nfs4_layoutcommit_args *args) { + struct nfs4_layoutcommit_data *data = + container_of(args, struct nfs4_layoutcommit_data, args); struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; @@ -2758,7 +2768,7 @@ static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, NFS_FH(args->inode), &hdr); - encode_layoutcommit(xdr, args, &hdr); + encode_layoutcommit(xdr, data->args.inode, args, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1b6b207a880..3fda9714677 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -108,6 +108,10 @@ struct pnfs_layoutdriver_type { void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args); + + void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, + struct xdr_stream *xdr, + const struct nfs4_layoutcommit_args *args); }; struct pnfs_layout_hdr { -- cgit v1.2.3-70-g09d2 From a0fe8bf427f4987d7b82678292ca03cfd7331467 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 22 May 2011 19:54:13 +0300 Subject: pnfs-obj: objlayout_encode_layoutcommit implementation * Define API for io-engines to report delta_space_used in IOs * Encode the osd-layout specific information of the layoutcommit XDR buffer. Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objio_osd.c | 1 + fs/nfs/objlayout/objlayout.c | 30 ++++++++++++++++++++++++++++++ fs/nfs/objlayout/objlayout.h | 30 ++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 8bca5e13f3e..3be124160e9 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1018,6 +1018,7 @@ static struct pnfs_layoutdriver_type objlayout_type = { .free_deviceid_node = objio_free_deviceid_node, + .encode_layoutcommit = objlayout_encode_layoutcommit, .encode_layoutreturn = objlayout_encode_layoutreturn, }; diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index f7caecff6b4..dc3956c0de8 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -225,6 +225,7 @@ objlayout_iodone(struct objlayout_io_state *state) struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); spin_lock(&objlay->lock); + objlay->delta_space_valid = OBJ_DSU_INVALID; list_add(&objlay->err_list, &state->err_list); spin_unlock(&objlay->lock); } @@ -433,6 +434,35 @@ objlayout_write_pagelist(struct nfs_write_data *wdata, return PNFS_ATTEMPTED; } +void +objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay, + struct xdr_stream *xdr, + const struct nfs4_layoutcommit_args *args) +{ + struct objlayout *objlay = OBJLAYOUT(pnfslay); + struct pnfs_osd_layoutupdate lou; + __be32 *start; + + dprintk("%s: Begin\n", __func__); + + spin_lock(&objlay->lock); + lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID); + lou.dsu_delta = objlay->delta_space_used; + objlay->delta_space_used = 0; + objlay->delta_space_valid = OBJ_DSU_INIT; + lou.olu_ioerr_flag = !list_empty(&objlay->err_list); + spin_unlock(&objlay->lock); + + start = xdr_reserve_space(xdr, 4); + + BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou)); + + *start = cpu_to_be32((xdr->p - start - 1) * 4); + + dprintk("%s: Return delta_space_used %lld err %d\n", __func__, + lou.dsu_delta, lou.olu_ioerr_flag); +} + static int err_prio(u32 oer_errno) { diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index b0bb975058e..a8244c8e042 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -51,6 +51,14 @@ struct objlayout { struct pnfs_layout_hdr pnfs_layout; + /* for layout_commit */ + enum osd_delta_space_valid_enum { + OBJ_DSU_INIT = 0, + OBJ_DSU_VALID, + OBJ_DSU_INVALID, + } delta_space_valid; + s64 delta_space_used; /* consumed by write ops */ + /* for layout_return */ spinlock_t lock; struct list_head err_list; @@ -119,6 +127,23 @@ extern void objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, struct pnfs_osd_objid *pooid, int osd_error, u64 offset, u64 length, bool is_write); +static inline void +objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) +{ + struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); + + /* If one of the I/Os errored out and the delta_space_used was + * invalid we render the complete report as invalid. Protocol mandate + * the DSU be accurate or not reported. + */ + spin_lock(&objlay->lock); + if (objlay->delta_space_valid != OBJ_DSU_INVALID) { + objlay->delta_space_valid = OBJ_DSU_VALID; + objlay->delta_space_used += space_used; + } + spin_unlock(&objlay->lock); +} + extern void objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync); extern void objlayout_write_done(struct objlayout_io_state *state, @@ -149,6 +174,11 @@ extern enum pnfs_try_status objlayout_write_pagelist( struct nfs_write_data *, int how); +extern void objlayout_encode_layoutcommit( + struct pnfs_layout_hdr *, + struct xdr_stream *, + const struct nfs4_layoutcommit_args *); + extern void objlayout_encode_layoutreturn( struct pnfs_layout_hdr *, struct xdr_stream *, -- cgit v1.2.3-70-g09d2 From dfed206b8857d41a91ebba030f99e30017a44dda Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 25 May 2011 20:25:22 +0300 Subject: NFSv4.1: unify pnfs_pageio_init functions Use common code for pnfs_pageio_init_{read,write} and use a common generic pg_test function. Note that this function always assumes the the layout driver's pg_test method is implemented. [Fix BUG] Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- fs/nfs/pagelist.c | 2 ++ fs/nfs/pnfs.c | 55 +++++++++++++++---------------------------------------- fs/nfs/pnfs.h | 21 ++++++++++----------- fs/nfs/read.c | 1 - fs/nfs/write.c | 2 -- 5 files changed, 27 insertions(+), 54 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index c80add6e221..b8704fedcd1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -229,6 +229,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; + desc->pg_test = NULL; + pnfs_pageio_init(desc, inode); } /** diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 00b12824174..568ab0eef67 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1043,46 +1043,30 @@ out_forget_reply: goto out; } -static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, - struct nfs_page *prev, - struct nfs_page *req) -{ - if (pgio->pg_count == prev->wb_bytes) { - /* This is first coelesce call for a series of nfs_pages */ - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - prev->wb_context, - req_offset(req), - pgio->pg_count, - IOMODE_READ, - GFP_KERNEL); - } else if (pgio->pg_lseg && - req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length)) - return 0; - return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); -} - -void -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) +int +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) { - struct pnfs_layoutdriver_type *ld; + enum pnfs_iomode access_type; + gfp_t gfp_flags; - ld = NFS_SERVER(inode)->pnfs_curr_ld; - pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL; -} + /* We assume that pg_ioflags == 0 iff we're reading a page */ + if (pgio->pg_ioflags == 0) { + access_type = IOMODE_READ; + gfp_flags = GFP_KERNEL; + } else { + access_type = IOMODE_RW; + gfp_flags = GFP_NOFS; + } -static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, - struct nfs_page *prev, - struct nfs_page *req) -{ if (pgio->pg_count == prev->wb_bytes) { /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, req_offset(req), pgio->pg_count, - IOMODE_RW, - GFP_NOFS); + access_type, + gfp_flags); } else if (pgio->pg_lseg && req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length)) @@ -1090,15 +1074,6 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } -void -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode) -{ - struct pnfs_layoutdriver_type *ld; - - ld = NFS_SERVER(inode)->pnfs_curr_ld; - pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL; -} - /* * Called by non rpc-based layout drivers */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 3fda9714677..c056688ee92 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -158,8 +158,7 @@ enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, const struct rpc_call_ops *, int); enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, const struct rpc_call_ops *); -void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); -void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *); +int pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); @@ -293,6 +292,13 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } +static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, + struct inode *inode) +{ + if (NFS_SERVER(inode)->pnfs_curr_ld) + pgio->pg_test = pnfs_generic_pg_test; +} + #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -376,16 +382,9 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline void -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino) -{ - pgio->pg_test = NULL; -} - -static inline void -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino) +static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, + struct inode *inode) { - pgio->pg_test = NULL; } static inline void diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 540c8bc93f9..20a7f952e24 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -664,7 +664,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - pnfs_pageio_init_read(&pgio, inode); if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); else diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7edb72f27c2..e268e3b2349 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1036,8 +1036,6 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, { size_t wsize = NFS_SERVER(inode)->wsize; - pnfs_pageio_init_write(pgio, inode); - if (wsize < PAGE_CACHE_SIZE) nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); else -- cgit v1.2.3-70-g09d2 From 18ad0a9f2ccd260d37dd6bc5fa04c7819def4c84 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 25 May 2011 21:03:56 +0300 Subject: NFSv4.1: change pg_test return type to bool Signed-off-by: Benny Halevy --- fs/nfs/nfs4filelayout.c | 6 +++--- fs/nfs/objlayout/objio_osd.c | 7 +++---- fs/nfs/pagelist.c | 22 +++++++++++----------- fs/nfs/pnfs.c | 4 ++-- fs/nfs/pnfs.h | 4 ++-- include/linux/nfs_page.h | 2 +- 6 files changed, 22 insertions(+), 23 deletions(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 33bda24e8cd..24f05720daf 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -651,10 +651,10 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * - * return 1 : coalesce page - * return 0 : don't coalesce page + * return true : coalesce page + * return false : don't coalesce page */ -int +bool filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 3be124160e9..8c2bd3eb8e8 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -991,14 +991,13 @@ ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) /* * objlayout_pg_test(). Called by nfs_can_coalesce_requests() * - * return 1 : coalesce page - * return 0 : don't coalesce page + * return true iff coalesce page */ -int +bool objlayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - return 1; + return true; } static struct pnfs_layoutdriver_type objlayout_type = { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b8704fedcd1..5344371a257 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -244,29 +244,29 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, * * Return 'true' if this is the case, else return 'false'. */ -static int nfs_can_coalesce_requests(struct nfs_page *prev, - struct nfs_page *req, - struct nfs_pageio_descriptor *pgio) +static bool nfs_can_coalesce_requests(struct nfs_page *prev, + struct nfs_page *req, + struct nfs_pageio_descriptor *pgio) { if (req->wb_context->cred != prev->wb_context->cred) - return 0; + return false; if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) - return 0; + return false; if (req->wb_context->state != prev->wb_context->state) - return 0; + return false; if (req->wb_index != (prev->wb_index + 1)) - return 0; + return false; if (req->wb_pgbase != 0) - return 0; + return false; if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) - return 0; + return false; /* * Non-whole file layouts need to check that req is inside of * pgio->pg_lseg. */ if (pgio->pg_test && !pgio->pg_test(pgio, prev, req)) - return 0; - return 1; + return false; + return true; } /** diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 568ab0eef67..212fc292761 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1043,7 +1043,7 @@ out_forget_reply: goto out; } -int +bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -1070,7 +1070,7 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } else if (pgio->pg_lseg && req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length)) - return 0; + return false; return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index c056688ee92..65daae59c8a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -87,7 +87,7 @@ struct pnfs_layoutdriver_type { void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ - int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); /* Returns true if layoutdriver wants to divert this request to * driver's commit routine. @@ -158,7 +158,7 @@ enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, const struct rpc_call_ops *, int); enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, const struct rpc_call_ops *); -int pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); +bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 91af2e49fa3..3a34e80ae92 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -68,7 +68,7 @@ struct nfs_pageio_descriptor { int pg_ioflags; int pg_error; struct pnfs_layout_segment *pg_lseg; - int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) -- cgit v1.2.3-70-g09d2 From 89a58e32d9105c01022a757fb32ddc3b51bf0025 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 25 May 2011 20:54:40 +0300 Subject: NFSv4.1: use pnfs_generic_pg_test directly by layout driver Signed-off-by: Benny Halevy --- fs/nfs/nfs4filelayout.c | 3 +++ fs/nfs/objlayout/objio_osd.c | 14 +------------- fs/nfs/pnfs.c | 13 +++++++++---- fs/nfs/pnfs.h | 6 ++++-- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 24f05720daf..426908809c9 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -661,6 +661,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, u64 p_stripe, r_stripe; u32 stripe_unit; + if (!pnfs_generic_pg_test(pgio, prev, req)) + return 0; + if (!pgio->pg_lseg) return 1; p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 8c2bd3eb8e8..3b10d09e521 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -988,18 +988,6 @@ ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) return _write_exec(ios); } -/* - * objlayout_pg_test(). Called by nfs_can_coalesce_requests() - * - * return true iff coalesce page - */ -bool -objlayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, - struct nfs_page *req) -{ - return true; -} - static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", @@ -1013,7 +1001,7 @@ static struct pnfs_layoutdriver_type objlayout_type = { .read_pagelist = objlayout_read_pagelist, .write_pagelist = objlayout_write_pagelist, - .pg_test = objlayout_pg_test, + .pg_test = pnfs_generic_pg_test, .free_deviceid_node = objio_free_deviceid_node, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 212fc292761..d79f2df33a4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1067,12 +1067,17 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, pgio->pg_count, access_type, gfp_flags); - } else if (pgio->pg_lseg && - req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length)) + return true; + } + + if (pgio->pg_lseg && + req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length)) return false; - return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); + + return true; } +EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); /* * Called by non rpc-based layout drivers diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 65daae59c8a..48d0a8e4d06 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -295,8 +295,10 @@ static inline int pnfs_return_layout(struct inode *ino) static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, struct inode *inode) { - if (NFS_SERVER(inode)->pnfs_curr_ld) - pgio->pg_test = pnfs_generic_pg_test; + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + + if (ld) + pgio->pg_test = ld->pg_test; } #else /* CONFIG_NFS_V4_1 */ -- cgit v1.2.3-70-g09d2 From 5b36c7dc41d87d39e779a84fdc2b44b39bba32ca Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 29 May 2011 11:45:39 +0300 Subject: NFSv4.1: define nfs_generic_pg_test By default, unless pnfs is used coalesce pages until pg_bsize (rsize or wsize) is reached. pnfs layout drivers define their own pg_test methods that use pnfs_generic_pg_test and need to define their own I/O size limits (e.g. based on the file stripe size). [Move a check from nfs_pageio_do_add_request to nfs_generic_pg_test] Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- fs/nfs/pagelist.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 5344371a257..7913961aff2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -204,6 +204,21 @@ nfs_wait_on_request(struct nfs_page *req) TASK_UNINTERRUPTIBLE); } +static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) +{ + /* + * FIXME: ideally we should be able to coalesce all requests + * that are not block boundary aligned, but currently this + * is problematic for the case of bsize < PAGE_CACHE_SIZE, + * since nfs_flush_multi and nfs_pagein_multi assume you + * can have only one struct nfs_page. + */ + if (desc->pg_bsize < PAGE_SIZE) + return 0; + + return desc->pg_count + req->wb_bytes <= desc->pg_bsize; +} + /** * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor @@ -229,7 +244,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; - desc->pg_test = NULL; + desc->pg_test = nfs_generic_pg_test; pnfs_pageio_init(desc, inode); } @@ -260,13 +275,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) return false; - /* - * Non-whole file layouts need to check that req is inside of - * pgio->pg_lseg. - */ - if (pgio->pg_test && !pgio->pg_test(pgio, prev, req)) - return false; - return true; + return pgio->pg_test(pgio, prev, req); } /** @@ -280,31 +289,18 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { - size_t newlen = req->wb_bytes; - if (desc->pg_count != 0) { struct nfs_page *prev; - /* - * FIXME: ideally we should be able to coalesce all requests - * that are not block boundary aligned, but currently this - * is problematic for the case of bsize < PAGE_CACHE_SIZE, - * since nfs_flush_multi and nfs_pagein_multi assume you - * can have only one struct nfs_page. - */ - if (desc->pg_bsize < PAGE_SIZE) - return 0; - newlen += desc->pg_count; - if (newlen > desc->pg_bsize) - return 0; prev = nfs_list_entry(desc->pg_list.prev); if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; - } else + } else { desc->pg_base = req->wb_pgbase; + } nfs_list_remove_request(req); nfs_list_add_request(req, &desc->pg_list); - desc->pg_count = newlen; + desc->pg_count += req->wb_bytes; return 1; } -- cgit v1.2.3-70-g09d2 From 9342077011d54f42fa1b88b7bc1f7008dcf5fff9 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 25 May 2011 21:25:29 +0300 Subject: pnfs-obj: pg_test check for max_io_size Implement pg_test vector to test for max IO sizes. We calculate a max_io_size member only once, and cache it in lseg so to not do so on every page insert. Signed-off-by: Boaz Harrosh [simplify logic] Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objio_osd.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 3b10d09e521..9cf208df1f2 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -128,6 +128,8 @@ struct objio_segment { u64 group_depth; unsigned group_count; + unsigned max_io_size; + unsigned comps_index; unsigned num_comps; /* variable length */ @@ -365,6 +367,11 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp, objio_seg->group_count = 1; } + /* Cache this calculation it will hit for every page */ + objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - + objio_seg->stripe_unit) * + objio_seg->group_width; + *outp = &objio_seg->lseg; return 0; @@ -988,6 +995,16 @@ ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) return _write_exec(ios); } +static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, + struct nfs_page *prev, struct nfs_page *req) +{ + if (!pnfs_generic_pg_test(pgio, prev, req)) + return false; + + return pgio->pg_count + req->wb_bytes <= + OBJIO_LSEG(pgio->pg_lseg)->max_io_size; +} + static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", @@ -1001,7 +1018,7 @@ static struct pnfs_layoutdriver_type objlayout_type = { .read_pagelist = objlayout_read_pagelist, .write_pagelist = objlayout_write_pagelist, - .pg_test = pnfs_generic_pg_test, + .pg_test = objio_pg_test, .free_deviceid_node = objio_free_deviceid_node, -- cgit v1.2.3-70-g09d2 From ef1d57599dc904fdb31b8e9b5336350d21a1fde1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2011 13:46:08 +0100 Subject: cifs/ubifs: Fix shrinker API change fallout Commit 1495f230fa77 ("vmscan: change shrinker API by passing shrink_control struct") changed the API of ->shrink(), but missed ubifs and cifs instances. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/cifs/cifsacl.c | 3 ++- fs/ubifs/shrinker.c | 3 ++- fs/ubifs/ubifs.h | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 8f1700623b4..21de1d6d584 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -74,8 +74,9 @@ shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem, * Run idmap cache shrinker. */ static int -cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +cifs_idmap_shrinker(struct shrinker *shrink, struct shrink_control *sc) { + int nr_to_scan = sc->nr_to_scan; int nr_del = 0; int nr_rem = 0; struct rb_root *root; diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 46961c00323..ca953a94502 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -277,8 +277,9 @@ static int kick_a_thread(void) return 0; } -int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) +int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) { + int nr = sc->nr_to_scan; int freed, contention = 0; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 93d1412a06f..a70d7b4ffb2 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1614,7 +1614,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); /* shrinker.c */ -int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); +int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc); /* commit.c */ int ubifs_bg_thread(void *info); -- cgit v1.2.3-70-g09d2 From 6345d24daf0c1fffe6642081d783cdf653ebaa5c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 May 2011 11:32:28 -0700 Subject: mm: Fix boot crash in mm_alloc() Thomas Gleixner reports that we now have a boot crash triggered by CONFIG_CPUMASK_OFFSTACK=y: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] find_next_bit+0x55/0xb0 Call Trace: [] cpumask_any_but+0x2a/0x70 [] flush_tlb_mm+0x2b/0x80 [] pud_populate+0x35/0x50 [] pgd_alloc+0x9a/0xf0 [] mm_init+0xec/0x120 [] mm_alloc+0x53/0xd0 which was introduced by commit de03c72cfce5 ("mm: convert mm->cpu_vm_cpumask into cpumask_var_t"), and is due to wrong ordering of mm_init() vs mm_init_cpumask Thomas wrote a patch to just fix the ordering of initialization, but I hate the new double allocation in the fork path, so I ended up instead doing some more radical surgery to clean it all up. Reported-by: Thomas Gleixner Reported-by: Ingo Molnar Cc: KOSAKI Motohiro Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 14 ++++++++++++-- include/linux/sched.h | 1 - init/main.c | 2 +- kernel/fork.c | 42 ++++++++++-------------------------------- 4 files changed, 23 insertions(+), 36 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2a78aae78c6..027935c86c6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -264,6 +264,8 @@ struct mm_struct { struct linux_binfmt *binfmt; + cpumask_var_t cpu_vm_mask_var; + /* Architecture-specific MM context */ mm_context_t context; @@ -311,10 +313,18 @@ struct mm_struct { #ifdef CONFIG_TRANSPARENT_HUGEPAGE pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif - - cpumask_var_t cpu_vm_mask_var; +#ifdef CONFIG_CPUMASK_OFFSTACK + struct cpumask cpumask_allocation; +#endif }; +static inline void mm_init_cpumask(struct mm_struct *mm) +{ +#ifdef CONFIG_CPUMASK_OFFSTACK + mm->cpu_vm_mask_var = &mm->cpumask_allocation; +#endif +} + /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { diff --git a/include/linux/sched.h b/include/linux/sched.h index bcddd013810..2a8621c4be1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2194,7 +2194,6 @@ static inline void mmdrop(struct mm_struct * mm) if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } -extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm); /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); diff --git a/init/main.c b/init/main.c index d2f1e086bf3..cafba67c13b 100644 --- a/init/main.c +++ b/init/main.c @@ -487,6 +487,7 @@ asmlinkage void __init start_kernel(void) printk(KERN_NOTICE "%s", linux_banner); setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); + mm_init_cpumask(&init_mm); setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); @@ -510,7 +511,6 @@ asmlinkage void __init start_kernel(void) sort_main_extable(); trap_init(); mm_init(); - BUG_ON(mm_init_cpumask(&init_mm, 0)); /* * Set up the scheduler prior starting any interrupts (such as the diff --git a/kernel/fork.c b/kernel/fork.c index ca406d91671..0276c30401a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -484,20 +484,6 @@ static void mm_init_aio(struct mm_struct *mm) #endif } -int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm) -{ -#ifdef CONFIG_CPUMASK_OFFSTACK - if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL)) - return -ENOMEM; - - if (oldmm) - cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm)); - else - memset(mm_cpumask(mm), 0, cpumask_size()); -#endif - return 0; -} - static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) { atomic_set(&mm->mm_users, 1); @@ -538,17 +524,8 @@ struct mm_struct * mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - mm = mm_init(mm, current); - if (!mm) - return NULL; - - if (mm_init_cpumask(mm, NULL)) { - mm_free_pgd(mm); - free_mm(mm); - return NULL; - } - - return mm; + mm_init_cpumask(mm); + return mm_init(mm, current); } /* @@ -559,7 +536,6 @@ struct mm_struct * mm_alloc(void) void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); - free_cpumask_var(mm->cpu_vm_mask_var); mm_free_pgd(mm); destroy_context(mm); mmu_notifier_mm_destroy(mm); @@ -753,6 +729,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk) goto fail_nomem; memcpy(mm, oldmm, sizeof(*mm)); + mm_init_cpumask(mm); /* Initializing for Swap token stuff */ mm->token_priority = 0; @@ -765,9 +742,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk) if (!mm_init(mm, tsk)) goto fail_nomem; - if (mm_init_cpumask(mm, oldmm)) - goto fail_nocpumask; - if (init_new_context(tsk, mm)) goto fail_nocontext; @@ -794,9 +768,6 @@ fail_nomem: return NULL; fail_nocontext: - free_cpumask_var(mm->cpu_vm_mask_var); - -fail_nocpumask: /* * If init_new_context() failed, we cannot use mmput() to free the mm * because it calls destroy_context() @@ -1591,6 +1562,13 @@ void __init proc_caches_init(void) fs_cachep = kmem_cache_create("fs_cache", sizeof(struct fs_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); + /* + * FIXME! The "sizeof(struct mm_struct)" currently includes the + * whole struct cpumask for the OFFSTACK case. We could change + * this to *only* allocate as much of it as required by the + * maximum number of CPU's we can ever have. The cpumask_allocation + * is at the end of the structure, exactly for that reason. + */ mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); -- cgit v1.2.3-70-g09d2 From 3b06b3ebf44170c90c893c6c80916db6e922b9f2 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 24 May 2011 03:49:02 -0500 Subject: eCryptfs: Fix new inode race condition Only unlock and d_add() new inodes after the plaintext inode size has been read from the lower filesystem. This fixes a race condition that was sometimes seen during a multi-job kernel build in an eCryptfs mount. https://bugzilla.kernel.org/show_bug.cgi?id=36002 Signed-off-by: Tyler Hicks Reported-by: David Tested-by: David --- fs/ecryptfs/crypto.c | 4 ++-- fs/ecryptfs/ecryptfs_kernel.h | 4 ++-- fs/ecryptfs/file.c | 2 +- fs/ecryptfs/inode.c | 42 ++++++++++++++++++++++-------------------- fs/ecryptfs/main.c | 6 +++--- 5 files changed, 30 insertions(+), 28 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index b8d5c809102..f48c4987a15 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1568,11 +1568,11 @@ out: } int ecryptfs_read_and_validate_xattr_region(char *page_virt, - struct dentry *ecryptfs_dentry) + struct inode *inode) { int rc; - rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry->d_inode); + rc = ecryptfs_read_xattr_region(page_virt, inode); if (rc) goto out; if (!contains_ecryptfs_marker(page_virt + ECRYPTFS_FILE_SIZE_BYTES)) { diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 41a45323637..72aa24a4c71 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -662,7 +662,7 @@ void ecryptfs_write_crypt_stat_flags(char *page_virt, int ecryptfs_read_and_validate_header_region(char *data, struct inode *ecryptfs_inode); int ecryptfs_read_and_validate_xattr_region(char *page_virt, - struct dentry *ecryptfs_dentry); + struct inode *inode); u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes); int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code); void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat); @@ -753,7 +753,7 @@ int ecryptfs_privileged_open(struct file **lower_file, struct dentry *lower_dentry, struct vfsmount *lower_mnt, const struct cred *cred); -int ecryptfs_get_lower_file(struct dentry *ecryptfs_dentry); +int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode); void ecryptfs_put_lower_file(struct inode *inode); int ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 566e5472f78..4ec9eb00a24 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -191,7 +191,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | ECRYPTFS_ENCRYPTED); } mutex_unlock(&crypt_stat->cs_mutex); - rc = ecryptfs_get_lower_file(ecryptfs_dentry); + rc = ecryptfs_get_lower_file(ecryptfs_dentry, inode); if (rc) { printk(KERN_ERR "%s: Error attempting to initialize " "the lower file for the dentry with name " diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index fc7d2b74850..f0ad965d7d5 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -259,7 +259,8 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) "context; rc = [%d]\n", rc); goto out; } - rc = ecryptfs_get_lower_file(ecryptfs_dentry); + rc = ecryptfs_get_lower_file(ecryptfs_dentry, + ecryptfs_dentry->d_inode); if (rc) { printk(KERN_ERR "%s: Error attempting to initialize " "the lower file for the dentry with name " @@ -350,50 +351,51 @@ static int ecryptfs_lookup_interpose(struct dentry *ecryptfs_dentry, __func__, rc); goto out; } - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - d_add(ecryptfs_dentry, inode); - if (S_ISDIR(lower_inode->i_mode)) - goto out; - if (S_ISLNK(lower_inode->i_mode)) - goto out; - if (special_file(lower_inode->i_mode)) + if (!S_ISREG(inode->i_mode)) { + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + d_add(ecryptfs_dentry, inode); goto out; + } /* Released in this function */ page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER); if (!page_virt) { printk(KERN_ERR "%s: Cannot kmem_cache_zalloc() a page\n", __func__); rc = -ENOMEM; + make_bad_inode(inode); goto out; } - rc = ecryptfs_get_lower_file(ecryptfs_dentry); + rc = ecryptfs_get_lower_file(ecryptfs_dentry, inode); if (rc) { printk(KERN_ERR "%s: Error attempting to initialize " "the lower file for the dentry with name " "[%s]; rc = [%d]\n", __func__, ecryptfs_dentry->d_name.name, rc); + make_bad_inode(inode); goto out_free_kmem; } put_lower = 1; - crypt_stat = &ecryptfs_inode_to_private( - ecryptfs_dentry->d_inode)->crypt_stat; + crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; /* TODO: lock for crypt_stat comparison */ if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) ecryptfs_set_default_sizes(crypt_stat); - rc = ecryptfs_read_and_validate_header_region(page_virt, - ecryptfs_dentry->d_inode); + rc = ecryptfs_read_and_validate_header_region(page_virt, inode); if (rc) { memset(page_virt, 0, PAGE_CACHE_SIZE); rc = ecryptfs_read_and_validate_xattr_region(page_virt, - ecryptfs_dentry); + inode); if (rc) { rc = 0; - goto out_free_kmem; + goto unlock_inode; } crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; } - ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode); + ecryptfs_i_size_init(page_virt, inode); +unlock_inode: + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + d_add(ecryptfs_dentry, inode); out_free_kmem: kmem_cache_free(ecryptfs_header_cache_2, page_virt); goto out; @@ -403,7 +405,7 @@ out_put: d_drop(ecryptfs_dentry); out: if (put_lower) - ecryptfs_put_lower_file(ecryptfs_dentry->d_inode); + ecryptfs_put_lower_file(inode); return rc; } @@ -843,7 +845,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, lower_ia->ia_valid &= ~ATTR_SIZE; return 0; } - rc = ecryptfs_get_lower_file(dentry); + rc = ecryptfs_get_lower_file(dentry, inode); if (rc) return rc; crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; @@ -999,7 +1001,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) mount_crypt_stat = &ecryptfs_superblock_to_private( dentry->d_sb)->mount_crypt_stat; - rc = ecryptfs_get_lower_file(dentry); + rc = ecryptfs_get_lower_file(dentry, inode); if (rc) { mutex_unlock(&crypt_stat->cs_mutex); goto out; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 7c697abab39..943a4f55ed6 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -135,12 +135,12 @@ static int ecryptfs_init_lower_file(struct dentry *dentry, return rc; } -int ecryptfs_get_lower_file(struct dentry *dentry) +int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode) { - struct ecryptfs_inode_info *inode_info = - ecryptfs_inode_to_private(dentry->d_inode); + struct ecryptfs_inode_info *inode_info; int count, rc = 0; + inode_info = ecryptfs_inode_to_private(inode); mutex_lock(&inode_info->lower_file_mutex); count = atomic_inc_return(&inode_info->lower_file_count); if (WARN_ON_ONCE(count < 1)) -- cgit v1.2.3-70-g09d2 From 7a86617e553f47761b10f57de472d7262562b7de Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 2 May 2011 00:39:54 -0500 Subject: eCryptfs: Return useful code from contains_ecryptfs_marker Instead of having the calling functions translate the true/false return code to either 0 or -EINVAL, have contains_ecryptfs_marker() return 0 or -EINVAL so that the calling functions can just reuse the return code. Also, rename the function to ecryptfs_validate_marker() to avoid callers mistakenly thinking that it returns true/false codes. Signed-off-by: Tyler Hicks --- fs/ecryptfs/crypto.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index f48c4987a15..162f9baf9eb 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1024,25 +1024,25 @@ out: } /** - * contains_ecryptfs_marker - check for the ecryptfs marker + * ecryptfs_validate_marker - check for the ecryptfs marker * @data: The data block in which to check * - * Returns one if marker found; zero if not found + * Returns zero if marker found; -EINVAL if not found */ -static int contains_ecryptfs_marker(char *data) +static int ecryptfs_validate_marker(char *data) { u32 m_1, m_2; m_1 = get_unaligned_be32(data); m_2 = get_unaligned_be32(data + 4); if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2) - return 1; + return 0; ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; " "MAGIC_ECRYPTFS_MARKER = [0x%.8x]\n", m_1, m_2, MAGIC_ECRYPTFS_MARKER); ecryptfs_printk(KERN_DEBUG, "(m_1 ^ MAGIC_ECRYPTFS_MARKER) = " "[0x%.8x]\n", (m_1 ^ MAGIC_ECRYPTFS_MARKER)); - return 0; + return -EINVAL; } struct ecryptfs_flag_map_elem { @@ -1217,10 +1217,7 @@ int ecryptfs_read_and_validate_header_region(char *data, __func__, rc); goto out; } - if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { - rc = -EINVAL; - } else - rc = 0; + rc = ecryptfs_validate_marker(data + ECRYPTFS_FILE_SIZE_BYTES); out: return rc; } @@ -1496,11 +1493,9 @@ static int ecryptfs_read_headers_virt(char *page_virt, crypt_stat->mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; offset = ECRYPTFS_FILE_SIZE_BYTES; - rc = contains_ecryptfs_marker(page_virt + offset); - if (rc == 0) { - rc = -EINVAL; + rc = ecryptfs_validate_marker(page_virt + offset); + if (rc) goto out; - } if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED)) ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode); offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; @@ -1575,11 +1570,10 @@ int ecryptfs_read_and_validate_xattr_region(char *page_virt, rc = ecryptfs_read_xattr_region(page_virt, inode); if (rc) goto out; - if (!contains_ecryptfs_marker(page_virt + ECRYPTFS_FILE_SIZE_BYTES)) { + rc = ecryptfs_validate_marker(page_virt + ECRYPTFS_FILE_SIZE_BYTES); + if (rc) printk(KERN_WARNING "Valid data found in [%s] xattr, but " "the marker is invalid\n", ECRYPTFS_XATTR_NAME); - rc = -EINVAL; - } out: return rc; } -- cgit v1.2.3-70-g09d2 From 778aeb42a708d2a57e491d2cbb5a1e74f61270b9 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 24 May 2011 04:56:23 -0500 Subject: eCryptfs: Cleanup and optimize ecryptfs_lookup_interpose() ecryptfs_lookup_interpose() has turned into spaghetti code over the years. This is an effort to clean it up. - Shorten overly descriptive variable names such as ecryptfs_dentry - Simplify gotos and error paths - Create helper function for reading plaintext i_size from metadata It also includes an optimization when reading i_size from the metadata. A complete page-sized kmem_cache_alloc() was being done to read in 16 bytes of metadata. The buffer for that is now statically declared. Signed-off-by: Tyler Hicks --- fs/ecryptfs/crypto.c | 45 +++++++------- fs/ecryptfs/ecryptfs_kernel.h | 7 ++- fs/ecryptfs/inode.c | 141 +++++++++++++++++++----------------------- 3 files changed, 88 insertions(+), 105 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 162f9baf9eb..66d8e6748a4 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1201,24 +1201,19 @@ int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code) return rc; } -int ecryptfs_read_and_validate_header_region(char *data, - struct inode *ecryptfs_inode) +int ecryptfs_read_and_validate_header_region(struct inode *inode) { - struct ecryptfs_crypt_stat *crypt_stat = - &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); + u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES]; + u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES; int rc; - if (crypt_stat->extent_size == 0) - crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE; - rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size, - ecryptfs_inode); - if (rc < 0) { - printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", - __func__, rc); - goto out; - } - rc = ecryptfs_validate_marker(data + ECRYPTFS_FILE_SIZE_BYTES); -out: + rc = ecryptfs_read_lower(file_size, 0, ECRYPTFS_SIZE_AND_MARKER_BYTES, + inode); + if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) + return rc >= 0 ? -EINVAL : rc; + rc = ecryptfs_validate_marker(marker); + if (!rc) + ecryptfs_i_size_init(file_size, inode); return rc; } @@ -1562,19 +1557,21 @@ out: return rc; } -int ecryptfs_read_and_validate_xattr_region(char *page_virt, +int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, struct inode *inode) { + u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES]; + u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES; int rc; - rc = ecryptfs_read_xattr_region(page_virt, inode); - if (rc) - goto out; - rc = ecryptfs_validate_marker(page_virt + ECRYPTFS_FILE_SIZE_BYTES); - if (rc) - printk(KERN_WARNING "Valid data found in [%s] xattr, but " - "the marker is invalid\n", ECRYPTFS_XATTR_NAME); -out: + rc = ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), + ECRYPTFS_XATTR_NAME, file_size, + ECRYPTFS_SIZE_AND_MARKER_BYTES); + if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) + return rc >= 0 ? -EINVAL : rc; + rc = ecryptfs_validate_marker(marker); + if (!rc) + ecryptfs_i_size_init(file_size, inode); return rc; } diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 72aa24a4c71..8297ddaca7c 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -200,6 +200,8 @@ ecryptfs_get_key_payload_data(struct key *key) #define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5 #define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */ #define ECRYPTFS_FILE_SIZE_BYTES (sizeof(u64)) +#define ECRYPTFS_SIZE_AND_MARKER_BYTES (ECRYPTFS_FILE_SIZE_BYTES \ + + MAGIC_ECRYPTFS_MARKER_SIZE_BYTES) #define ECRYPTFS_DEFAULT_CIPHER "aes" #define ECRYPTFS_DEFAULT_KEY_BYTES 16 #define ECRYPTFS_DEFAULT_HASH "md5" @@ -659,9 +661,8 @@ int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); void ecryptfs_write_crypt_stat_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, size_t *written); -int ecryptfs_read_and_validate_header_region(char *data, - struct inode *ecryptfs_inode); -int ecryptfs_read_and_validate_xattr_region(char *page_virt, +int ecryptfs_read_and_validate_header_region(struct inode *inode); +int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, struct inode *inode); u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes); int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index f0ad965d7d5..7349ade17de 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -307,105 +307,90 @@ out: return rc; } +static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode) +{ + struct ecryptfs_crypt_stat *crypt_stat; + int rc; + + rc = ecryptfs_get_lower_file(dentry, inode); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize " + "the lower file for the dentry with name " + "[%s]; rc = [%d]\n", __func__, + dentry->d_name.name, rc); + return rc; + } + + crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; + /* TODO: lock for crypt_stat comparison */ + if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) + ecryptfs_set_default_sizes(crypt_stat); + + rc = ecryptfs_read_and_validate_header_region(inode); + ecryptfs_put_lower_file(inode); + if (rc) { + rc = ecryptfs_read_and_validate_xattr_region(dentry, inode); + if (!rc) + crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; + } + + /* Must return 0 to allow non-eCryptfs files to be looked up, too */ + return 0; +} + /** * ecryptfs_lookup_interpose - Dentry interposition for a lookup */ -static int ecryptfs_lookup_interpose(struct dentry *ecryptfs_dentry, +static int ecryptfs_lookup_interpose(struct dentry *dentry, struct dentry *lower_dentry, - struct inode *ecryptfs_dir_inode) + struct inode *dir_inode) { - struct dentry *lower_dir_dentry; + struct inode *inode, *lower_inode = lower_dentry->d_inode; + struct ecryptfs_dentry_info *dentry_info; struct vfsmount *lower_mnt; - struct inode *inode, *lower_inode; - struct ecryptfs_crypt_stat *crypt_stat; - char *page_virt = NULL; - int put_lower = 0, rc = 0; - - lower_dir_dentry = lower_dentry->d_parent; - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( - ecryptfs_dentry->d_parent)); - lower_inode = lower_dentry->d_inode; - fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode); + int rc = 0; + + lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); + fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); BUG_ON(!lower_dentry->d_count); - ecryptfs_set_dentry_private(ecryptfs_dentry, - kmem_cache_alloc(ecryptfs_dentry_info_cache, - GFP_KERNEL)); - if (!ecryptfs_dentry_to_private(ecryptfs_dentry)) { - rc = -ENOMEM; + + dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); + ecryptfs_set_dentry_private(dentry, dentry_info); + if (!dentry_info) { printk(KERN_ERR "%s: Out of memory whilst attempting " "to allocate ecryptfs_dentry_info struct\n", __func__); - goto out_put; + dput(lower_dentry); + mntput(lower_mnt); + d_drop(dentry); + return -ENOMEM; } - ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry); - ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt); + ecryptfs_set_dentry_lower(dentry, lower_dentry); + ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); + if (!lower_dentry->d_inode) { /* We want to add because we couldn't find in lower */ - d_add(ecryptfs_dentry, NULL); - goto out; + d_add(dentry, NULL); + return 0; } - inode = __ecryptfs_get_inode(lower_inode, ecryptfs_dir_inode->i_sb); + inode = __ecryptfs_get_inode(lower_inode, dir_inode->i_sb); if (IS_ERR(inode)) { - rc = PTR_ERR(inode); - printk(KERN_ERR "%s: Error interposing; rc = [%d]\n", - __func__, rc); - goto out; - } - if (!S_ISREG(inode->i_mode)) { - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - d_add(ecryptfs_dentry, inode); - goto out; - } - /* Released in this function */ - page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER); - if (!page_virt) { - printk(KERN_ERR "%s: Cannot kmem_cache_zalloc() a page\n", - __func__); - rc = -ENOMEM; - make_bad_inode(inode); - goto out; - } - rc = ecryptfs_get_lower_file(ecryptfs_dentry, inode); - if (rc) { - printk(KERN_ERR "%s: Error attempting to initialize " - "the lower file for the dentry with name " - "[%s]; rc = [%d]\n", __func__, - ecryptfs_dentry->d_name.name, rc); - make_bad_inode(inode); - goto out_free_kmem; + printk(KERN_ERR "%s: Error interposing; rc = [%ld]\n", + __func__, PTR_ERR(inode)); + return PTR_ERR(inode); } - put_lower = 1; - crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; - /* TODO: lock for crypt_stat comparison */ - if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) - ecryptfs_set_default_sizes(crypt_stat); - rc = ecryptfs_read_and_validate_header_region(page_virt, inode); - if (rc) { - memset(page_virt, 0, PAGE_CACHE_SIZE); - rc = ecryptfs_read_and_validate_xattr_region(page_virt, - inode); + if (S_ISREG(inode->i_mode)) { + rc = ecryptfs_i_size_read(dentry, inode); if (rc) { - rc = 0; - goto unlock_inode; + make_bad_inode(inode); + return rc; } - crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; } - ecryptfs_i_size_init(page_virt, inode); -unlock_inode: + if (inode->i_state & I_NEW) unlock_new_inode(inode); - d_add(ecryptfs_dentry, inode); -out_free_kmem: - kmem_cache_free(ecryptfs_header_cache_2, page_virt); - goto out; -out_put: - dput(lower_dentry); - mntput(lower_mnt); - d_drop(ecryptfs_dentry); -out: - if (put_lower) - ecryptfs_put_lower_file(inode); + d_add(dentry, inode); + return rc; } -- cgit v1.2.3-70-g09d2 From 3063287053bca5207e121c567b95b2b6f0bdc2c8 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 24 May 2011 05:11:12 -0500 Subject: eCryptfs: Remove ecryptfs_header_cache_2 Now that ecryptfs_lookup_interpose() is no longer using ecryptfs_header_cache_2 to read in metadata, the kmem_cache can be removed and the ecryptfs_header_cache_1 kmem_cache can be renamed to ecryptfs_header_cache. Signed-off-by: Tyler Hicks --- fs/ecryptfs/crypto.c | 7 +++---- fs/ecryptfs/ecryptfs_kernel.h | 3 +-- fs/ecryptfs/main.c | 9 ++------- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 66d8e6748a4..58609bde3b9 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1234,8 +1234,7 @@ ecryptfs_write_header_metadata(char *virt, (*written) = 6; } -struct kmem_cache *ecryptfs_header_cache_1; -struct kmem_cache *ecryptfs_header_cache_2; +struct kmem_cache *ecryptfs_header_cache; /** * ecryptfs_write_headers_virt @@ -1601,7 +1600,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat, mount_crypt_stat); /* Read the first page from the underlying file */ - page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER); + page_virt = kmem_cache_alloc(ecryptfs_header_cache, GFP_USER); if (!page_virt) { rc = -ENOMEM; printk(KERN_ERR "%s: Unable to allocate page_virt\n", @@ -1646,7 +1645,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) out: if (page_virt) { memset(page_virt, 0, PAGE_CACHE_SIZE); - kmem_cache_free(ecryptfs_header_cache_1, page_virt); + kmem_cache_free(ecryptfs_header_cache, page_virt); } return rc; } diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 8297ddaca7c..43c7c43b06f 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -605,8 +605,7 @@ extern struct kmem_cache *ecryptfs_file_info_cache; extern struct kmem_cache *ecryptfs_dentry_info_cache; extern struct kmem_cache *ecryptfs_inode_info_cache; extern struct kmem_cache *ecryptfs_sb_info_cache; -extern struct kmem_cache *ecryptfs_header_cache_1; -extern struct kmem_cache *ecryptfs_header_cache_2; +extern struct kmem_cache *ecryptfs_header_cache; extern struct kmem_cache *ecryptfs_xattr_cache; extern struct kmem_cache *ecryptfs_key_record_cache; extern struct kmem_cache *ecryptfs_key_sig_cache; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 943a4f55ed6..9f1bb747d77 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -635,13 +635,8 @@ static struct ecryptfs_cache_info { .size = sizeof(struct ecryptfs_sb_info), }, { - .cache = &ecryptfs_header_cache_1, - .name = "ecryptfs_headers_1", - .size = PAGE_CACHE_SIZE, - }, - { - .cache = &ecryptfs_header_cache_2, - .name = "ecryptfs_headers_2", + .cache = &ecryptfs_header_cache, + .name = "ecryptfs_headers", .size = PAGE_CACHE_SIZE, }, { -- cgit v1.2.3-70-g09d2 From fac04863cef53a69830590b2e1c54345068a9747 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 May 2011 14:06:42 -0700 Subject: arm gpio drivers: make them 'depends on ARM' We had a few drivers move from arch/arm into drivers/gpio, but they don't actually compile without the ARM platform headers etc. As a result they were messing up allyesconfig on x86. Make them depend on ARM. Reported-by: Ingo Molnar Cc: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 592397629dd..4a7f6314345 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -89,24 +89,28 @@ config GPIO_IT8761E config GPIO_EXYNOS4 bool "Samsung Exynos4 GPIO library support" default y if CPU_EXYNOS4210 + depends on ARM help Say yes here to support Samsung Exynos4 series SoCs GPIO library config GPIO_PLAT_SAMSUNG bool "Samsung SoCs GPIO library support" default y if SAMSUNG_GPIOLIB_4BIT + depends on ARM help Say yes here to support Samsung SoCs GPIO library config GPIO_S5PC100 bool "Samsung S5PC100 GPIO library support" default y if CPU_S5PC100 + depends on ARM help Say yes here to support Samsung S5PC100 SoCs GPIO library config GPIO_S5PV210 bool "Samsung S5PV210/S5PC110 GPIO library support" default y if CPU_S5PV210 + depends on ARM help Say yes here to support Samsung S5PV210/S5PC110 SoCs GPIO library -- cgit v1.2.3-70-g09d2 From 55922c9d1b84b89cb946c777fddccb3247e7df2c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 May 2011 17:43:36 -0700 Subject: Linux 3.0-rc1 .. except there are various scripts that really know that there are three numbers, so it calls itself "3.0.0-rc1". Hopefully by the time the final 3.0 is out, we'll have that extra zero all figured out. Signed-off-by: Linus Torvalds --- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 529d93fa243..afb8e0d26f2 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ -VERSION = 2 -PATCHLEVEL = 6 -SUBLEVEL = 39 -EXTRAVERSION = -NAME = Flesh-Eating Bats with Fangs +VERSION = 3 +PATCHLEVEL = 0 +SUBLEVEL = 0 +EXTRAVERSION = -rc1 +NAME = Sneaky Weasel # *DOCUMENTATION* # To see a list of typical targets execute "make help" -- cgit v1.2.3-70-g09d2 From 15517f7c213442e4d8a098cf0732b237f764c576 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:08 -0600 Subject: lguest: fix timer interrupt setup Without an IRQ chip set, we now get a WARN_ON and no timer interrupt. This prevents booting. Fortunately, the fix is a one-liner: set up the timer IRQ like everything else. Signed-off-by: Rusty Russell Cc: stable@kernel.org # .39.x --- arch/x86/lguest/boot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index e191c096ab9..db832fd65ec 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -993,6 +993,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) static void lguest_time_init(void) { /* Set up the timer interrupt (0) to go to our simple timer routine */ + lguest_setup_irq(0); irq_set_handler(0, lguest_time_irq); clocksource_register_hz(&lguest_clock, NSEC_PER_SEC); -- cgit v1.2.3-70-g09d2 From bc805a03c26e1e25171bc627c6264553d27f746c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:11 -0600 Subject: lguest: fix up compilation after move ed16648eb5b86917f0b90bdcdbc857202da72f90 "Move kvm, uml, and lguest subdirectories" broke the lguest example launcher. Signed-off-by: Rusty Russell --- Documentation/virtual/lguest/Makefile | 2 +- Documentation/virtual/lguest/lguest.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile index bebac6b4f33..0ac34206f7a 100644 --- a/Documentation/virtual/lguest/Makefile +++ b/Documentation/virtual/lguest/Makefile @@ -1,5 +1,5 @@ # This creates the demonstration utility "lguest" which runs a Linux guest. -# Missing headers? Add "-I../../include -I../../arch/x86/include" +# Missing headers? Add "-I../../../include -I../../../arch/x86/include" CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE all: lguest diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c index d9da7e14853..711ba9e9a00 100644 --- a/Documentation/virtual/lguest/lguest.c +++ b/Documentation/virtual/lguest/lguest.c @@ -49,7 +49,7 @@ #include #include #include -#include "../../include/linux/lguest_launcher.h" +#include "../../../include/linux/lguest_launcher.h" /*L:110 * We can ignore the 42 include files we need for this program, but I do want * to draw attention to the use of kernel-style types. -- cgit v1.2.3-70-g09d2 From 990c91f0af46c57f0291060d928c7ab82f9d5667 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:12 -0600 Subject: lguest: remove support for VIRTIO_F_NOTIFY_ON_EMPTY. No virtio device does this any more, so no need to clutter lguest with it. Signed-off-by: Rusty Russell --- Documentation/virtual/lguest/lguest.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c index 711ba9e9a00..cd9d6af61d0 100644 --- a/Documentation/virtual/lguest/lguest.c +++ b/Documentation/virtual/lguest/lguest.c @@ -135,9 +135,6 @@ struct device { /* Is it operational */ bool running; - /* Does Guest want an intrrupt on empty? */ - bool irq_on_empty; - /* Device-specific data. */ void *priv; }; @@ -637,10 +634,7 @@ static void trigger_irq(struct virtqueue *vq) /* If they don't want an interrupt, don't send one... */ if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { - /* ... unless they've asked us to force one on empty. */ - if (!vq->dev->irq_on_empty - || lg_last_avail(vq) != vq->vring.avail->idx) - return; + return; } /* Send the Guest an interrupt tell them we used something up. */ @@ -1057,15 +1051,6 @@ static void create_thread(struct virtqueue *vq) close(vq->eventfd); } -static bool accepted_feature(struct device *dev, unsigned int bit) -{ - const u8 *features = get_feature_bits(dev) + dev->feature_len; - - if (dev->feature_len < bit / CHAR_BIT) - return false; - return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT)); -} - static void start_device(struct device *dev) { unsigned int i; @@ -1079,8 +1064,6 @@ static void start_device(struct device *dev) verbose(" %02x", get_feature_bits(dev) [dev->feature_len+i]); - dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); - for (vq = dev->vq; vq; vq = vq->next) { if (vq->service) create_thread(vq); @@ -1564,7 +1547,6 @@ static void setup_tun_net(char *arg) /* Set up the tun device. */ configure_device(ipfd, tapif, ip); - add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); /* Expect Guest to handle everything except UFO */ add_feature(dev, VIRTIO_NET_F_CSUM); add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); -- cgit v1.2.3-70-g09d2 From 7a7c924cf03da2a76ea4dc0aac1a788cf95a9c29 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Feb 2011 21:43:48 +0100 Subject: virtio_blk: allow re-reading config space at runtime Wire up the virtio_driver config_changed method to get notified about config changes raised by the host. For now we just re-read the device size to support online resizing of devices, but once we add more attributes that might be changeable they could be added as well. Note that the config_changed method is called from irq context, so we'll have to use the workqueue infrastructure to provide us a proper user context for our changes. Signed-off-by: Christoph Hellwig Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 88 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 78 insertions(+), 10 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6ecf89cdf00..33a48a80c7e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -6,10 +6,12 @@ #include #include #include +#include #define PART_BITS 4 static int major, index; +struct workqueue_struct *virtblk_wq; struct virtio_blk { @@ -26,6 +28,9 @@ struct virtio_blk mempool_t *pool; + /* Process context for config space updates */ + struct work_struct config_work; + /* What host tells us, plus 2 for header & tailer. */ unsigned int sg_elems; @@ -291,6 +296,46 @@ static ssize_t virtblk_serial_show(struct device *dev, } DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); +static void virtblk_config_changed_work(struct work_struct *work) +{ + struct virtio_blk *vblk = + container_of(work, struct virtio_blk, config_work); + struct virtio_device *vdev = vblk->vdev; + struct request_queue *q = vblk->disk->queue; + char cap_str_2[10], cap_str_10[10]; + u64 capacity, size; + + /* Host must always specify the capacity. */ + vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), + &capacity, sizeof(capacity)); + + /* If capacity is too big, truncate with warning. */ + if ((sector_t)capacity != capacity) { + dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", + (unsigned long long)capacity); + capacity = (sector_t)-1; + } + + size = capacity * queue_logical_block_size(q); + string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); + string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); + + dev_notice(&vdev->dev, + "new size: %llu %d-byte logical blocks (%s/%s)\n", + (unsigned long long)capacity, + queue_logical_block_size(q), + cap_str_10, cap_str_2); + + set_capacity(vblk->disk, capacity); +} + +static void virtblk_config_changed(struct virtio_device *vdev) +{ + struct virtio_blk *vblk = vdev->priv; + + queue_work(virtblk_wq, &vblk->config_work); +} + static int __devinit virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -327,6 +372,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk->vdev = vdev; vblk->sg_elems = sg_elems; sg_init_table(vblk->sg, vblk->sg_elems); + INIT_WORK(&vblk->config_work, virtblk_config_changed_work); /* We expect one virtqueue, for output. */ vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); @@ -477,6 +523,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + flush_work(&vblk->config_work); + /* Nothing should be pending. */ BUG_ON(!list_empty(&vblk->reqs)); @@ -508,27 +556,47 @@ static unsigned int features[] = { * Use __refdata to avoid this warning. */ static struct virtio_driver __refdata virtio_blk = { - .feature_table = features, - .feature_table_size = ARRAY_SIZE(features), - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = virtblk_probe, - .remove = __devexit_p(virtblk_remove), + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtblk_probe, + .remove = __devexit_p(virtblk_remove), + .config_changed = virtblk_config_changed, }; static int __init init(void) { + int error; + + virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); + if (!virtblk_wq) + return -ENOMEM; + major = register_blkdev(0, "virtblk"); - if (major < 0) - return major; - return register_virtio_driver(&virtio_blk); + if (major < 0) { + error = major; + goto out_destroy_workqueue; + } + + error = register_virtio_driver(&virtio_blk); + if (error) + goto out_unregister_blkdev; + return 0; + +out_unregister_blkdev: + unregister_blkdev(major, "virtblk"); +out_destroy_workqueue: + destroy_workqueue(virtblk_wq); + return error; } static void __exit fini(void) { unregister_blkdev(major, "virtblk"); unregister_virtio_driver(&virtio_blk); + destroy_workqueue(virtblk_wq); } module_init(init); module_exit(fini); -- cgit v1.2.3-70-g09d2 From 6917f83ffe5e6b6414ccc845263b792ed201c0f1 Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Sun, 24 Apr 2011 02:49:26 +0800 Subject: drivers, block: virtio_blk: Replace cryptic number with the macro It is easier to figure out the context by reading SCSI_SENSE_BUFFERSIZE instead of plain '96'. Signed-off-by: Liu Yuan Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 33a48a80c7e..079c08808d8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -7,6 +7,7 @@ #include #include #include +#include #define PART_BITS 4 @@ -146,7 +147,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, num = blk_rq_map_sg(q, vbr->req, vblk->sg + out); if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) { - sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96); + sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE); sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr, sizeof(vbr->in_hdr)); } -- cgit v1.2.3-70-g09d2 From 177dbd95637a52b9118aca757d5856ec3995d3e7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:13 -0600 Subject: virtio console: don't manually set or finalize VIRTIO_CONSOLE_F_MULTIPORT. That's already been done by the virtio infrastructure before the probe function is called. Reported-by: alexey.kardashevskiy@au1.ibm.com Acked-by: Amit Shah Tested-by: Amit Shah Signed-off-by: Rusty Russell --- drivers/char/virtio_console.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 838568a7dbf..fb68b129537 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1677,17 +1677,12 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) portdev->config.max_nr_ports = 1; if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) { multiport = true; - vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT; - vdev->config->get(vdev, offsetof(struct virtio_console_config, max_nr_ports), &portdev->config.max_nr_ports, sizeof(portdev->config.max_nr_ports)); } - /* Let the Host know we support multiple ports.*/ - vdev->config->finalize_features(vdev); - err = init_vqs(portdev); if (err < 0) { dev_err(&vdev->dev, "Error %d initializing vqs\n", err); -- cgit v1.2.3-70-g09d2 From bf50e69f63d21091e525185c3ae761412be0ba72 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Apr 2011 10:43:25 -0700 Subject: virtio balloon: kill tell-host-first logic The virtio balloon driver has a VIRTIO_BALLOON_F_MUST_TELL_HOST feature bit. Whenever the bit is set, the guest kernel must always tell the host before we free pages back to the allocator. Without this feature, we might free a page (and have another user touch it) while the hypervisor is unprepared for it. But, if the bit is _not_ set, we are under no obligation to reverse the order; we're under no obligation to do _anything_. As of now, qemu-kvm defines the bit, but doesn't set it. This patch makes the "tell host first" logic the only case. This should make everybody happy, and reduce the amount of untested or untestable code in the kernel. This _also_ means that we don't have to preserve a pfn list after the pages are freed, which should let us get rid of some temporary storage (vb->pfns) eventually. Signed-off-by: Dave Hansen Signed-off-by: Rusty Russell --- drivers/virtio/virtio_balloon.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 0f1da45ba47..e058ace2a4a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -40,9 +40,6 @@ struct virtio_balloon /* Waiting for host to ack the pages we released. */ struct completion acked; - /* Do we have to tell Host *before* we reuse pages? */ - bool tell_host_first; - /* The pages we've told the Host we're not using. */ unsigned int num_pages; struct list_head pages; @@ -151,13 +148,14 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) vb->num_pages--; } - if (vb->tell_host_first) { - tell_host(vb, vb->deflate_vq); - release_pages_by_pfn(vb->pfns, vb->num_pfns); - } else { - release_pages_by_pfn(vb->pfns, vb->num_pfns); - tell_host(vb, vb->deflate_vq); - } + + /* + * Note that if + * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); + * is true, we *have* to do it in this order + */ + tell_host(vb, vb->deflate_vq); + release_pages_by_pfn(vb->pfns, vb->num_pfns); } static inline void update_stat(struct virtio_balloon *vb, int idx, @@ -325,9 +323,6 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_del_vqs; } - vb->tell_host_first - = virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); - return 0; out_del_vqs: -- cgit v1.2.3-70-g09d2 From a1b383870a28cfbd1657d4922c0fafc634a62ebd Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:13 -0600 Subject: virtio: add full three-clause BSD text to headers. It's unclear to me if it's important, but it's obviously causing my technical colleages some headaches and I'd hate such imprecision to slow virtio adoption. I've emailed this to all non-trivial contributors for approval, too. Signed-off-by: Rusty Russell Acked-by: Grant Likely Acked-by: Ryan Harper Acked-by: Anthony Liguori Acked-by: Eric Van Hensbergen Acked-by: john cooper Acked-by: Aneesh Kumar K.V Acked-by: Christian Borntraeger Acked-by: Fernando Luis Vazquez Cao --- include/linux/virtio_9p.h | 25 ++++++++++++++++++++++++- include/linux/virtio_balloon.h | 25 ++++++++++++++++++++++++- include/linux/virtio_blk.h | 25 ++++++++++++++++++++++++- include/linux/virtio_config.h | 25 ++++++++++++++++++++++++- include/linux/virtio_console.h | 26 +++++++++++++++++++++++++- include/linux/virtio_ids.h | 24 +++++++++++++++++++++++- include/linux/virtio_net.h | 25 ++++++++++++++++++++++++- include/linux/virtio_pci.h | 23 +++++++++++++++++++++++ include/linux/virtio_ring.h | 23 +++++++++++++++++++++++ 9 files changed, 214 insertions(+), 7 deletions(-) diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index e68b439b286..277c4ad44e8 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_9P_H #define _LINUX_VIRTIO_9P_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index a50ecd1b81a..652dc8bea92 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BALLOON_H #define _LINUX_VIRTIO_BALLOON_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 167720d695e..e0edb40ca7a 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BLK_H #define _LINUX_VIRTIO_BLK_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 800617b4ddd..39c88c5ad19 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_CONFIG_H #define _LINUX_VIRTIO_CONFIG_H /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. */ + * anyone can use the definitions to implement compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ /* Virtio devices use a standardized configuration space to define their * features and pass configuration information, but each implementation can diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index e4d333543a3..bdf4b003473 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -5,7 +5,31 @@ #include /* * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * Copyright (C) Red Hat, Inc., 2009, 2010, 2011 * Copyright (C) Amit Shah , 2009, 2010, 2011 diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h index 06660c0a78d..85bb0bb66ff 100644 --- a/include/linux/virtio_ids.h +++ b/include/linux/virtio_ids.h @@ -5,7 +5,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. - */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #define VIRTIO_ID_NET 1 /* virtio net */ #define VIRTIO_ID_BLOCK 2 /* virtio block */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 085e42298ce..136040bba3e 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index 9a3d7c48c62..ea66f3f60d6 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -11,6 +11,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #ifndef _LINUX_VIRTIO_PCI_H diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e4d144b132b..a813e5d460e 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -7,6 +7,29 @@ * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * * Copyright Rusty Russell IBM Corporation 2007. */ #include -- cgit v1.2.3-70-g09d2 From 770b31a85e000b0194974922f238a30ade4246b6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:17 +0300 Subject: virtio: event index interface Define a new feature bit for the guest and host to utilize an event index (like Xen) instead if a flag bit to enable/disable interrupts and kicks. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- include/linux/virtio_ring.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index a813e5d460e..c4eef73deb3 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -52,6 +52,12 @@ /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC 28 +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { /* Address (guest-physical). */ @@ -106,6 +112,7 @@ struct vring { * __u16 avail_flags; * __u16 avail_idx; * __u16 available[num]; + * __u16 used_event_idx; * * // Padding to the next align boundary. * char pad[]; @@ -114,8 +121,14 @@ struct vring { * __u16 used_flags; * __u16 used_idx; * struct vring_used_elem used[num]; + * __u16 avail_event_idx; * }; */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num]) + static inline void vring_init(struct vring *vr, unsigned int num, void *p, unsigned long align) { @@ -130,7 +143,7 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) { return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) + align - 1) & ~(align - 1)) - + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; + + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; } #ifdef __KERNEL__ -- cgit v1.2.3-70-g09d2 From bf7035bf20563a6cadcb9e870406e7b21daf5e30 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:27 +0300 Subject: virtio ring: inline function to check for events With the new used_event and avail_event and features, both host and guest need similar logic to check whether events are enabled, so it helps to put the common code in the header. Note that Xen has similar logic for notification hold-off in include/xen/interface/io/ring.h with req_event and req_prod corresponding to event_idx + 1 and new_idx respectively. +1 comes from the fact that req_event and req_prod in Xen start at 1, while event index in virtio starts at 0. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- include/linux/virtio_ring.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index c4eef73deb3..4a32cb6da42 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -146,6 +146,20 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; } +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other size, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. */ + return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); +} + #ifdef __KERNEL__ #include struct virtio_device; -- cgit v1.2.3-70-g09d2 From a5c262c5fd83ece01bd649fb08416c501d4c59d7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:44 +0300 Subject: virtio_ring: support event idx feature Support for the new event idx feature: 1. When enabling interrupts, publish the current avail index value to the host to get interrupts on the next update. 2. Use the new avail_event feature to reduce the number of exits from the guest. Simple test with the simulator: [virtio]# time ./virtio_test spurious wakeus: 0x7 real 0m0.169s user 0m0.140s sys 0m0.019s [virtio]# time ./virtio_test --no-event-idx spurious wakeus: 0x11 real 0m0.649s user 0m0.295s sys 0m0.335s Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b0043fb26a4..a5fadc40c44 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -82,6 +82,9 @@ struct vring_virtqueue /* Host supports indirect buffers */ bool indirect; + /* Host publishes avail event idx */ + bool event; + /* Number of free buffers */ unsigned int num_free; /* Head of free buffer list. */ @@ -237,18 +240,22 @@ EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp); void virtqueue_kick(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + u16 new, old; START_USE(vq); /* Descriptors and available array need to be set before we expose the * new available array entries. */ virtio_wmb(); - vq->vring.avail->idx += vq->num_added; + old = vq->vring.avail->idx; + new = vq->vring.avail->idx = old + vq->num_added; vq->num_added = 0; /* Need to update avail index before checking if we should notify */ virtio_mb(); - if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) + if (vq->event ? + vring_need_event(vring_avail_event(&vq->vring), new, old) : + !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) /* Prod other side to tell it about changes. */ vq->notify(&vq->vq); @@ -324,6 +331,14 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) ret = vq->data[i]; detach_buf(vq, i); vq->last_used_idx++; + /* If we expect an interrupt for the next entry, tell host + * by writing event index and flush out the write before + * the read in the next get_buf call. */ + if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { + vring_used_event(&vq->vring) = vq->last_used_idx; + virtio_mb(); + } + END_USE(vq); return ret; } @@ -345,7 +360,11 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) /* We optimistically turn back on interrupts, then check if there was * more to do. */ + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + vring_used_event(&vq->vring) = vq->last_used_idx; virtio_mb(); if (unlikely(more_used(vq))) { END_USE(vq); @@ -438,6 +457,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, #endif vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); + vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); /* No callback? Tell other side not to bother us. */ if (!callback) @@ -472,6 +492,8 @@ void vring_transport_features(struct virtio_device *vdev) switch (i) { case VIRTIO_RING_F_INDIRECT_DESC: break; + case VIRTIO_RING_F_EVENT_IDX: + break; default: /* We don't understand this bit. */ clear_bit(i, vdev->features); -- cgit v1.2.3-70-g09d2 From 8ea8cf89e19aeb596b818ee5f2bec8a8b0586b60 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:54 +0300 Subject: vhost: support event index Support the new event index feature. When acked, utilize it to reduce the # of interrupts sent to the guest. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- drivers/vhost/net.c | 12 ++--- drivers/vhost/test.c | 6 +-- drivers/vhost/vhost.c | 138 +++++++++++++++++++++++++++++++++++++------------- drivers/vhost/vhost.h | 21 +++++--- 4 files changed, 127 insertions(+), 50 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2f7c76a85e5..e224a92baa1 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -144,7 +144,7 @@ static void handle_tx(struct vhost_net *net) } mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); if (wmem < sock->sk->sk_sndbuf / 2) tx_poll_stop(net); @@ -166,8 +166,8 @@ static void handle_tx(struct vhost_net *net) set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); break; } - if (unlikely(vhost_enable_notify(vq))) { - vhost_disable_notify(vq); + if (unlikely(vhost_enable_notify(&net->dev, vq))) { + vhost_disable_notify(&net->dev, vq); continue; } break; @@ -315,7 +315,7 @@ static void handle_rx(struct vhost_net *net) return; mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); vhost_hlen = vq->vhost_hlen; sock_hlen = vq->sock_hlen; @@ -334,10 +334,10 @@ static void handle_rx(struct vhost_net *net) break; /* OK, now we need to know about added descriptors. */ if (!headcount) { - if (unlikely(vhost_enable_notify(vq))) { + if (unlikely(vhost_enable_notify(&net->dev, vq))) { /* They have slipped one in as we were * doing that: check again. */ - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); continue; } /* Nothing new? Wait for eventfd to tell us diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 099f30230d0..734e1d74ad8 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -49,7 +49,7 @@ static void handle_vq(struct vhost_test *n) return; mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&n->dev, vq); for (;;) { head = vhost_get_vq_desc(&n->dev, vq, vq->iov, @@ -61,8 +61,8 @@ static void handle_vq(struct vhost_test *n) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { - if (unlikely(vhost_enable_notify(vq))) { - vhost_disable_notify(vq); + if (unlikely(vhost_enable_notify(&n->dev, vq))) { + vhost_disable_notify(&n->dev, vq); continue; } break; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 7aa4eea930f..ea966b35635 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -37,6 +37,9 @@ enum { VHOST_MEMORY_F_LOG = 0x1, }; +#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) +#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -161,6 +164,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->last_avail_idx = 0; vq->avail_idx = 0; vq->last_used_idx = 0; + vq->signalled_used = 0; + vq->signalled_used_valid = false; vq->used_flags = 0; vq->log_used = false; vq->log_addr = -1ull; @@ -489,16 +494,17 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, return 1; } -static int vq_access_ok(unsigned int num, +static int vq_access_ok(struct vhost_dev *d, unsigned int num, struct vring_desc __user *desc, struct vring_avail __user *avail, struct vring_used __user *used) { + size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return access_ok(VERIFY_READ, desc, num * sizeof *desc) && access_ok(VERIFY_READ, avail, - sizeof *avail + num * sizeof *avail->ring) && + sizeof *avail + num * sizeof *avail->ring + s) && access_ok(VERIFY_WRITE, used, - sizeof *used + num * sizeof *used->ring); + sizeof *used + num * sizeof *used->ring + s); } /* Can we log writes? */ @@ -514,9 +520,11 @@ int vhost_log_access_ok(struct vhost_dev *dev) /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ -static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) +static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq, + void __user *log_base) { struct vhost_memory *mp; + size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; mp = rcu_dereference_protected(vq->dev->memory, lockdep_is_held(&vq->mutex)); @@ -524,15 +532,15 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + - vq->num * sizeof *vq->used->ring)); + vq->num * sizeof *vq->used->ring + s)); } /* Can we start vq? */ /* Caller should have vq mutex and device mutex */ int vhost_vq_access_ok(struct vhost_virtqueue *vq) { - return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) && - vq_log_access_ok(vq, vq->log_base); + return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) && + vq_log_access_ok(vq->dev, vq, vq->log_base); } static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) @@ -577,6 +585,7 @@ static int init_used(struct vhost_virtqueue *vq, if (r) return r; + vq->signalled_used_valid = false; return get_user(vq->last_used_idx, &used->idx); } @@ -674,7 +683,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) * If it is not, we don't as size might not have been setup. * We will verify when backend is configured. */ if (vq->private_data) { - if (!vq_access_ok(vq->num, + if (!vq_access_ok(d, vq->num, (void __user *)(unsigned long)a.desc_user_addr, (void __user *)(unsigned long)a.avail_user_addr, (void __user *)(unsigned long)a.used_user_addr)) { @@ -818,7 +827,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg) vq = d->vqs + i; mutex_lock(&vq->mutex); /* If ring is inactive, will check when it's enabled. */ - if (vq->private_data && !vq_log_access_ok(vq, base)) + if (vq->private_data && !vq_log_access_ok(d, vq, base)) r = -EFAULT; else vq->log_base = base; @@ -1219,6 +1228,10 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, /* On success, increment avail index. */ vq->last_avail_idx++; + + /* Assume notifications from guest are disabled at this point, + * if they aren't we would need to update avail_event index. */ + BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); return head; } @@ -1267,6 +1280,12 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) eventfd_signal(vq->log_ctx, 1); } vq->last_used_idx++; + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. */ + if (unlikely(vq->last_used_idx == vq->signalled_used)) + vq->signalled_used_valid = false; return 0; } @@ -1275,6 +1294,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, unsigned count) { struct vring_used_elem __user *used; + u16 old, new; int start; start = vq->last_used_idx % vq->num; @@ -1292,7 +1312,14 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, ((void __user *)used - (void __user *)vq->used), count * sizeof *used); } - vq->last_used_idx += count; + old = vq->last_used_idx; + new = (vq->last_used_idx += count); + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. */ + if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) + vq->signalled_used_valid = false; return 0; } @@ -1331,29 +1358,47 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, return r; } -/* This actually signals the guest, using eventfd. */ -void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { - __u16 flags; - + __u16 old, new, event; + bool v; /* Flush out used index updates. This is paired * with the barrier that the Guest executes when enabling * interrupts. */ smp_mb(); - if (__get_user(flags, &vq->avail->flags)) { - vq_err(vq, "Failed to get flags"); - return; + if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) && + unlikely(vq->avail_idx == vq->last_avail_idx)) + return true; + + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + __u16 flags; + if (__get_user(flags, &vq->avail->flags)) { + vq_err(vq, "Failed to get flags"); + return true; + } + return !(flags & VRING_AVAIL_F_NO_INTERRUPT); } + old = vq->signalled_used; + v = vq->signalled_used_valid; + new = vq->signalled_used = vq->last_used_idx; + vq->signalled_used_valid = true; - /* If they don't want an interrupt, don't signal, unless empty. */ - if ((flags & VRING_AVAIL_F_NO_INTERRUPT) && - (vq->avail_idx != vq->last_avail_idx || - !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY))) - return; + if (unlikely(!v)) + return true; + if (get_user(event, vhost_used_event(vq))) { + vq_err(vq, "Failed to get used event idx"); + return true; + } + return vring_need_event(event, new, old); +} + +/* This actually signals the guest, using eventfd. */ +void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ /* Signal the Guest tell them we used something up. */ - if (vq->call_ctx) + if (vq->call_ctx && vhost_notify(dev, vq)) eventfd_signal(vq->call_ctx, 1); } @@ -1376,7 +1421,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, } /* OK, now we need to know about added descriptors. */ -bool vhost_enable_notify(struct vhost_virtqueue *vq) +bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { u16 avail_idx; int r; @@ -1384,11 +1429,34 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) return false; vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; - r = put_user(vq->used_flags, &vq->used->flags); - if (r) { - vq_err(vq, "Failed to enable notification at %p: %d\n", - &vq->used->flags, r); - return false; + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + r = put_user(vq->used_flags, &vq->used->flags); + if (r) { + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + return false; + } + } else { + r = put_user(vq->avail_idx, vhost_avail_event(vq)); + if (r) { + vq_err(vq, "Failed to update avail event index at %p: %d\n", + vhost_avail_event(vq), r); + return false; + } + } + if (unlikely(vq->log_used)) { + void __user *used; + /* Make sure data is seen before log. */ + smp_wmb(); + used = vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ? + &vq->used->flags : vhost_avail_event(vq); + /* Log used flags or event index entry write. Both are 16 bit + * fields. */ + log_write(vq->log_base, vq->log_addr + + (used - (void __user *)vq->used), + sizeof(u16)); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); } /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ @@ -1404,15 +1472,17 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) } /* We don't need to be notified again. */ -void vhost_disable_notify(struct vhost_virtqueue *vq) +void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (vq->used_flags & VRING_USED_F_NO_NOTIFY) return; vq->used_flags |= VRING_USED_F_NO_NOTIFY; - r = put_user(vq->used_flags, &vq->used->flags); - if (r) - vq_err(vq, "Failed to enable notification at %p: %d\n", - &vq->used->flags, r); + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + r = put_user(vq->used_flags, &vq->used->flags); + if (r) + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + } } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index b3363ae3851..8e03379dd30 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -84,6 +84,12 @@ struct vhost_virtqueue { /* Used flags */ u16 used_flags; + /* Last used index value we have signalled on */ + u16 signalled_used; + + /* Last used index value we have signalled on */ + bool signalled_used_valid; + /* Log writes to used structure. */ bool log_used; u64 log_addr; @@ -149,8 +155,8 @@ void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *, void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); -void vhost_disable_notify(struct vhost_virtqueue *); -bool vhost_enable_notify(struct vhost_virtqueue *); +void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); +bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len); @@ -162,11 +168,12 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, } while (0) enum { - VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) | - (1 << VIRTIO_RING_F_INDIRECT_DESC) | - (1 << VHOST_F_LOG_ALL) | - (1 << VHOST_NET_F_VIRTIO_NET_HDR) | - (1 << VIRTIO_NET_F_MRG_RXBUF), + VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | + (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX) | + (1ULL << VHOST_F_LOG_ALL) | + (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | + (1ULL << VIRTIO_NET_F_MRG_RXBUF), }; static inline int vhost_has_feature(struct vhost_dev *dev, int bit) -- cgit v1.2.3-70-g09d2 From 4423fe40b03f32b11e72ecfa03077e702e55d5a9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:11:05 +0300 Subject: virtio_test: support event index Add ability to test the new event idx feature, enable by default. Signed-off-by: Rusty Russell --- tools/virtio/virtio_test.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index df0c6d2c386..74d3331bdaf 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -197,6 +197,14 @@ const struct option longopts[] = { .name = "help", .val = 'h', }, + { + .name = "event-idx", + .val = 'E', + }, + { + .name = "no-event-idx", + .val = 'e', + }, { .name = "indirect", .val = 'I', @@ -211,13 +219,17 @@ const struct option longopts[] = { static void help() { - fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n"); + fprintf(stderr, "Usage: virtio_test [--help]" + " [--no-indirect]" + " [--no-event-idx]" + "\n"); } int main(int argc, char **argv) { struct vdev_info dev; - unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC; + unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX); int o; for (;;) { @@ -228,6 +240,9 @@ int main(int argc, char **argv) case '?': help(); exit(2); + case 'e': + features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); + break; case 'h': help(); goto done; -- cgit v1.2.3-70-g09d2 From 7ab358c23cbf15cea08129cd722d1ce77433a94d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:11:14 +0300 Subject: virtio: add api for delayed callbacks Add an API that tells the other side that callbacks should be delayed until a lot of work has been done. Implement using the new event_idx feature. Note: it might seem advantageous to let the drivers ask for a callback after a specific capacity has been reached. However, as a single head can free many entries in the descriptor table, we don't really have a clue about capacity until get_buf is called. The API is the simplest to implement at the moment, we'll see what kind of hints drivers can pass when there's more than one user of the feature. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 27 +++++++++++++++++++++++++++ include/linux/virtio.h | 9 +++++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index a5fadc40c44..68b9136847a 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -376,6 +376,33 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); +bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 bufs; + + START_USE(vq); + + /* We optimistically turn back on interrupts, then check if there was + * more to do. */ + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ + vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + /* TODO: tune this threshold */ + bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; + vring_used_event(&vq->vring) = vq->last_used_idx + bufs; + virtio_mb(); + if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { + END_USE(vq); + return false; + } + + END_USE(vq); + return true; +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); + void *virtqueue_detach_unused_buf(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index aff5b4f7404..71088574960 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -51,6 +51,13 @@ struct virtqueue { * This re-enables callbacks; it returns "false" if there are pending * buffers in the queue, to detect a possible race between the driver * checking for more work, and enabling callbacks. + * virtqueue_enable_cb_delayed: restart callbacks after disable_cb. + * vq: the struct virtqueue we're talking about. + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. * virtqueue_detach_unused_buf: detach first unused buffer * vq: the struct virtqueue we're talking about. * Returns NULL or the "data" token handed to add_buf @@ -86,6 +93,8 @@ void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); +bool virtqueue_enable_cb_delayed(struct virtqueue *vq); + void *virtqueue_detach_unused_buf(struct virtqueue *vq); /** -- cgit v1.2.3-70-g09d2 From 7a66f784375c5922315bbe879b789ee50b924d26 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:11:23 +0300 Subject: virtio_net: delay TX callbacks Ask for delayed callbacks on TX ring full, to give the other side more of a chance to make progress. Signed-off-by: Michael S. Tsirkin Acked-by: David S. Miller Signed-off-by: Rusty Russell --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0cb0b063267..f6853247a62 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -609,7 +609,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) * before it gets out of hand. Naturally, this wastes entries. */ if (capacity < 2+MAX_SKB_FRAGS) { netif_stop_queue(dev); - if (unlikely(!virtqueue_enable_cb(vi->svq))) { + if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) { /* More just got used, free them then recheck. */ capacity += free_old_xmit_skbs(vi); if (capacity >= 2+MAX_SKB_FRAGS) { -- cgit v1.2.3-70-g09d2 From a1706ac4c0201ea0143dc0db0659001b26ceeabb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 May 2011 07:42:51 +0200 Subject: Revert "block: Remove extra discard_alignment from hd_struct." It was not a good idea to start dereferencing disk->queue from the fs sysfs strategy for displaying discard alignment. We ran into first a NULL pointer deref, and after fixing that we sometimes see unvalid disk->queue pointer values. Since discard is the only one of the bunch actually looking into the queue, just revert the change. This reverts commit 23ceb5b7719e9276d4fa72a3ecf94dd396755276. Conflicts: fs/partitions/check.c --- fs/partitions/check.c | 10 +++------- include/linux/genhd.h | 1 + 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index f82e762eeca..d545e97d99c 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -255,13 +255,7 @@ ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - struct gendisk *disk = dev_to_disk(dev); - unsigned int alignment = 0; - - if (disk->queue) - alignment = queue_limit_discard_alignment(&disk->queue->limits, - p->start_sect); - return sprintf(buf, "%u\n", alignment); + return sprintf(buf, "%u\n", p->discard_alignment); } ssize_t part_stat_show(struct device *dev, @@ -455,6 +449,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, p->start_sect = start; p->alignment_offset = queue_limit_alignment_offset(&disk->queue->limits, start); + p->discard_alignment = + queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b78956b3c2e..300d7582006 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -100,6 +100,7 @@ struct hd_struct { sector_t start_sect; sector_t nr_sects; sector_t alignment_offset; + unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; int policy, partno; -- cgit v1.2.3-70-g09d2 From 3cebde2413ba42504cf2c10ec1d47582912435cd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 29 May 2011 21:20:59 -0700 Subject: vfs: shrink_dcache_parent before rmdir, dir rename The dentry_unhash push-down series missed that shink_dcache_parent needs to be called prior to rmdir or dir rename to clear DCACHE_REFERENCED and allow efficient dentry reclaim. Reported-by: Dave Chinner Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/namei.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index 1ab641f2e78..e2e4e8d032e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2579,6 +2579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (error) goto out; + shrink_dcache_parent(dentry); error = dir->i_op->rmdir(dir, dentry); if (error) goto out; @@ -2993,6 +2994,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) goto out; + if (target) + shrink_dcache_parent(new_dentry); error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (error) goto out; -- cgit v1.2.3-70-g09d2 From c7427d23f7ed695ac226dbe3a84d7f19091d34ce Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 May 2011 01:50:53 -0400 Subject: autofs4: bogus dentry_unhash() added in ->unlink() Signed-off-by: Al Viro --- fs/autofs4/root.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 87d95a8cddb..f55ae23b137 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -583,8 +583,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) return -EACCES; - dentry_unhash(dentry); - if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); if (p_ino && dentry->d_parent != dentry) -- cgit v1.2.3-70-g09d2 From 598e887d8b01655780c81cc86a9e7820ed091580 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 30 May 2011 11:38:06 +0200 Subject: x86 idle: Fix mwait deprecation warning message Fix: arch/x86/kernel/process.c:645:1: warning: unknown escape sequence '\i' due to missing escape backslash, introduced by this commit: 5d4c47e0195b: x86 idle: deprecate mwait_idle() and "idle=mwait" cmdline param Signed-off-by: Borislav Petkov Cc: Len Brown Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1306748286-24701-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 426a5b66f7e..3d83a71af7d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -642,7 +642,7 @@ static int __init idle_setup(char *str) boot_option_idle_override = IDLE_POLL; } else if (!strcmp(str, "mwait")) { boot_option_idle_override = IDLE_FORCE_MWAIT; - WARN_ONCE(1, "\idle=mwait\" will be removed in 2012\"\n"); + WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\"\n"); } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is -- cgit v1.2.3-70-g09d2 From 4f3c125c7420c85eaff627145557e392a871922d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 30 May 2011 08:23:57 -0400 Subject: x86: Fix mwait_play_dead() faulting on mwait-incapable cpus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A logic error in mwait_play_dead() causes the kernel to use mwait even on cpus which don't support it, such as KVM virtual cpus. Introduced by: 349c004e3d31: x86: A fast way to check capabilities of the current cpu Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=36222 Reported-by: Török Edwin Signed-off-by: Avi Kivity Cc: Christoph Lameter Cc: Tejun Heo Link: http://lkml.kernel.org/r/1306758237-9327-1-git-send-email-avi@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index eefd96765e7..33a0c11797d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1332,7 +1332,7 @@ static inline void mwait_play_dead(void) void *mwait_ptr; struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); - if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)) + if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) return; if (!this_cpu_has(X86_FEATURE_CLFLSH)) return; -- cgit v1.2.3-70-g09d2 From 194cd8dfc9e4f29249b3bd45c5cb5c0a33a6438c Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Tue, 31 May 2011 13:27:41 +0900 Subject: sh: asm/tlb.h needs linux/swap.h Commit 1e56a56410bb64bce62d44563e35a143fc2d515f introduced the mmu_gather rework for sh, but missed a linux/swap.h include: CC arch/sh/mm/tlb-urb.o In file included from arch/sh/mm/tlb-urb.c:14:0: arch/sh/include/asm/tlb.h: In function '__tlb_remove_page': arch/sh/include/asm/tlb.h:92:2: error: implicit declaration of function 'free_page_and_swap_cache' Signed-off-by: Nobuhiro Iwamatsu CC: Peter Zijlstra Signed-off-by: Paul Mundt --- arch/sh/include/asm/tlb.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 6c308d8b9a5..ec88bfcdf7c 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -9,6 +9,7 @@ #include #ifdef CONFIG_MMU +#include #include #include #include -- cgit v1.2.3-70-g09d2 From 65d517eb7224d24ee4206416161390f30d69e622 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 14:37:44 +0900 Subject: sh64: asm/pgtable.h needs asm/mmu.h Needed to satisfy the __in_29bit_mode() check. Signed-off-by: Paul Mundt --- arch/sh/include/asm/pgtable.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index db85916b9e9..9210e93a92c 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -18,6 +18,7 @@ #include #endif #include +#include #ifndef __ASSEMBLY__ #include -- cgit v1.2.3-70-g09d2 From 3f9b8520b06013939ad247ba08b69529b5f14be1 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 14:38:29 +0900 Subject: sh64: Move from P1SEG to CAC_ADDR for consistent sync. sh64 doesn't define a P1SEGADDR, resulting in a build failure. The proper mapping can be attained for both sh32 and 64 via the CAC_ADDR macro, so switch to that instead. Signed-off-by: Paul Mundt --- arch/sh/mm/consistent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 40733a95240..f251b5f2765 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -82,7 +82,7 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size, void *addr; addr = __in_29bit_mode() ? - (void *)P1SEGADDR((unsigned long)vaddr) : vaddr; + (void *)CAC_ADDR((unsigned long)vaddr) : vaddr; switch (direction) { case DMA_FROM_DEVICE: /* invalidate only */ -- cgit v1.2.3-70-g09d2 From db7eba292e913390fa881272bfbc3da0a5380513 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 14:39:49 +0900 Subject: sh: Fix up asm-generic/ptrace.h fallout. There was an ordering issue with regards to instruction_pointer() being used in profile_pc() prior to the asm-generic/ptrace.h include, which subsequently provided the instruction_pointer() definition. In the interest of simplicity we simply open-code the regs->pc deref for the profile_pc() definition instead. The FP functions were also broken due to a lack of a common regs->fp, so provide a common GET_FP() that is safe for both architectures in order to fix up the frame pointer helpers too. Signed-off-by: Paul Mundt --- arch/sh/include/asm/ptrace.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h index 40725b4a801..88bd6be168a 100644 --- a/arch/sh/include/asm/ptrace.h +++ b/arch/sh/include/asm/ptrace.h @@ -41,7 +41,9 @@ #define user_mode(regs) (((regs)->sr & 0x40000000)==0) #define kernel_stack_pointer(_regs) ((unsigned long)(_regs)->regs[15]) -#define GET_USP(regs) ((regs)->regs[15]) + +#define GET_FP(regs) ((regs)->regs[14]) +#define GET_USP(regs) ((regs)->regs[15]) extern void show_regs(struct pt_regs *); @@ -131,7 +133,7 @@ extern void ptrace_triggered(struct perf_event *bp, int nmi, static inline unsigned long profile_pc(struct pt_regs *regs) { - unsigned long pc = instruction_pointer(regs); + unsigned long pc = regs->pc; if (virt_addr_uncached(pc)) return CAC_ADDR(pc); -- cgit v1.2.3-70-g09d2 From d4905ce38c73964b868037e49a5945e1cf47a7f2 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 15:23:20 +0900 Subject: Revert "clocksource: sh_tmu: Runtime PM support" This reverts commit 1b842e91fea9447eff5eb687e28ad61c02f5033e. There is a fundamental ordering race between the early and late probe paths and the runtime PM tie-in that results in __pm_runtime_resume() attempting to take a lock that hasn't been initialized yet (which by proxy also suggests that pm_runtime_init() hasn't yet been run on the device either, making the entire thing unsafe) -- resulting in instant death on SMP or on UP with spinlock debugging enabled: sh_tmu.0: used for clock events sh_tmu.0: used for periodic clock events BUG: spinlock trylock failure on UP on CPU#0, swapper/0 lock: 804db198, .magic: 00000000, .owner: /-1, .owner_cpu: 0 ... Revert it for now until the ordering issues can be resolved, or we can get some more help from the runtime PM framework to make this possible. Signed-off-by: Paul Mundt --- drivers/clocksource/sh_tmu.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index 17296288a20..80813576861 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -110,12 +109,10 @@ static int sh_tmu_enable(struct sh_tmu_priv *p) { int ret; - /* wake up device and enable clock */ - pm_runtime_get_sync(&p->pdev->dev); + /* enable clock */ ret = clk_enable(p->clk); if (ret) { dev_err(&p->pdev->dev, "cannot enable clock\n"); - pm_runtime_put_sync(&p->pdev->dev); return ret; } @@ -144,9 +141,8 @@ static void sh_tmu_disable(struct sh_tmu_priv *p) /* disable interrupts in TMU block */ sh_tmu_write(p, TCR, 0x0000); - /* stop clock and mark device as idle */ + /* stop clock */ clk_disable(p->clk); - pm_runtime_put_sync(&p->pdev->dev); } static void sh_tmu_set_next(struct sh_tmu_priv *p, unsigned long delta, @@ -415,7 +411,6 @@ static int __devinit sh_tmu_probe(struct platform_device *pdev) if (p) { dev_info(&pdev->dev, "kept as earlytimer\n"); - pm_runtime_enable(&pdev->dev); return 0; } @@ -430,9 +425,6 @@ static int __devinit sh_tmu_probe(struct platform_device *pdev) kfree(p); platform_set_drvdata(pdev, NULL); } - - if (!is_early_platform_device(pdev)) - pm_runtime_enable(&pdev->dev); return ret; } -- cgit v1.2.3-70-g09d2 From 9436b4abec28a22edd961ae375535d940625f1f2 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 15:26:42 +0900 Subject: Revert "clocksource: sh_cmt: Runtime PM support" This reverts commit 01fa68b58492a5d6708a91c1f474b6a099a9509e. The same note as per the sh_tmu change applies here, too. Signed-off-by: Paul Mundt --- drivers/clocksource/sh_cmt.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 036e5865eb4..dc7c033ef58 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -153,12 +152,10 @@ static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate) { int ret; - /* wake up device and enable clock */ - pm_runtime_get_sync(&p->pdev->dev); + /* enable clock */ ret = clk_enable(p->clk); if (ret) { dev_err(&p->pdev->dev, "cannot enable clock\n"); - pm_runtime_put_sync(&p->pdev->dev); return ret; } @@ -190,9 +187,8 @@ static void sh_cmt_disable(struct sh_cmt_priv *p) /* disable interrupts in CMT block */ sh_cmt_write(p, CMCSR, 0); - /* stop clock and mark device as idle */ + /* stop clock */ clk_disable(p->clk); - pm_runtime_put_sync(&p->pdev->dev); } /* private flags */ @@ -664,7 +660,6 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev) if (p) { dev_info(&pdev->dev, "kept as earlytimer\n"); - pm_runtime_enable(&pdev->dev); return 0; } @@ -679,9 +674,6 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev) kfree(p); platform_set_drvdata(pdev, NULL); } - - if (!is_early_platform_device(pdev)) - pm_runtime_enable(&pdev->dev); return ret; } -- cgit v1.2.3-70-g09d2 From 5c2de44417523385010b529599a2b30f290831a3 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 15:53:03 +0900 Subject: dmaengine: shdma: Fix up fallout from runtime PM changes. The runtime PM changes introduce sh_dmae_rst() wrapping via the runtime_resume helper, depending on dev_get_drvdata() to fetch the platform data needed for the DMAOR initialization default at a time where drvdata hasn't yet been established by the probe path, resulting in general probe misery: Unable to handle kernel NULL pointer dereference at virtual address 000000c4 pc = 8025adee *pde = 00000000 Oops: 0000 [#1] Modules linked in: Pid : 1, Comm: swapper CPU : 0 Not tainted (3.0.0-rc1-00012-g9436b4a-dirty #1456) PC is at sh_dmae_rst+0x28/0x86 PR is at sh_dmae_rst+0x22/0x86 PC : 8025adee SP : 9e803d10 SR : 400080f1 TEA : 000000c4 R0 : 000000c4 R1 : 0000fff8 R2 : 00000000 R3 : 00000040 R4 : 000000f0 R5 : 00000000 R6 : 00000000 R7 : 804f184c R8 : 00000000 R9 : 804dd0e8 R10 : 80283204 R11 : ffffffda R12 : 000000a0 R13 : 804dd18c R14 : 9e803d10 MACH: 00000000 MACL: 00008f20 GBR : 00000000 PR : 8025ade8 Call trace: [<8025ae70>] sh_dmae_runtime_resume+0x24/0x34 [<80283238>] pm_generic_runtime_resume+0x34/0x3c [<80283370>] rpm_callback+0x4a/0x7e [<80283efc>] rpm_resume+0x240/0x384 [<80283f54>] rpm_resume+0x298/0x384 [<8028428c>] __pm_runtime_resume+0x44/0x7c [<8038a358>] __ioremap_caller+0x0/0xec [<80284296>] __pm_runtime_resume+0x4e/0x7c [<8038a358>] __ioremap_caller+0x0/0xec [<80666254>] sh_dmae_probe+0x180/0x6a0 [<802803ae>] platform_drv_probe+0x26/0x2e Fix up the ordering accordingly. Signed-off-by: Paul Mundt --- drivers/dma/shdma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 636e40925b1..727e76ff13e 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -1144,6 +1144,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev) /* platform data */ shdev->pdata = pdata; + platform_set_drvdata(pdev, shdev); + pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -1256,7 +1258,6 @@ static int __init sh_dmae_probe(struct platform_device *pdev) pm_runtime_put(&pdev->dev); - platform_set_drvdata(pdev, shdev); dma_async_device_register(&shdev->common); return err; @@ -1278,6 +1279,8 @@ rst_err: if (dmars) iounmap(shdev->dmars); + + platform_set_drvdata(pdev, NULL); emapdmars: iounmap(shdev->chan_reg); synchronize_rcu(); @@ -1316,6 +1319,8 @@ static int __exit sh_dmae_remove(struct platform_device *pdev) iounmap(shdev->dmars); iounmap(shdev->chan_reg); + platform_set_drvdata(pdev, NULL); + synchronize_rcu(); kfree(shdev); -- cgit v1.2.3-70-g09d2 From d72bce0e67e8afc6eb959f656013cbb577426f1e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 May 2011 13:34:51 +0200 Subject: rcu: Cure load woes Commit cc3ce5176d83 (rcu: Start RCU kthreads in TASK_INTERRUPTIBLE state) fudges a sleeping task' state, resulting in the scheduler seeing a TASK_UNINTERRUPTIBLE task going to sleep, but a TASK_INTERRUPTIBLE task waking up. The result is unbalanced load calculation. The problem that patch tried to address is that the RCU threads could stay in UNINTERRUPTIBLE state for quite a while and triggering the hung task detector due to on-demand wake-ups. Cure the problem differently by always giving the tasks at least one wake-up once the CPU is fully up and running, this will kick them out of the initial UNINTERRUPTIBLE state and into the regular INTERRUPTIBLE wait state. [ The alternative would be teaching kthread_create() to start threads as INTERRUPTIBLE but that needs a tad more thought. ] Reported-by: Damien Wyart Signed-off-by: Peter Zijlstra Acked-by: Paul E. McKenney Link: http://lkml.kernel.org/r/1306755291.1200.2872.camel@twins Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 54 +++++++++++++++++++++++++++++++++++++++++-------- kernel/rcutree_plugin.h | 11 +++++++++- 2 files changed, 56 insertions(+), 9 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 77a7671dd14..89419ff92e9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1648,7 +1648,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) if (IS_ERR(t)) return PTR_ERR(t); kthread_bind(t, cpu); - set_task_state(t, TASK_INTERRUPTIBLE); per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); per_cpu(rcu_cpu_kthread_task, cpu) = t; @@ -1756,7 +1755,6 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, if (IS_ERR(t)) return PTR_ERR(t); raw_spin_lock_irqsave(&rnp->lock, flags); - set_task_state(t, TASK_INTERRUPTIBLE); rnp->node_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = 99; @@ -1765,6 +1763,8 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); } +static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); + /* * Spawn all kthreads -- called as soon as the scheduler is running. */ @@ -1772,18 +1772,30 @@ static int __init rcu_spawn_kthreads(void) { int cpu; struct rcu_node *rnp; + struct task_struct *t; rcu_kthreads_spawnable = 1; for_each_possible_cpu(cpu) { per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) + if (cpu_online(cpu)) { (void)rcu_spawn_one_cpu_kthread(cpu); + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t) + wake_up_process(t); + } } rnp = rcu_get_root(rcu_state); (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + if (rnp->node_kthread_task) + wake_up_process(rnp->node_kthread_task); if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state, rnp) + rcu_for_each_leaf_node(rcu_state, rnp) { (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + t = rnp->node_kthread_task; + if (t) + wake_up_process(t); + rcu_wake_one_boost_kthread(rnp); + } } return 0; } @@ -2188,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } -static void __cpuinit rcu_online_cpu(int cpu) +static void __cpuinit rcu_prepare_cpu(int cpu) { rcu_init_percpu_data(cpu, &rcu_sched_state, 0); rcu_init_percpu_data(cpu, &rcu_bh_state, 0); rcu_preempt_init_percpu_data(cpu); } -static void __cpuinit rcu_online_kthreads(int cpu) +static void __cpuinit rcu_prepare_kthreads(int cpu) { struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; @@ -2208,6 +2220,31 @@ static void __cpuinit rcu_online_kthreads(int cpu) } } +/* + * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, + * but the RCU threads are woken on demand, and if demand is low this + * could be a while triggering the hung task watchdog. + * + * In order to avoid this, poke all tasks once the CPU is fully + * up and running. + */ +static void __cpuinit rcu_online_kthreads(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); + struct rcu_node *rnp = rdp->mynode; + struct task_struct *t; + + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t) + wake_up_process(t); + + t = rnp->node_kthread_task; + if (t) + wake_up_process(t); + + rcu_wake_one_boost_kthread(rnp); +} + /* * Handle CPU online/offline notification events. */ @@ -2221,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - rcu_online_cpu(cpu); - rcu_online_kthreads(cpu); + rcu_prepare_cpu(cpu); + rcu_prepare_kthreads(cpu); break; case CPU_ONLINE: + rcu_online_kthreads(cpu); case CPU_DOWN_FAILED: rcu_node_kthread_setaffinity(rnp, -1); rcu_cpu_kthread_setrt(cpu, 1); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a767b7dac36..c8bff3099a8 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1295,7 +1295,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, if (IS_ERR(t)) return PTR_ERR(t); raw_spin_lock_irqsave(&rnp->lock, flags); - set_task_state(t, TASK_INTERRUPTIBLE); rnp->boost_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = RCU_KTHREAD_PRIO; @@ -1303,6 +1302,12 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } +static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) +{ + if (rnp->boost_kthread_task) + wake_up_process(rnp->boost_kthread_task); +} + #else /* #ifdef CONFIG_RCU_BOOST */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) @@ -1326,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } +static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) +{ +} + #endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifndef CONFIG_SMP -- cgit v1.2.3-70-g09d2 From 339dedf709e21d5718d6596750166f70e8bed40a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 31 May 2011 18:01:23 +1000 Subject: powerpc/pmac: Don't register pmac PIC syscore ops when HW not present The Apple custom PIC only exist in some earlier machine models, anything with an MPIC will crash on suspend if we register those syscore ops unconditionally. This is a regression caused by commit f5a592f7d74e ("PM / PowerPC: Use struct syscore_ops instead of sysdevs for PM") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/powermac/pic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 9089b042119..7667db448aa 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -715,7 +715,8 @@ static struct syscore_ops pmacpic_syscore_ops = { static int __init init_pmacpic_syscore(void) { - register_syscore_ops(&pmacpic_syscore_ops); + if (pmac_irq_hw[0]) + register_syscore_ops(&pmacpic_syscore_ops); return 0; } -- cgit v1.2.3-70-g09d2 From ea9d6553b3b3044e7374774cc33bb1b2eee19dd3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 31 May 2011 13:45:53 +0200 Subject: block: remove unwanted semicolons Since those defined functions require additional semicolon from the caller, they could cause potential syntax errors when used in if-else statements. Signed-off-by: Namhyung Kim Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ae9091a6848..1a23722e887 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1282,8 +1282,8 @@ queue_max_integrity_segments(struct request_queue *q) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) -#define blk_integrity_unregister(a) do { } while (0); -#define blk_queue_max_integrity_segments(a, b) do { } while (0); +#define blk_integrity_unregister(a) do { } while (0) +#define blk_queue_max_integrity_segments(a, b) do { } while (0) #define queue_max_integrity_segments(a) (0) #define blk_integrity_merge_rq(a, b, c) (0) #define blk_integrity_merge_bio(a, b, c) (0) -- cgit v1.2.3-70-g09d2 From 83caba8436b28bd9081013fd840424983127d4ca Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 31 May 2011 10:09:24 -0700 Subject: [IA64] wire up sendmmsg() syscall for Itanium Add entries in unistd.h and entry.S to make this new syscall visible. Signed-off-by: Tony Luck --- arch/ia64/include/asm/unistd.h | 3 ++- arch/ia64/kernel/entry.S | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 1cf0f496f74..7c928da35b1 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -320,11 +320,12 @@ #define __NR_clock_adjtime 1328 #define __NR_syncfs 1329 #define __NR_setns 1330 +#define __NR_sendmmsg 1331 #ifdef __KERNEL__ -#define NR_syscalls 307 /* length of syscall table */ +#define NR_syscalls 308 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 9ca80193cd4..97dd2abdeb1 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1776,6 +1776,7 @@ sys_call_table: data8 sys_clock_adjtime data8 sys_syncfs data8 sys_setns // 1330 + data8 sys_sendmmsg .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ -- cgit v1.2.3-70-g09d2 From 4495a7d41dbda03841c2a1c2a5ce7135a45131ba Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Tue, 31 May 2011 10:04:09 +0200 Subject: CFQ: Fix typo and remove unnecessary semicolon Fix comment typo and remove unnecessary semicolon at macro Signed-off-by: Kyungmin Park Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 7c52d688892..8a02c955c61 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -185,7 +185,7 @@ struct cfq_group { int nr_cfqq; /* - * Per group busy queus average. Useful for workload slice calc. We + * Per group busy queues average. Useful for workload slice calc. We * create the array for each prio class but at run time it is used * only for RT and BE class and slot for IDLE class remains unused. * This is primarily done to avoid confusion and a gcc warning. @@ -369,16 +369,16 @@ CFQ_CFQQ_FNS(wait_busy); #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ - blkg_path(&(cfqq)->cfqg->blkg), ##args); + blkg_path(&(cfqq)->cfqg->blkg), ##args) #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "%s " fmt, \ - blkg_path(&(cfqg)->blkg), ##args); \ + blkg_path(&(cfqg)->blkg), ##args) \ #else #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) -#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0); +#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) #endif #define cfq_log(cfqd, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) -- cgit v1.2.3-70-g09d2 From 71005be40a7fc95edda3cc462361ce0243e4f5fa Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 26 May 2011 21:31:08 +0100 Subject: libertas: Set command sequence number later to ensure consistency Before this patch, the command sequence number is being set before lbs_queue_cmd() adds the command to the queue. However, lbs_queue_cmd() sometimes forces commands to queue-jump (e.g. CMD_802_11_WAKEUP_CONFIRM). It currently does this without considering that sequence numbers might need adjusting to keep things running in order. Fix this by setting the sequence number at a later stage, just before we're actually submitting the command to the hardware. Also fixes a possible race where seqnum was being modified outside of the driver lock. Signed-off-by: Daniel Drake Acked-by: Dan Williams Signed-off-by: John W. Linville --- drivers/net/wireless/libertas/cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c index 84566db486d..71c8f3fccfa 100644 --- a/drivers/net/wireless/libertas/cmd.c +++ b/drivers/net/wireless/libertas/cmd.c @@ -994,6 +994,8 @@ static void lbs_submit_command(struct lbs_private *priv, cmd = cmdnode->cmdbuf; spin_lock_irqsave(&priv->driver_lock, flags); + priv->seqnum++; + cmd->seqnum = cpu_to_le16(priv->seqnum); priv->cur_cmd = cmdnode; spin_unlock_irqrestore(&priv->driver_lock, flags); @@ -1621,11 +1623,9 @@ struct cmd_ctrl_node *__lbs_cmd_async(struct lbs_private *priv, /* Copy the incoming command to the buffer */ memcpy(cmdnode->cmdbuf, in_cmd, in_cmd_size); - /* Set sequence number, clean result, move to buffer */ - priv->seqnum++; + /* Set command, clean result, move to buffer */ cmdnode->cmdbuf->command = cpu_to_le16(command); cmdnode->cmdbuf->size = cpu_to_le16(in_cmd_size); - cmdnode->cmdbuf->seqnum = cpu_to_le16(priv->seqnum); cmdnode->cmdbuf->result = 0; lbs_deb_host("PREP_CMD: command 0x%04x\n", command); -- cgit v1.2.3-70-g09d2 From dd08682150e1815fe5cdd0673a2f2e9cd2d55a7a Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 27 May 2011 15:34:45 +0300 Subject: wl12xx: fix passive and radar channel generation for scheduled scan We were comparing bitwise AND results with a boolean, so when the boolean was set to true, it was not matching as it should. Fix this by booleanizing the bitwise AND results with !!. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/scan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/wl12xx/scan.c b/drivers/net/wireless/wl12xx/scan.c index f37e5a39197..f223e0ed0c4 100644 --- a/drivers/net/wireless/wl12xx/scan.c +++ b/drivers/net/wireless/wl12xx/scan.c @@ -338,8 +338,8 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, flags = req->channels[i]->flags; if (!(flags & IEEE80211_CHAN_DISABLED) && - ((flags & IEEE80211_CHAN_PASSIVE_SCAN) == passive) && - ((flags & IEEE80211_CHAN_RADAR) == radar) && + (!!(flags & IEEE80211_CHAN_PASSIVE_SCAN) == passive) && + (!!(flags & IEEE80211_CHAN_RADAR) == radar) && (req->channels[i]->band == band)) { wl1271_debug(DEBUG_SCAN, "band %d, center_freq %d ", req->channels[i]->band, -- cgit v1.2.3-70-g09d2 From 2497a246e880d1fb537f754f551177c01fa39242 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 27 May 2011 15:34:46 +0300 Subject: wl12xx: fix DFS channels handling in scheduled scan DFS channels were never getting included in the scheduled scans, because they always contain the passive flag as well and the call was asking for DFS and active channels. Fix this by ignoring the passive flag when collecting DFS channels. Also, move the DFS channels in the channel list before the 5GHz active channels (this was implemented in the FW differently than specified). Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/scan.c | 31 ++++++++++++++++++++----------- drivers/net/wireless/wl12xx/scan.h | 3 +++ 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/wl12xx/scan.c b/drivers/net/wireless/wl12xx/scan.c index f223e0ed0c4..8ccbb911776 100644 --- a/drivers/net/wireless/wl12xx/scan.c +++ b/drivers/net/wireless/wl12xx/scan.c @@ -337,10 +337,12 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, i++) { flags = req->channels[i]->flags; - if (!(flags & IEEE80211_CHAN_DISABLED) && - (!!(flags & IEEE80211_CHAN_PASSIVE_SCAN) == passive) && + if ((req->channels[i]->band == band) && + !(flags & IEEE80211_CHAN_DISABLED) && (!!(flags & IEEE80211_CHAN_RADAR) == radar) && - (req->channels[i]->band == band)) { + /* if radar is set, we ignore the passive flag */ + (radar || + !!(flags & IEEE80211_CHAN_PASSIVE_SCAN) == passive)) { wl1271_debug(DEBUG_SCAN, "band %d, center_freq %d ", req->channels[i]->band, req->channels[i]->center_freq); @@ -350,6 +352,8 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, wl1271_debug(DEBUG_SCAN, "max_power %d", req->channels[i]->max_power); + if (flags & IEEE80211_CHAN_RADAR) + channels[j].flags |= SCAN_CHANNEL_FLAGS_DFS; if (flags & IEEE80211_CHAN_PASSIVE_SCAN) { channels[j].passive_duration = cpu_to_le16(c->dwell_time_passive); @@ -359,7 +363,7 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, channels[j].max_duration = cpu_to_le16(c->max_dwell_time_active); } - channels[j].tx_power_att = req->channels[j]->max_power; + channels[j].tx_power_att = req->channels[i]->max_power; channels[j].channel = req->channels[i]->hw_value; j++; @@ -386,7 +390,11 @@ wl1271_scan_sched_scan_channels(struct wl1271 *wl, wl1271_scan_get_sched_scan_channels(wl, req, cfg->channels, IEEE80211_BAND_2GHZ, false, false, idx); - idx += cfg->active[0]; + /* + * 5GHz channels always start at position 14, not immediately + * after the last 2.4GHz channel + */ + idx = 14; cfg->passive[1] = wl1271_scan_get_sched_scan_channels(wl, req, cfg->channels, @@ -394,22 +402,23 @@ wl1271_scan_sched_scan_channels(struct wl1271 *wl, false, true, idx); idx += cfg->passive[1]; - cfg->active[1] = + cfg->dfs = wl1271_scan_get_sched_scan_channels(wl, req, cfg->channels, IEEE80211_BAND_5GHZ, - false, false, 14); - idx += cfg->active[1]; + true, true, idx); + idx += cfg->dfs; - cfg->dfs = + cfg->active[1] = wl1271_scan_get_sched_scan_channels(wl, req, cfg->channels, IEEE80211_BAND_5GHZ, - true, false, idx); - idx += cfg->dfs; + false, false, idx); + idx += cfg->active[1]; wl1271_debug(DEBUG_SCAN, " 2.4GHz: active %d passive %d", cfg->active[0], cfg->passive[0]); wl1271_debug(DEBUG_SCAN, " 5GHz: active %d passive %d", cfg->active[1], cfg->passive[1]); + wl1271_debug(DEBUG_SCAN, " DFS: %d", cfg->dfs); return idx; } diff --git a/drivers/net/wireless/wl12xx/scan.h b/drivers/net/wireless/wl12xx/scan.h index c83319579ca..a0b6c5d67b0 100644 --- a/drivers/net/wireless/wl12xx/scan.h +++ b/drivers/net/wireless/wl12xx/scan.h @@ -137,6 +137,9 @@ enum { SCAN_BSS_TYPE_ANY, }; +#define SCAN_CHANNEL_FLAGS_DFS BIT(0) +#define SCAN_CHANNEL_FLAGS_DFS_ENABLED BIT(1) + struct conn_scan_ch_params { __le16 min_duration; __le16 max_duration; -- cgit v1.2.3-70-g09d2 From 50a66d7f04adbfab9db55144c58dc693358cb635 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 27 May 2011 15:34:47 +0300 Subject: wl12xx: add separate config value for DFS dwell time on sched scan Use a different value for DFS dwell time when performing a scheduled scan. Previously we were using the same value as for normal passive scans. This adds some flexibility between these two different types of passive scan. For now we use 150 TUs for DFS channel dwell time. This may need to be fine-tuned in the future. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/conf.h | 3 +++ drivers/net/wireless/wl12xx/main.c | 1 + drivers/net/wireless/wl12xx/scan.c | 7 +++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/wl12xx/conf.h b/drivers/net/wireless/wl12xx/conf.h index 1ab6c86aac4..c83fefb6662 100644 --- a/drivers/net/wireless/wl12xx/conf.h +++ b/drivers/net/wireless/wl12xx/conf.h @@ -1157,6 +1157,9 @@ struct conf_sched_scan_settings { /* time to wait on the channel for passive scans (in TUs) */ u32 dwell_time_passive; + /* time to wait on the channel for DFS scans (in TUs) */ + u32 dwell_time_dfs; + /* number of probe requests to send on each channel in active scans */ u8 num_probe_reqs; diff --git a/drivers/net/wireless/wl12xx/main.c b/drivers/net/wireless/wl12xx/main.c index bc00e52f644..e6497dc669d 100644 --- a/drivers/net/wireless/wl12xx/main.c +++ b/drivers/net/wireless/wl12xx/main.c @@ -311,6 +311,7 @@ static struct conf_drv_settings default_conf = { .min_dwell_time_active = 8, .max_dwell_time_active = 30, .dwell_time_passive = 100, + .dwell_time_dfs = 150, .num_probe_reqs = 2, .rssi_threshold = -90, .snr_threshold = 0, diff --git a/drivers/net/wireless/wl12xx/scan.c b/drivers/net/wireless/wl12xx/scan.c index 8ccbb911776..28ec0addc65 100644 --- a/drivers/net/wireless/wl12xx/scan.c +++ b/drivers/net/wireless/wl12xx/scan.c @@ -352,9 +352,12 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, wl1271_debug(DEBUG_SCAN, "max_power %d", req->channels[i]->max_power); - if (flags & IEEE80211_CHAN_RADAR) + if (flags & IEEE80211_CHAN_RADAR) { channels[j].flags |= SCAN_CHANNEL_FLAGS_DFS; - if (flags & IEEE80211_CHAN_PASSIVE_SCAN) { + channels[j].passive_duration = + cpu_to_le16(c->dwell_time_dfs); + } + else if (flags & IEEE80211_CHAN_PASSIVE_SCAN) { channels[j].passive_duration = cpu_to_le16(c->dwell_time_passive); } else { -- cgit v1.2.3-70-g09d2 From 66870b1ccd5c1460e437c18b0026e2dcaab1ece9 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 27 May 2011 15:34:48 +0300 Subject: wl12xx: fix oops in sched_scan when forcing a passive scan Fix kernel oops when trying to use passive scheduled scans. The reason was that in passive scans there are no SSIDs, so there was a NULL pointer dereference. To solve the problem, we now check the number of SSIDs provided in the sched_scan request and only access the list if there's one or more (ie. passive scan is not forced). We also force all the channels to be passive by adding the IEEE80211_CHAN_PASSIVE_SCAN flag locally before the checks in the wl1271_scan_get_sched_scan_channels() function. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/scan.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/wl12xx/scan.c b/drivers/net/wireless/wl12xx/scan.c index 28ec0addc65..56f76abc754 100644 --- a/drivers/net/wireless/wl12xx/scan.c +++ b/drivers/net/wireless/wl12xx/scan.c @@ -331,12 +331,16 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, struct conf_sched_scan_settings *c = &wl->conf.sched_scan; int i, j; u32 flags; + bool force_passive = !req->n_ssids; for (i = 0, j = start; i < req->n_channels && j < MAX_CHANNELS_ALL_BANDS; i++) { flags = req->channels[i]->flags; + if (force_passive) + flags |= IEEE80211_CHAN_PASSIVE_SCAN; + if ((req->channels[i]->band == band) && !(flags & IEEE80211_CHAN_DISABLED) && (!!(flags & IEEE80211_CHAN_RADAR) == radar) && @@ -433,6 +437,7 @@ int wl1271_scan_sched_scan_config(struct wl1271 *wl, struct wl1271_cmd_sched_scan_config *cfg = NULL; struct conf_sched_scan_settings *c = &wl->conf.sched_scan; int i, total_channels, ret; + bool force_passive = !req->n_ssids; wl1271_debug(DEBUG_CMD, "cmd sched_scan scan config"); @@ -456,7 +461,7 @@ int wl1271_scan_sched_scan_config(struct wl1271 *wl, for (i = 0; i < SCAN_MAX_CYCLE_INTERVALS; i++) cfg->intervals[i] = cpu_to_le32(req->interval); - if (req->ssids[0].ssid_len && req->ssids[0].ssid) { + if (!force_passive && req->ssids[0].ssid_len && req->ssids[0].ssid) { cfg->filter_type = SCAN_SSID_FILTER_SPECIFIC; cfg->ssid_len = req->ssids[0].ssid_len; memcpy(cfg->ssid, req->ssids[0].ssid, @@ -473,7 +478,7 @@ int wl1271_scan_sched_scan_config(struct wl1271 *wl, goto out; } - if (cfg->active[0]) { + if (!force_passive && cfg->active[0]) { ret = wl1271_cmd_build_probe_req(wl, req->ssids[0].ssid, req->ssids[0].ssid_len, ies->ie[IEEE80211_BAND_2GHZ], @@ -485,7 +490,7 @@ int wl1271_scan_sched_scan_config(struct wl1271 *wl, } } - if (cfg->active[1]) { + if (!force_passive && cfg->active[1]) { ret = wl1271_cmd_build_probe_req(wl, req->ssids[0].ssid, req->ssids[0].ssid_len, ies->ie[IEEE80211_BAND_5GHZ], -- cgit v1.2.3-70-g09d2 From 59342f6a6bc35df623fb44784daa5e1077063b8f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 30 May 2011 10:15:47 +0300 Subject: zd1211rw: fix to work on OHCI zd1211 devices register 'EP 4 OUT' endpoint as Interrupt type on USB 2.0: Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x04 EP 4 OUT bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 1 However on USB 1.1 endpoint becomes Bulk: Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x04 EP 4 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Commit 37939810b937aba830dd751291fcdc51cae1a6cb assumed that endpoint is always interrupt type and changed usb_bulk_msg() calls to usb_interrupt_msg(). Problem here is that usb_bulk_msg() on interrupt endpoint selfcorrects the call and changes requested pipe to interrupt type (see usb_bulk_msg). However with usb_interrupt_msg() on bulk endpoint does not correct the pipe type to bulk, but instead URB is submitted with interrupt type pipe. So pre-2.6.39 used usb_bulk_msg() and therefore worked with both endpoint types, however in 2.6.39 usb_interrupt_msg() with bulk endpoint causes ohci_hcd to fail submitted URB instantly with -ENOSPC and preventing zd1211rw from working with OHCI. Fix this by detecting endpoint type and using correct endpoint/pipe types for URB. Also fix asynchronous zd_usb_iowrite16v_async() to use right URB type on 'EP 4 OUT'. Cc: stable@kernel.org Signed-off-by: Jussi Kivilinna Signed-off-by: John W. Linville --- drivers/net/wireless/zd1211rw/zd_usb.c | 53 +++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index 0e819943b9e..631194d4982 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -1533,6 +1533,31 @@ static void __exit usb_exit(void) module_init(usb_init); module_exit(usb_exit); +static int zd_ep_regs_out_msg(struct usb_device *udev, void *data, int len, + int *actual_length, int timeout) +{ + /* In USB 2.0 mode EP_REGS_OUT endpoint is interrupt type. However in + * USB 1.1 mode endpoint is bulk. Select correct type URB by endpoint + * descriptor. + */ + struct usb_host_endpoint *ep; + unsigned int pipe; + + pipe = usb_sndintpipe(udev, EP_REGS_OUT); + ep = usb_pipe_endpoint(udev, pipe); + if (!ep) + return -EINVAL; + + if (usb_endpoint_xfer_int(&ep->desc)) { + return usb_interrupt_msg(udev, pipe, data, len, + actual_length, timeout); + } else { + pipe = usb_sndbulkpipe(udev, EP_REGS_OUT); + return usb_bulk_msg(udev, pipe, data, len, actual_length, + timeout); + } +} + static int usb_int_regs_length(unsigned int count) { return sizeof(struct usb_int_regs) + count * sizeof(struct reg_data); @@ -1648,15 +1673,14 @@ int zd_usb_ioread16v(struct zd_usb *usb, u16 *values, udev = zd_usb_to_usbdev(usb); prepare_read_regs_int(usb); - r = usb_interrupt_msg(udev, usb_sndintpipe(udev, EP_REGS_OUT), - req, req_len, &actual_req_len, 50 /* ms */); + r = zd_ep_regs_out_msg(udev, req, req_len, &actual_req_len, 50 /*ms*/); if (r) { dev_dbg_f(zd_usb_dev(usb), - "error in usb_interrupt_msg(). Error number %d\n", r); + "error in zd_ep_regs_out_msg(). Error number %d\n", r); goto error; } if (req_len != actual_req_len) { - dev_dbg_f(zd_usb_dev(usb), "error in usb_interrupt_msg()\n" + dev_dbg_f(zd_usb_dev(usb), "error in zd_ep_regs_out_msg()\n" " req_len %d != actual_req_len %d\n", req_len, actual_req_len); r = -EIO; @@ -1818,9 +1842,17 @@ int zd_usb_iowrite16v_async(struct zd_usb *usb, const struct zd_ioreq16 *ioreqs, rw->value = cpu_to_le16(ioreqs[i].value); } - usb_fill_int_urb(urb, udev, usb_sndintpipe(udev, EP_REGS_OUT), - req, req_len, iowrite16v_urb_complete, usb, - ep->desc.bInterval); + /* In USB 2.0 mode endpoint is interrupt type. However in USB 1.1 mode + * endpoint is bulk. Select correct type URB by endpoint descriptor. + */ + if (usb_endpoint_xfer_int(&ep->desc)) + usb_fill_int_urb(urb, udev, usb_sndintpipe(udev, EP_REGS_OUT), + req, req_len, iowrite16v_urb_complete, usb, + ep->desc.bInterval); + else + usb_fill_bulk_urb(urb, udev, usb_sndbulkpipe(udev, EP_REGS_OUT), + req, req_len, iowrite16v_urb_complete, usb); + urb->transfer_flags |= URB_FREE_BUFFER; /* Submit previous URB */ @@ -1924,15 +1956,14 @@ int zd_usb_rfwrite(struct zd_usb *usb, u32 value, u8 bits) } udev = zd_usb_to_usbdev(usb); - r = usb_interrupt_msg(udev, usb_sndintpipe(udev, EP_REGS_OUT), - req, req_len, &actual_req_len, 50 /* ms */); + r = zd_ep_regs_out_msg(udev, req, req_len, &actual_req_len, 50 /*ms*/); if (r) { dev_dbg_f(zd_usb_dev(usb), - "error in usb_interrupt_msg(). Error number %d\n", r); + "error in zd_ep_regs_out_msg(). Error number %d\n", r); goto out; } if (req_len != actual_req_len) { - dev_dbg_f(zd_usb_dev(usb), "error in usb_interrupt_msg()" + dev_dbg_f(zd_usb_dev(usb), "error in zd_ep_regs_out_msg()" " req_len %d != actual_req_len %d\n", req_len, actual_req_len); r = -EIO; -- cgit v1.2.3-70-g09d2 From 1144181c1bc054dc5e001a6f10b4820167e6c883 Mon Sep 17 00:00:00 2001 From: Wey-Yi Guy Date: Mon, 30 May 2011 09:32:52 -0700 Subject: iwlagn: fix incorrect PCI subsystem id for 6150 devices For 6150 devices, modify the supported PCI subsystem ID. Cc: stable@kernel.org Signed-off-by: Wey-Yi Guy Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-6000.c | 28 ++++++++++++++++++---------- drivers/net/wireless/iwlwifi/iwl-agn.c | 6 +++--- drivers/net/wireless/iwlwifi/iwl-agn.h | 1 + 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-6000.c b/drivers/net/wireless/iwlwifi/iwl-6000.c index f8c710db6e6..fda6fe08cf9 100644 --- a/drivers/net/wireless/iwlwifi/iwl-6000.c +++ b/drivers/net/wireless/iwlwifi/iwl-6000.c @@ -603,19 +603,27 @@ struct iwl_cfg iwl6050_2abg_cfg = { IWL_DEVICE_6050, }; +#define IWL_DEVICE_6150 \ + .fw_name_pre = IWL6050_FW_PRE, \ + .ucode_api_max = IWL6050_UCODE_API_MAX, \ + .ucode_api_min = IWL6050_UCODE_API_MIN, \ + .ops = &iwl6150_ops, \ + .eeprom_ver = EEPROM_6150_EEPROM_VERSION, \ + .eeprom_calib_ver = EEPROM_6150_TX_POWER_VERSION, \ + .base_params = &iwl6050_base_params, \ + .need_dc_calib = true, \ + .led_mode = IWL_LED_BLINK, \ + .internal_wimax_coex = true + struct iwl_cfg iwl6150_bgn_cfg = { .name = "Intel(R) Centrino(R) Wireless-N + WiMAX 6150 BGN", - .fw_name_pre = IWL6050_FW_PRE, - .ucode_api_max = IWL6050_UCODE_API_MAX, - .ucode_api_min = IWL6050_UCODE_API_MIN, - .eeprom_ver = EEPROM_6150_EEPROM_VERSION, - .eeprom_calib_ver = EEPROM_6150_TX_POWER_VERSION, - .ops = &iwl6150_ops, - .base_params = &iwl6050_base_params, + IWL_DEVICE_6150, .ht_params = &iwl6000_ht_params, - .need_dc_calib = true, - .led_mode = IWL_LED_RF_STATE, - .internal_wimax_coex = true, +}; + +struct iwl_cfg iwl6150_bg_cfg = { + .name = "Intel(R) Centrino(R) Wireless-N + WiMAX 6150 BG", + IWL_DEVICE_6150, }; struct iwl_cfg iwl6000_3agn_cfg = { diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 11c6c1169e7..a662adcb2ad 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -3831,11 +3831,11 @@ static DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = { /* 6150 WiFi/WiMax Series */ {IWL_PCI_DEVICE(0x0885, 0x1305, iwl6150_bgn_cfg)}, - {IWL_PCI_DEVICE(0x0885, 0x1306, iwl6150_bgn_cfg)}, + {IWL_PCI_DEVICE(0x0885, 0x1307, iwl6150_bg_cfg)}, {IWL_PCI_DEVICE(0x0885, 0x1325, iwl6150_bgn_cfg)}, - {IWL_PCI_DEVICE(0x0885, 0x1326, iwl6150_bgn_cfg)}, + {IWL_PCI_DEVICE(0x0885, 0x1327, iwl6150_bg_cfg)}, {IWL_PCI_DEVICE(0x0886, 0x1315, iwl6150_bgn_cfg)}, - {IWL_PCI_DEVICE(0x0886, 0x1316, iwl6150_bgn_cfg)}, + {IWL_PCI_DEVICE(0x0886, 0x1317, iwl6150_bg_cfg)}, /* 1000 Series WiFi */ {IWL_PCI_DEVICE(0x0083, 0x1205, iwl1000_bgn_cfg)}, diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.h b/drivers/net/wireless/iwlwifi/iwl-agn.h index 2495fe7a58c..d1716844002 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.h +++ b/drivers/net/wireless/iwlwifi/iwl-agn.h @@ -89,6 +89,7 @@ extern struct iwl_cfg iwl6000_3agn_cfg; extern struct iwl_cfg iwl6050_2agn_cfg; extern struct iwl_cfg iwl6050_2abg_cfg; extern struct iwl_cfg iwl6150_bgn_cfg; +extern struct iwl_cfg iwl6150_bg_cfg; extern struct iwl_cfg iwl1000_bgn_cfg; extern struct iwl_cfg iwl1000_bg_cfg; extern struct iwl_cfg iwl100_bgn_cfg; -- cgit v1.2.3-70-g09d2 From 48bdf072c3f1f8f739f76d19c74f4c79605cac46 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sun, 29 May 2011 10:55:44 +0000 Subject: ip_options_compile: properly handle unaligned pointer The current code takes an unaligned pointer and does htonl() on it to make it big-endian, then does a memcpy(). The problem is that the compiler decides that since the pointer is to a __be32, it is legal to optimize the copy into a processor word store. However, on an architecture that does not handled unaligned writes in kernel space, this produces an unaligned exception fault. The solution is to track the pointer as a "char *" (which removes a bunch of unpleasant casts in any case), and then just use put_unaligned_be32() to write the value to memory. Signed-off-by: Chris Metcalf Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index c3118e1cd3b..ec93335901d 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -350,7 +351,7 @@ int ip_options_compile(struct net *net, goto error; } if (optptr[2] <= optlen) { - __be32 *timeptr = NULL; + unsigned char *timeptr = NULL; if (optptr[2]+3 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -359,7 +360,7 @@ int ip_options_compile(struct net *net, case IPOPT_TS_TSONLY: opt->ts = optptr - iph; if (skb) - timeptr = (__be32*)&optptr[optptr[2]-1]; + timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; optptr[2] += 4; break; @@ -371,7 +372,7 @@ int ip_options_compile(struct net *net, opt->ts = optptr - iph; if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); - timeptr = (__be32*)&optptr[optptr[2]+3]; + timeptr = &optptr[optptr[2]+3]; } opt->ts_needaddr = 1; opt->ts_needtime = 1; @@ -389,7 +390,7 @@ int ip_options_compile(struct net *net, if (inet_addr_type(net, addr) == RTN_UNICAST) break; if (skb) - timeptr = (__be32*)&optptr[optptr[2]+3]; + timeptr = &optptr[optptr[2]+3]; } opt->ts_needtime = 1; optptr[2] += 8; @@ -403,10 +404,10 @@ int ip_options_compile(struct net *net, } if (timeptr) { struct timespec tv; - __be32 midtime; + u32 midtime; getnstimeofday(&tv); - midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC); - memcpy(timeptr, &midtime, sizeof(__be32)); + midtime = (tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC; + put_unaligned_be32(midtime, timeptr); opt->is_changed = 1; } } else { -- cgit v1.2.3-70-g09d2 From b10cec8a4e8167075b9e1ff3f05419769e7f381a Mon Sep 17 00:00:00 2001 From: Dennis Aberilla Date: Sun, 29 May 2011 11:46:54 +0000 Subject: drivers/net: ks8842 Fix crash on received packet when in PIO mode. This patch fixes a driver crash during packet reception due to not enough bytes allocated in the skb. Since the loop reads out 4 bytes at a time, we need to allow for up to 3 bytes of slack space. Signed-off-by: Dennis Aberilla Signed-off-by: David S. Miller --- drivers/net/ks8842.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ks8842.c b/drivers/net/ks8842.c index f0d8346d0fa..9bd0f551a38 100644 --- a/drivers/net/ks8842.c +++ b/drivers/net/ks8842.c @@ -662,7 +662,7 @@ static void ks8842_rx_frame(struct net_device *netdev, /* check the status */ if ((status & RXSR_VALID) && !(status & RXSR_ERROR)) { - struct sk_buff *skb = netdev_alloc_skb_ip_align(netdev, len); + struct sk_buff *skb = netdev_alloc_skb_ip_align(netdev, len + 3); if (skb) { -- cgit v1.2.3-70-g09d2 From a000c01e60e40e15304ffe48fff051d17a7bea91 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 29 May 2011 23:23:36 +0000 Subject: sctp: stop pending timers and purge queues when peer restart asoc If the peer restart the asoc, we should not only fail any unsent/unacked data, but also stop the T3-rtx, SACK, T4-rto timers, and teardown ASCONF queues. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- include/net/sctp/command.h | 1 + include/net/sctp/structs.h | 2 +- net/sctp/associola.c | 23 ++++++++++++++--------- net/sctp/sm_sideeffect.c | 3 +++ net/sctp/sm_statefuns.c | 14 ++++++++++++-- 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 2b447646ce4..dd6847e5d6e 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -107,6 +107,7 @@ typedef enum { SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */ SCTP_CMD_SEND_MSG, /* Send the whole use message */ SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ + SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ SCTP_CMD_LAST } sctp_verb_t; diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 795f4886e11..7df327a6d56 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1993,7 +1993,7 @@ void sctp_assoc_clean_asconf_ack_cache(const struct sctp_association *asoc); struct sctp_chunk *sctp_assoc_lookup_asconf_ack( const struct sctp_association *asoc, __be32 serial); - +void sctp_asconf_queue_teardown(struct sctp_association *asoc); int sctp_cmp_addr_exact(const union sctp_addr *ss1, const union sctp_addr *ss2); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 525f97c467e..4a62888f2e4 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -444,15 +444,7 @@ void sctp_association_free(struct sctp_association *asoc) asoc->peer.transport_count = 0; - /* Free any cached ASCONF_ACK chunk. */ - sctp_assoc_free_asconf_acks(asoc); - - /* Free the ASCONF queue. */ - sctp_assoc_free_asconf_queue(asoc); - - /* Free any cached ASCONF chunk. */ - if (asoc->addip_last_asconf) - sctp_chunk_free(asoc->addip_last_asconf); + sctp_asconf_queue_teardown(asoc); /* AUTH - Free the endpoint shared keys */ sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); @@ -1646,3 +1638,16 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack( return NULL; } + +void sctp_asconf_queue_teardown(struct sctp_association *asoc) +{ + /* Free any cached ASCONF_ACK chunk. */ + sctp_assoc_free_asconf_acks(asoc); + + /* Free the ASCONF queue. */ + sctp_assoc_free_asconf_queue(asoc); + + /* Free any cached ASCONF chunk. */ + if (asoc->addip_last_asconf) + sctp_chunk_free(asoc->addip_last_asconf); +} diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index d612ca1ca6c..534c2e5feb0 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1670,6 +1670,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_SEND_NEXT_ASCONF: sctp_cmd_send_asconf(asoc); break; + case SCTP_CMD_PURGE_ASCONF_QUEUE: + sctp_asconf_queue_teardown(asoc); + break; default: pr_warn("Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7f4a4f8368e..a297283154d 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1718,11 +1718,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_CONSUME; } - /* For now, fail any unsent/unacked data. Consider the optional - * choice of resending of this data. + /* For now, stop pending T3-rtx and SACK timers, fail any unsent/unacked + * data. Consider the optional choice of resending of this data. */ + sctp_add_cmd_sf(commands, SCTP_CMD_T3_RTX_TIMERS_STOP, SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, + SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); + /* Stop pending T4-rto timer, teardown ASCONF queue, ASCONF-ACK queue + * and ASCONF-ACK cache. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, + SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); + sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL()); + repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem; -- cgit v1.2.3-70-g09d2 From 930a6eac9f40e692bd9670d89bcd9ac0f4019356 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Mon, 30 May 2011 07:06:24 +0000 Subject: drivers/net/usb/catc.c: Fix potential deadlock in catc_ctrl_run() catc_ctrl_run() calls usb_submit_urb() with GFP_KERNEL, while it is called from catc_ctrl_async() and catc_ctrl_done() with catc->ctrl_lock spinlock held. The patch replaces GFP_KERNEL with GFP_ATOMIC. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/usb/catc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index d7221c4a5dc..8056f8a27c6 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -495,7 +495,7 @@ static void catc_ctrl_run(struct catc *catc) if (!q->dir && q->buf && q->len) memcpy(catc->ctrl_buf, q->buf, q->len); - if ((status = usb_submit_urb(catc->ctrl_urb, GFP_KERNEL))) + if ((status = usb_submit_urb(catc->ctrl_urb, GFP_ATOMIC))) err("submit(ctrl_urb) status %d", status); } -- cgit v1.2.3-70-g09d2 From 948252cb9e01d65a89ecadf67be5018351eee15e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 May 2011 19:27:48 -0700 Subject: Revert "net: fix section mismatches" This reverts commit e5cb966c0838e4da43a3b0751bdcac7fe719f7b4. It causes new build regressions with gcc-4.2 which is pretty common on non-x86 platforms. Reported-by: James Bottomley Signed-off-by: David S. Miller --- drivers/net/3c509.c | 14 ++++++------- drivers/net/3c59x.c | 4 ++-- drivers/net/depca.c | 35 ++++++++++++++++---------------- drivers/net/hp100.c | 12 +++++------ drivers/net/ibmlana.c | 4 ++-- drivers/net/irda/smsc-ircc2.c | 44 ++++++++++++++++++++--------------------- drivers/net/ne3210.c | 15 ++++++-------- drivers/net/smc-mca.c | 6 +++--- drivers/net/tokenring/madgemc.c | 2 +- drivers/net/tulip/de4x5.c | 4 ++-- 10 files changed, 68 insertions(+), 72 deletions(-) diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index 5f25889e27e..44b28b2d700 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -185,7 +185,7 @@ static int max_interrupt_work = 10; static int nopnp; #endif -static int el3_common_init(struct net_device *dev); +static int __devinit el3_common_init(struct net_device *dev); static void el3_common_remove(struct net_device *dev); static ushort id_read_eeprom(int index); static ushort read_eeprom(int ioaddr, int index); @@ -395,7 +395,7 @@ static struct isa_driver el3_isa_driver = { static int isa_registered; #ifdef CONFIG_PNP -static const struct pnp_device_id el3_pnp_ids[] __devinitconst = { +static struct pnp_device_id el3_pnp_ids[] = { { .id = "TCM5090" }, /* 3Com Etherlink III (TP) */ { .id = "TCM5091" }, /* 3Com Etherlink III */ { .id = "TCM5094" }, /* 3Com Etherlink III (combo) */ @@ -478,7 +478,7 @@ static int pnp_registered; #endif /* CONFIG_PNP */ #ifdef CONFIG_EISA -static const struct eisa_device_id el3_eisa_ids[] __devinitconst = { +static struct eisa_device_id el3_eisa_ids[] = { { "TCM5090" }, { "TCM5091" }, { "TCM5092" }, @@ -508,7 +508,7 @@ static int eisa_registered; #ifdef CONFIG_MCA static int el3_mca_probe(struct device *dev); -static const short el3_mca_adapter_ids[] __devinitconst = { +static short el3_mca_adapter_ids[] __initdata = { 0x627c, 0x627d, 0x62db, @@ -517,7 +517,7 @@ static const short el3_mca_adapter_ids[] __devinitconst = { 0x0000 }; -static const char *const el3_mca_adapter_names[] __devinitconst = { +static char *el3_mca_adapter_names[] __initdata = { "3Com 3c529 EtherLink III (10base2)", "3Com 3c529 EtherLink III (10baseT)", "3Com 3c529 EtherLink III (test mode)", @@ -601,7 +601,7 @@ static void el3_common_remove (struct net_device *dev) } #ifdef CONFIG_MCA -static int __devinit el3_mca_probe(struct device *device) +static int __init el3_mca_probe(struct device *device) { /* Based on Erik Nygren's (nygren@mit.edu) 3c529 patch, * heavily modified by Chris Beauregard @@ -671,7 +671,7 @@ static int __devinit el3_mca_probe(struct device *device) #endif /* CONFIG_MCA */ #ifdef CONFIG_EISA -static int __devinit el3_eisa_probe (struct device *device) +static int __init el3_eisa_probe (struct device *device) { short i; int ioaddr, irq, if_port; diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index 99f43d27544..8cc22568ebd 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -901,14 +901,14 @@ static const struct dev_pm_ops vortex_pm_ops = { #endif /* !CONFIG_PM */ #ifdef CONFIG_EISA -static const struct eisa_device_id vortex_eisa_ids[] __devinitconst = { +static struct eisa_device_id vortex_eisa_ids[] = { { "TCM5920", CH_3C592 }, { "TCM5970", CH_3C597 }, { "" } }; MODULE_DEVICE_TABLE(eisa, vortex_eisa_ids); -static int __devinit vortex_eisa_probe(struct device *device) +static int __init vortex_eisa_probe(struct device *device) { void __iomem *ioaddr; struct eisa_device *edev; diff --git a/drivers/net/depca.c b/drivers/net/depca.c index 17654059922..8b0084d17c8 100644 --- a/drivers/net/depca.c +++ b/drivers/net/depca.c @@ -331,18 +331,18 @@ static struct { "DE422",\ ""} -static const char* const depca_signature[] __devinitconst = DEPCA_SIGNATURE; +static char* __initdata depca_signature[] = DEPCA_SIGNATURE; enum depca_type { DEPCA, de100, de101, de200, de201, de202, de210, de212, de422, unknown }; -static const char depca_string[] = "depca"; +static char depca_string[] = "depca"; static int depca_device_remove (struct device *device); #ifdef CONFIG_EISA -static const struct eisa_device_id depca_eisa_ids[] __devinitconst = { +static struct eisa_device_id depca_eisa_ids[] = { { "DEC4220", de422 }, { "" } }; @@ -367,19 +367,19 @@ static struct eisa_driver depca_eisa_driver = { #define DE210_ID 0x628d #define DE212_ID 0x6def -static const short depca_mca_adapter_ids[] __devinitconst = { +static short depca_mca_adapter_ids[] = { DE210_ID, DE212_ID, 0x0000 }; -static const char *depca_mca_adapter_name[] = { +static char *depca_mca_adapter_name[] = { "DEC EtherWORKS MC Adapter (DE210)", "DEC EtherWORKS MC Adapter (DE212)", NULL }; -static const enum depca_type depca_mca_adapter_type[] = { +static enum depca_type depca_mca_adapter_type[] = { de210, de212, 0 @@ -541,9 +541,10 @@ static void SetMulticastFilter(struct net_device *dev); static int load_packet(struct net_device *dev, struct sk_buff *skb); static void depca_dbg_open(struct net_device *dev); -static const u_char de1xx_irq[] __devinitconst = { 2, 3, 4, 5, 7, 9, 0 }; -static const u_char de2xx_irq[] __devinitconst = { 5, 9, 10, 11, 15, 0 }; -static const u_char de422_irq[] __devinitconst = { 5, 9, 10, 11, 0 }; +static u_char de1xx_irq[] __initdata = { 2, 3, 4, 5, 7, 9, 0 }; +static u_char de2xx_irq[] __initdata = { 5, 9, 10, 11, 15, 0 }; +static u_char de422_irq[] __initdata = { 5, 9, 10, 11, 0 }; +static u_char *depca_irq; static int irq; static int io; @@ -579,7 +580,7 @@ static const struct net_device_ops depca_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -static int __devinit depca_hw_init (struct net_device *dev, struct device *device) +static int __init depca_hw_init (struct net_device *dev, struct device *device) { struct depca_private *lp; int i, j, offset, netRAM, mem_len, status = 0; @@ -747,7 +748,6 @@ static int __devinit depca_hw_init (struct net_device *dev, struct device *devic if (dev->irq < 2) { unsigned char irqnum; unsigned long irq_mask, delay; - const u_char *depca_irq; irq_mask = probe_irq_on(); @@ -770,7 +770,6 @@ static int __devinit depca_hw_init (struct net_device *dev, struct device *devic break; default: - depca_irq = NULL; break; /* Not reached */ } @@ -1303,7 +1302,7 @@ static void SetMulticastFilter(struct net_device *dev) } } -static int __devinit depca_common_init (u_long ioaddr, struct net_device **devp) +static int __init depca_common_init (u_long ioaddr, struct net_device **devp) { int status = 0; @@ -1334,7 +1333,7 @@ static int __devinit depca_common_init (u_long ioaddr, struct net_device **devp) /* ** Microchannel bus I/O device probe */ -static int __devinit depca_mca_probe(struct device *device) +static int __init depca_mca_probe(struct device *device) { unsigned char pos[2]; unsigned char where; @@ -1458,7 +1457,7 @@ static int __devinit depca_mca_probe(struct device *device) ** ISA bus I/O device probe */ -static void __devinit depca_platform_probe (void) +static void __init depca_platform_probe (void) { int i; struct platform_device *pldev; @@ -1498,7 +1497,7 @@ static void __devinit depca_platform_probe (void) } } -static enum depca_type __devinit depca_shmem_probe (ulong *mem_start) +static enum depca_type __init depca_shmem_probe (ulong *mem_start) { u_long mem_base[] = DEPCA_RAM_BASE_ADDRESSES; enum depca_type adapter = unknown; @@ -1559,7 +1558,7 @@ static int __devinit depca_isa_probe (struct platform_device *device) */ #ifdef CONFIG_EISA -static int __devinit depca_eisa_probe (struct device *device) +static int __init depca_eisa_probe (struct device *device) { enum depca_type adapter = unknown; struct eisa_device *edev; @@ -1630,7 +1629,7 @@ static int __devexit depca_device_remove (struct device *device) ** and Boot (readb) ROM. This will also give us a clue to the network RAM ** base address. */ -static int __devinit DepcaSignature(char *name, u_long base_addr) +static int __init DepcaSignature(char *name, u_long base_addr) { u_int i, j, k; void __iomem *ptr; diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c index c52a1df5d92..8e10d2f6a5a 100644 --- a/drivers/net/hp100.c +++ b/drivers/net/hp100.c @@ -188,14 +188,14 @@ struct hp100_private { * variables */ #ifdef CONFIG_ISA -static const char *const hp100_isa_tbl[] __devinitconst = { +static const char *hp100_isa_tbl[] = { "HWPF150", /* HP J2573 rev A */ "HWP1950", /* HP J2573 */ }; #endif #ifdef CONFIG_EISA -static const struct eisa_device_id hp100_eisa_tbl[] __devinitconst = { +static struct eisa_device_id hp100_eisa_tbl[] = { { "HWPF180" }, /* HP J2577 rev A */ { "HWP1920" }, /* HP 27248B */ { "HWP1940" }, /* HP J2577 */ @@ -336,7 +336,7 @@ static __devinit const char *hp100_read_id(int ioaddr) } #ifdef CONFIG_ISA -static __devinit int hp100_isa_probe1(struct net_device *dev, int ioaddr) +static __init int hp100_isa_probe1(struct net_device *dev, int ioaddr) { const char *sig; int i; @@ -372,7 +372,7 @@ static __devinit int hp100_isa_probe1(struct net_device *dev, int ioaddr) * EISA and PCI are handled by device infrastructure. */ -static int __devinit hp100_isa_probe(struct net_device *dev, int addr) +static int __init hp100_isa_probe(struct net_device *dev, int addr) { int err = -ENODEV; @@ -396,7 +396,7 @@ static int __devinit hp100_isa_probe(struct net_device *dev, int addr) #endif /* CONFIG_ISA */ #if !defined(MODULE) && defined(CONFIG_ISA) -struct net_device * __devinit hp100_probe(int unit) +struct net_device * __init hp100_probe(int unit) { struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private)); int err; @@ -2843,7 +2843,7 @@ static void cleanup_dev(struct net_device *d) } #ifdef CONFIG_EISA -static int __devinit hp100_eisa_probe (struct device *gendev) +static int __init hp100_eisa_probe (struct device *gendev) { struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private)); struct eisa_device *edev = to_eisa_device(gendev); diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c index 136d7544cc3..a7d6cad3295 100644 --- a/drivers/net/ibmlana.c +++ b/drivers/net/ibmlana.c @@ -895,12 +895,12 @@ static int ibmlana_irq; static int ibmlana_io; static int startslot; /* counts through slots when probing multiple devices */ -static const short ibmlana_adapter_ids[] __devinitconst = { +static short ibmlana_adapter_ids[] __initdata = { IBM_LANA_ID, 0x0000 }; -static const char *const ibmlana_adapter_names[] __devinitconst = { +static char *ibmlana_adapter_names[] __devinitdata = { "IBM LAN Adapter/A", NULL }; diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c index 69b5707db36..8800e1fe412 100644 --- a/drivers/net/irda/smsc-ircc2.c +++ b/drivers/net/irda/smsc-ircc2.c @@ -222,19 +222,19 @@ static void smsc_ircc_set_transceiver_for_speed(struct smsc_ircc_cb *self, u32 s static void smsc_ircc_sir_wait_hw_transmitter_finish(struct smsc_ircc_cb *self); /* Probing */ -static int smsc_ircc_look_for_chips(void); -static const struct smsc_chip * smsc_ircc_probe(unsigned short cfg_base, u8 reg, const struct smsc_chip *chip, char *type); -static int smsc_superio_flat(const struct smsc_chip *chips, unsigned short cfg_base, char *type); -static int smsc_superio_paged(const struct smsc_chip *chips, unsigned short cfg_base, char *type); -static int smsc_superio_fdc(unsigned short cfg_base); -static int smsc_superio_lpc(unsigned short cfg_base); +static int __init smsc_ircc_look_for_chips(void); +static const struct smsc_chip * __init smsc_ircc_probe(unsigned short cfg_base, u8 reg, const struct smsc_chip *chip, char *type); +static int __init smsc_superio_flat(const struct smsc_chip *chips, unsigned short cfg_base, char *type); +static int __init smsc_superio_paged(const struct smsc_chip *chips, unsigned short cfg_base, char *type); +static int __init smsc_superio_fdc(unsigned short cfg_base); +static int __init smsc_superio_lpc(unsigned short cfg_base); #ifdef CONFIG_PCI -static int preconfigure_smsc_chip(struct smsc_ircc_subsystem_configuration *conf); -static int preconfigure_through_82801(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf); -static void preconfigure_ali_port(struct pci_dev *dev, +static int __init preconfigure_smsc_chip(struct smsc_ircc_subsystem_configuration *conf); +static int __init preconfigure_through_82801(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf); +static void __init preconfigure_ali_port(struct pci_dev *dev, unsigned short port); -static int preconfigure_through_ali(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf); -static int smsc_ircc_preconfigure_subsystems(unsigned short ircc_cfg, +static int __init preconfigure_through_ali(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf); +static int __init smsc_ircc_preconfigure_subsystems(unsigned short ircc_cfg, unsigned short ircc_fir, unsigned short ircc_sir, unsigned char ircc_dma, @@ -366,7 +366,7 @@ static inline void register_bank(int iobase, int bank) } /* PNP hotplug support */ -static const struct pnp_device_id smsc_ircc_pnp_table[] __devinitconst = { +static const struct pnp_device_id smsc_ircc_pnp_table[] = { { .id = "SMCf010", .driver_data = 0 }, /* and presumably others */ { } @@ -515,7 +515,7 @@ static const struct net_device_ops smsc_ircc_netdev_ops = { * Try to open driver instance * */ -static int __devinit smsc_ircc_open(unsigned int fir_base, unsigned int sir_base, u8 dma, u8 irq) +static int __init smsc_ircc_open(unsigned int fir_base, unsigned int sir_base, u8 dma, u8 irq) { struct smsc_ircc_cb *self; struct net_device *dev; @@ -2273,7 +2273,7 @@ static int __init smsc_superio_paged(const struct smsc_chip *chips, unsigned sho } -static int __devinit smsc_access(unsigned short cfg_base, unsigned char reg) +static int __init smsc_access(unsigned short cfg_base, unsigned char reg) { IRDA_DEBUG(1, "%s\n", __func__); @@ -2281,7 +2281,7 @@ static int __devinit smsc_access(unsigned short cfg_base, unsigned char reg) return inb(cfg_base) != reg ? -1 : 0; } -static const struct smsc_chip * __devinit smsc_ircc_probe(unsigned short cfg_base, u8 reg, const struct smsc_chip *chip, char *type) +static const struct smsc_chip * __init smsc_ircc_probe(unsigned short cfg_base, u8 reg, const struct smsc_chip *chip, char *type) { u8 devid, xdevid, rev; @@ -2406,7 +2406,7 @@ static int __init smsc_superio_lpc(unsigned short cfg_base) #ifdef CONFIG_PCI #define PCIID_VENDOR_INTEL 0x8086 #define PCIID_VENDOR_ALI 0x10b9 -static const struct smsc_ircc_subsystem_configuration subsystem_configurations[] __devinitconst = { +static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __initdata = { /* * Subsystems needing entries: * 0x10b9:0x1533 0x103c:0x0850 HP nx9010 family @@ -2532,7 +2532,7 @@ static const struct smsc_ircc_subsystem_configuration subsystem_configurations[] * (FIR port, SIR port, FIR DMA, FIR IRQ) * through the chip configuration port. */ -static int __devinit preconfigure_smsc_chip(struct +static int __init preconfigure_smsc_chip(struct smsc_ircc_subsystem_configuration *conf) { @@ -2633,7 +2633,7 @@ static int __devinit preconfigure_smsc_chip(struct * or Intel 82801DB/DBL (ICH4/ICH4-L) LPC Interface Bridge. * They all work the same way! */ -static int __devinit preconfigure_through_82801(struct pci_dev *dev, +static int __init preconfigure_through_82801(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf) @@ -2786,7 +2786,7 @@ static int __devinit preconfigure_through_82801(struct pci_dev *dev, * This is based on reverse-engineering since ALi does not * provide any data sheet for the 1533 chip. */ -static void __devinit preconfigure_ali_port(struct pci_dev *dev, +static void __init preconfigure_ali_port(struct pci_dev *dev, unsigned short port) { unsigned char reg; @@ -2824,7 +2824,7 @@ static void __devinit preconfigure_ali_port(struct pci_dev *dev, IRDA_MESSAGE("Activated ALi 1533 ISA bridge port 0x%04x.\n", port); } -static int __devinit preconfigure_through_ali(struct pci_dev *dev, +static int __init preconfigure_through_ali(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf) @@ -2837,7 +2837,7 @@ static int __devinit preconfigure_through_ali(struct pci_dev *dev, return preconfigure_smsc_chip(conf); } -static int __devinit smsc_ircc_preconfigure_subsystems(unsigned short ircc_cfg, +static int __init smsc_ircc_preconfigure_subsystems(unsigned short ircc_cfg, unsigned short ircc_fir, unsigned short ircc_sir, unsigned char ircc_dma, @@ -2849,7 +2849,7 @@ static int __devinit smsc_ircc_preconfigure_subsystems(unsigned short ircc_cfg, int ret = 0; for_each_pci_dev(dev) { - const struct smsc_ircc_subsystem_configuration *conf; + struct smsc_ircc_subsystem_configuration *conf; /* * Cache the subsystem vendor/device: diff --git a/drivers/net/ne3210.c b/drivers/net/ne3210.c index e8984b0ca52..243ed2aee88 100644 --- a/drivers/net/ne3210.c +++ b/drivers/net/ne3210.c @@ -80,20 +80,17 @@ static void ne3210_block_output(struct net_device *dev, int count, const unsigne #define NE3210_DEBUG 0x0 -static const unsigned char irq_map[] __devinitconst = - { 15, 12, 11, 10, 9, 7, 5, 3 }; -static const unsigned int shmem_map[] __devinitconst = - { 0xff0, 0xfe0, 0xfff0, 0xd8, 0xffe0, 0xffc0, 0xd0, 0x0 }; -static const char *const ifmap[] __devinitconst = - { "UTP", "?", "BNC", "AUI" }; -static const int ifmap_val[] __devinitconst = { +static unsigned char irq_map[] __initdata = {15, 12, 11, 10, 9, 7, 5, 3}; +static unsigned int shmem_map[] __initdata = {0xff0, 0xfe0, 0xfff0, 0xd8, 0xffe0, 0xffc0, 0xd0, 0x0}; +static const char *ifmap[] __initdata = {"UTP", "?", "BNC", "AUI"}; +static int ifmap_val[] __initdata = { IF_PORT_10BASET, IF_PORT_UNKNOWN, IF_PORT_10BASE2, IF_PORT_AUI, }; -static int __devinit ne3210_eisa_probe (struct device *device) +static int __init ne3210_eisa_probe (struct device *device) { unsigned long ioaddr, phys_mem; int i, retval, port_index; @@ -316,7 +313,7 @@ static void ne3210_block_output(struct net_device *dev, int count, memcpy_toio(shmem, buf, count); } -static const struct eisa_device_id ne3210_ids[] __devinitconst = { +static struct eisa_device_id ne3210_ids[] = { { "EGL0101" }, { "NVL1801" }, { "" }, diff --git a/drivers/net/smc-mca.c b/drivers/net/smc-mca.c index 0f29f261fcf..d07c39cb4da 100644 --- a/drivers/net/smc-mca.c +++ b/drivers/net/smc-mca.c @@ -156,7 +156,7 @@ static const struct { { 14, 15 } }; -static const short smc_mca_adapter_ids[] __devinitconst = { +static short smc_mca_adapter_ids[] __initdata = { 0x61c8, 0x61c9, 0x6fc0, @@ -168,7 +168,7 @@ static const short smc_mca_adapter_ids[] __devinitconst = { 0x0000 }; -static const char *const smc_mca_adapter_names[] __devinitconst = { +static char *smc_mca_adapter_names[] __initdata = { "SMC Ethercard PLUS Elite/A BNC/AUI (WD8013EP/A)", "SMC Ethercard PLUS Elite/A UTP/AUI (WD8013WP/A)", "WD Ethercard PLUS/A (WD8003E/A or WD8003ET/A)", @@ -199,7 +199,7 @@ static const struct net_device_ops ultramca_netdev_ops = { #endif }; -static int __devinit ultramca_probe(struct device *gen_dev) +static int __init ultramca_probe(struct device *gen_dev) { unsigned short ioaddr; struct net_device *dev; diff --git a/drivers/net/tokenring/madgemc.c b/drivers/net/tokenring/madgemc.c index 1313aa1315f..2bedc0ace81 100644 --- a/drivers/net/tokenring/madgemc.c +++ b/drivers/net/tokenring/madgemc.c @@ -727,7 +727,7 @@ static int __devexit madgemc_remove(struct device *device) return 0; } -static const short madgemc_adapter_ids[] __devinitconst = { +static short madgemc_adapter_ids[] __initdata = { 0x002d, 0x0000 }; diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index 45144d5bd11..efaa1d69b72 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -1995,7 +1995,7 @@ SetMulticastFilter(struct net_device *dev) static u_char de4x5_irq[] = EISA_ALLOWED_IRQ_LIST; -static int __devinit de4x5_eisa_probe (struct device *gendev) +static int __init de4x5_eisa_probe (struct device *gendev) { struct eisa_device *edev; u_long iobase; @@ -2097,7 +2097,7 @@ static int __devexit de4x5_eisa_remove (struct device *device) return 0; } -static const struct eisa_device_id de4x5_eisa_ids[] __devinitconst = { +static struct eisa_device_id de4x5_eisa_ids[] = { { "DEC4250", 0 }, /* 0 is the board name index... */ { "" } }; -- cgit v1.2.3-70-g09d2 From a5b2c5b2ad5853591a6cac6134cd0f599a720865 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 31 May 2011 11:31:41 -0700 Subject: AppArmor: fix oops in apparmor_setprocattr When invalid parameters are passed to apparmor_setprocattr a NULL deref oops occurs when it tries to record an audit message. This is because it is passing NULL for the profile parameter for aa_audit. But aa_audit now requires that the profile passed is not NULL. Fix this by passing the current profile on the task that is trying to setprocattr. Signed-off-by: Kees Cook Signed-off-by: John Johansen Cc: stable@kernel.org Signed-off-by: James Morris --- security/apparmor/lsm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index ae3a698415e..ec1bcecf2cd 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -593,7 +593,8 @@ static int apparmor_setprocattr(struct task_struct *task, char *name, sa.aad.op = OP_SETPROCATTR; sa.aad.info = name; sa.aad.error = -EINVAL; - return aa_audit(AUDIT_APPARMOR_DENIED, NULL, GFP_KERNEL, + return aa_audit(AUDIT_APPARMOR_DENIED, + __aa_current_profile(), GFP_KERNEL, &sa, NULL); } } else if (strcmp(name, "exec") == 0) { -- cgit v1.2.3-70-g09d2 From 4c49ff3fe128ca68dabd07537415c419ad7f82f9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 Jun 2011 08:27:41 +0200 Subject: block: blkdev_get() should access ->bd_disk only after success d4dc210f69 (block: don't block events on excl write for non-optical devices) added dereferencing of bdev->bd_disk to test GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; however, bdev->bd_disk can be %NULL if open failed which can lead to an oops. Test the flag after testing open was successful, not before. Signed-off-by: Tejun Heo Reported-by: David Miller Tested-by: David Miller Cc: stable@kernel.org Signed-off-by: Jens Axboe --- fs/block_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 1f2b1997833..1a2421f908f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1272,8 +1272,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) * individual writeable reference is too fragile given the * way @mode is used in blkdev_get/put(). */ - if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) && - !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { + if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder && + (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { bdev->bd_write_holder = true; disk_block_events(disk); } -- cgit v1.2.3-70-g09d2 From 603d04b2010976a52f62b7633f9999d104046900 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Sat, 28 May 2011 10:04:25 -0400 Subject: kgdbts: only use new asm-generic/ptrace.h api when needed The new instruction_pointer_set helper is defined for people who have converted to asm-generic/ptrace.h, so don't use it generally unless the arch needs it (in which case it has been converted). This should fix building of kgdb tests for arches not yet converted. Signed-off-by: Mike Frysinger Acked-by: Stephen Rothwell Cc: Jason Wessel Cc: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/kgdbts.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index b0c56313dbb..8cebec5e85e 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -304,7 +304,10 @@ static int check_and_rewind_pc(char *put_str, char *arg) return 1; } /* Readjust the instruction pointer if needed */ - instruction_pointer_set(&kgdbts_regs, ip + offset); + ip += offset; +#ifdef GDB_ADJUSTS_BREAK_OFFSET + instruction_pointer_set(&kgdbts_regs, ip); +#endif return 0; } -- cgit v1.2.3-70-g09d2 From ab75950b11e74145ffe61376ac073d56645aab8a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 26 May 2011 06:51:48 +0300 Subject: UBIFS: supress false error messages Commit ab51afe05273741f72383529ef488aa1ea598ec6 was a good clean-up, but it introduced a regression - now UBIFS prints scary error messages during recovery on all corrupted nodes, even though the corruptions are expected (due to a power cut). This patch fixes the issue. Additionally fix a typo in a commentary introduced by the same commit. Signed-off-by: Artem Bityutskiy --- fs/ubifs/recovery.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 731d9e2e7b5..95e24183b71 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -635,7 +635,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * Scan quietly until there is an error from which we cannot * recover */ - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); if (ret == SCANNED_A_NODE) { /* A valid node, and not a padding node */ struct ubifs_ch *ch = buf; @@ -701,7 +701,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * While we are in the middle of the same min. I/O unit keep dropping * nodes. So basically, what we want is to make sure that the last min. * I/O unit where we saw the corruption is dropped completely with all - * the uncorrupted node which may possibly sit there. + * the uncorrupted nodes which may possibly sit there. * * In other words, let's name the min. I/O unit where the corruption * starts B, and the previous min. I/O unit A. The below code tries to -- cgit v1.2.3-70-g09d2 From 1a0b06997ceca96db9259e537eb935f9fe59a3de Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 26 May 2011 08:26:05 +0300 Subject: UBIFS: introduce a "grouped" journal head flag Journal heads are different in a way how UBIFS writes nodes there. All normal journal heads receive grouped nodes, while the GC journal heads receives ungrouped nodes. This patch adds a 'grouped' flag to 'struct ubifs_jhead' which describes this property. This patch is a preparation to a further recovery fix. Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 5 ++++- fs/ubifs/ubifs.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1ab0d22e4c9..1e40db740da 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -811,15 +811,18 @@ static int alloc_wbufs(struct ubifs_info *c) c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; c->jheads[i].wbuf.jhead = i; + c->jheads[i].grouped = 1; } c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; /* * Garbage Collector head likely contains long-term data and - * does not need to be synchronized by timer. + * does not need to be synchronized by timer. Also GC head nodes are + * not grouped. */ c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; c->jheads[GCHD].wbuf.no_timer = 1; + c->jheads[GCHD].grouped = 0; return 0; } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index a70d7b4ffb2..adeca14c0e9 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -722,12 +722,14 @@ struct ubifs_bud { * struct ubifs_jhead - journal head. * @wbuf: head's write-buffer * @buds_list: list of bud LEBs belonging to this journal head + * @grouped: non-zero if UBIFS groups nodes when writing to this journal head * * Note, the @buds list is protected by the @c->buds_lock. */ struct ubifs_jhead { struct ubifs_wbuf wbuf; struct list_head buds_list; + unsigned int grouped:1; }; /** -- cgit v1.2.3-70-g09d2 From efcfde54ca68091b164f9aec544c7233a9760aff Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 26 May 2011 08:36:52 +0300 Subject: UBIFS: amend ubifs_recover_leb interface Instead of passing "grouped" parameter to 'ubifs_recover_leb()' which tells whether the nodes are grouped in the LEB to recover, pass the journal head number and let 'ubifs_recover_leb()' look at the journal head's 'grouped' flag. This patch is a preparation to a further fix where we'll need to know the journal head number for other purposes. Signed-off-by: Artem Bityutskiy --- fs/ubifs/orphan.c | 2 +- fs/ubifs/recovery.c | 10 ++++++---- fs/ubifs/replay.c | 3 +-- fs/ubifs/ubifs.h | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index bd644bf587a..a5422fffbd6 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -674,7 +674,7 @@ static int kill_orphans(struct ubifs_info *c) if (IS_ERR(sleb)) { if (PTR_ERR(sleb) == -EUCLEAN) sleb = ubifs_recover_leb(c, lnum, 0, - c->sbuf, 0); + c->sbuf, -1); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 95e24183b71..6adb5328a01 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -604,7 +604,8 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) * @lnum: LEB number * @offs: offset * @sbuf: LEB-sized buffer to use - * @grouped: nodes may be grouped for recovery + * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not + * belong to any journal head) * * This function does a scan of a LEB, but caters for errors that might have * been caused by the unclean unmount from which we are attempting to recover. @@ -612,13 +613,14 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) * found, and a negative error code in case of failure. */ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, - int offs, void *sbuf, int grouped) + int offs, void *sbuf, int jhead) { int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; + int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped; struct ubifs_scan_leb *sleb; void *buf = sbuf + offs; - dbg_rcvry("%d:%d", lnum, offs); + dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped); sleb = ubifs_start_scan(c, lnum, offs, sbuf); if (IS_ERR(sleb)) @@ -881,7 +883,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, } ubifs_scan_destroy(sleb); } - return ubifs_recover_leb(c, lnum, offs, sbuf, 0); + return ubifs_recover_leb(c, lnum, offs, sbuf, -1); } /** diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 6617280d167..5e97161ce4d 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -557,8 +557,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) * these LEBs could possibly be written to at the power cut * time. */ - sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, - b->bud->jhead != GCHD); + sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); else sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); if (IS_ERR(sleb)) diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index adeca14c0e9..f79983d6f86 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1744,7 +1744,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); int ubifs_recover_master_node(struct ubifs_info *c); int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, - int offs, void *sbuf, int grouped); + int offs, void *sbuf, int jhead); struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf); int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); -- cgit v1.2.3-70-g09d2 From da8b94ea61c5d80aae0cc7b7541f1e0fa7459391 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 26 May 2011 08:58:19 +0300 Subject: UBIFS: fix recovery broken by the previous recovery fix Unfortunately, the recovery fix d1606a59b6be4ea392eabd40d1250aa1eeb19efb (UBIFS: fix extremely rare mount failure) broke recovery. This commit make UBIFS drop the last min. I/O unit in all journal heads, but this is needed only for the GC head. And this does not work for non-GC heads. For example, if suppose we have min. I/O units A and B, and A contains a valid node X, which was fsynced, and then a group of nodes Y which spans the rest of A and B. In this case we'll drop not only Y, but also X, which is obviously incorrect. This patch fixes the issue and additionally makes recovery to drop last min. I/O unit only for the GC head, and leave things as they have been for ages for the other heads - this is safer. Signed-off-by: Artem Bityutskiy --- fs/ubifs/recovery.c | 152 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 87 insertions(+), 65 deletions(-) diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 6adb5328a01..783d8e0beb7 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -564,19 +564,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, } /** - * drop_last_node - drop the last node or group of nodes. + * drop_last_group - drop the last group of nodes. * @sleb: scanned LEB information * @offs: offset of dropped nodes is returned here - * @grouped: non-zero if whole group of nodes have to be dropped * * This is a helper function for 'ubifs_recover_leb()' which drops the last - * node of the scanned LEB or the last group of nodes if @grouped is not zero. - * This function returns %1 if a node was dropped and %0 otherwise. + * group of nodes of the scanned LEB. */ -static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) +static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) { - int dropped = 0; - while (!list_empty(&sleb->nodes)) { struct ubifs_scan_node *snod; struct ubifs_ch *ch; @@ -585,17 +581,40 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) list); ch = snod->node; if (ch->group_type != UBIFS_IN_NODE_GROUP) - return dropped; - dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); + break; + + dbg_rcvry("dropping grouped node at %d:%d", + sleb->lnum, snod->offs); + *offs = snod->offs; + list_del(&snod->list); + kfree(snod); + sleb->nodes_cnt -= 1; + } +} + +/** + * drop_last_node - drop the last node. + * @sleb: scanned LEB information + * @offs: offset of dropped nodes is returned here + * @grouped: non-zero if whole group of nodes have to be dropped + * + * This is a helper function for 'ubifs_recover_leb()' which drops the last + * node of the scanned LEB. + */ +static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) +{ + struct ubifs_scan_node *snod; + + if (!list_empty(&sleb->nodes)) { + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, + list); + + dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); *offs = snod->offs; list_del(&snod->list); kfree(snod); sleb->nodes_cnt -= 1; - dropped = 1; - if (!grouped) - break; } - return dropped; } /** @@ -697,59 +716,62 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * If nodes are grouped, always drop the incomplete group at * the end. */ - drop_last_node(sleb, &offs, 1); + drop_last_group(sleb, &offs); - /* - * While we are in the middle of the same min. I/O unit keep dropping - * nodes. So basically, what we want is to make sure that the last min. - * I/O unit where we saw the corruption is dropped completely with all - * the uncorrupted nodes which may possibly sit there. - * - * In other words, let's name the min. I/O unit where the corruption - * starts B, and the previous min. I/O unit A. The below code tries to - * deal with a situation when half of B contains valid nodes or the end - * of a valid node, and the second half of B contains corrupted data or - * garbage. This means that UBIFS had been writing to B just before the - * power cut happened. I do not know how realistic is this scenario - * that half of the min. I/O unit had been written successfully and the - * other half not, but this is possible in our 'failure mode emulation' - * infrastructure at least. - * - * So what is the problem, why we need to drop those nodes? Whey can't - * we just clean-up the second half of B by putting a padding node - * there? We can, and this works fine with one exception which was - * reproduced with power cut emulation testing and happens extremely - * rarely. The description follows, but it is worth noting that that is - * only about the GC head, so we could do this trick only if the bud - * belongs to the GC head, but it does not seem to be worth an - * additional "if" statement. - * - * So, imagine the file-system is full, we run GC which is moving valid - * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head - * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X - * and will try to continue. Imagine that LEB X is currently the - * dirtiest LEB, and the amount of used space in LEB Y is exactly the - * same as amount of free space in LEB X. - * - * And a power cut happens when nodes are moved from LEB X to LEB Y. We - * are here trying to recover LEB Y which is the GC head LEB. We find - * the min. I/O unit B as described above. Then we clean-up LEB Y by - * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function - * fails, because it cannot find a dirty LEB which could be GC'd into - * LEB Y! Even LEB X does not match because the amount of valid nodes - * there does not fit the free space in LEB Y any more! And this is - * because of the padding node which we added to LEB Y. The - * user-visible effect of this which I once observed and analysed is - * that we cannot mount the file-system with -ENOSPC error. - * - * So obviously, to make sure that situation does not happen we should - * free min. I/O unit B in LEB Y completely and the last used min. I/O - * unit in LEB Y should be A. This is basically what the below code - * tries to do. - */ - while (min_io_unit == round_down(offs, c->min_io_size) && - min_io_unit != offs && - drop_last_node(sleb, &offs, grouped)); + if (jhead == GCHD) { + /* + * If this LEB belongs to the GC head then while we are in the + * middle of the same min. I/O unit keep dropping nodes. So + * basically, what we want is to make sure that the last min. + * I/O unit where we saw the corruption is dropped completely + * with all the uncorrupted nodes which may possibly sit there. + * + * In other words, let's name the min. I/O unit where the + * corruption starts B, and the previous min. I/O unit A. The + * below code tries to deal with a situation when half of B + * contains valid nodes or the end of a valid node, and the + * second half of B contains corrupted data or garbage. This + * means that UBIFS had been writing to B just before the power + * cut happened. I do not know how realistic is this scenario + * that half of the min. I/O unit had been written successfully + * and the other half not, but this is possible in our 'failure + * mode emulation' infrastructure at least. + * + * So what is the problem, why we need to drop those nodes? Why + * can't we just clean-up the second half of B by putting a + * padding node there? We can, and this works fine with one + * exception which was reproduced with power cut emulation + * testing and happens extremely rarely. + * + * Imagine the file-system is full, we run GC which starts + * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is + * the current GC head LEB). The @c->gc_lnum is -1, which means + * that GC will retain LEB X and will try to continue. Imagine + * that LEB X is currently the dirtiest LEB, and the amount of + * used space in LEB Y is exactly the same as amount of free + * space in LEB X. + * + * And a power cut happens when nodes are moved from LEB X to + * LEB Y. We are here trying to recover LEB Y which is the GC + * head LEB. We find the min. I/O unit B as described above. + * Then we clean-up LEB Y by padding min. I/O unit. And later + * 'ubifs_rcvry_gc_commit()' function fails, because it cannot + * find a dirty LEB which could be GC'd into LEB Y! Even LEB X + * does not match because the amount of valid nodes there does + * not fit the free space in LEB Y any more! And this is + * because of the padding node which we added to LEB Y. The + * user-visible effect of this which I once observed and + * analysed is that we cannot mount the file-system with + * -ENOSPC error. + * + * So obviously, to make sure that situation does not happen we + * should free min. I/O unit B in LEB Y completely and the last + * used min. I/O unit in LEB Y should be A. This is basically + * what the below code tries to do. + */ + while (offs > min_io_unit) + drop_last_node(sleb, &offs); + } buf = sbuf + offs; len = c->leb_size - offs; -- cgit v1.2.3-70-g09d2 From 63da029015b5255915cd6d61f19ffc276ad4635d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 23 May 2011 11:37:09 -0700 Subject: mtd: fix physmap.h warnings Fix build warnings in physmap.h: include/linux/mtd/physmap.h:25: warning: 'struct platform_device' declared inside parameter list include/linux/mtd/physmap.h:25: warning: its scope is only this definition or declaration, which is probably not what you want include/linux/mtd/physmap.h:26: warning: 'struct platform_device' declared inside parameter list include/linux/mtd/physmap.h:27: warning: 'struct platform_device' declared inside parameter list Signed-off-by: Randy Dunlap Signed-off-by: David Woodhouse --- include/linux/mtd/physmap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index d40bfa1d9c9..e5f21d293c7 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -19,6 +19,7 @@ #include struct map_info; +struct platform_device; struct physmap_flash_data { unsigned int width; -- cgit v1.2.3-70-g09d2 From 6dd9a7c73761a8a5f5475d5cfdc15368a0f4c06d Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Wed, 25 May 2011 19:13:49 +0100 Subject: intel-iommu: Enable super page (2MiB, 1GiB, etc.) support There are no externally-visible changes with this. In the loop in the internal __domain_mapping() function, we simply detect if we are mapping: - size >= 2MiB, and - virtual address aligned to 2MiB, and - physical address aligned to 2MiB, and - on hardware that supports superpages. (and likewise for larger superpages). We automatically use a superpage for such mappings. We never have to worry about *breaking* superpages, since we trust that we will always *unmap* the same range that was mapped. So all we need to do is ensure that dma_pte_clear_range() will also cope with superpages. Adjust pfn_to_dma_pte() to take a superpage 'level' as an argument, so it can return a PTE at the appropriate level rather than always extending the page tables all the way down to level 1. Again, this is simplified by the fact that we should never encounter existing small pages when we're creating a mapping; any old mapping that used the same virtual range will have been entirely removed and its obsolete page tables freed. Provide an 'intel_iommu=sp_off' argument on the command line as a chicken bit. Not that it should ever be required. == The original commit seen in the iommu-2.6.git was Youquan's implementation (and completion) of my own half-baked code which I'd typed into an email. Followed by half a dozen subsequent 'fixes'. I've taken the unusual step of rewriting history and collapsing the original commits in order to keep the main history simpler, and make life easier for the people who are going to have to backport this to older kernels. And also so I can give it a more coherent commit comment which (hopefully) gives a better explanation of what's going on. The original sequence of commits leading to identical code was: Youquan Song (3): intel-iommu: super page support intel-iommu: Fix superpage alignment calculation error intel-iommu: Fix superpage level calculation error in dma_pfn_level_pte() David Woodhouse (4): intel-iommu: Precalculate superpage support for dmar_domain intel-iommu: Fix hardware_largepage_caps() intel-iommu: Fix inappropriate use of superpages in __domain_mapping() intel-iommu: Fix phys_pfn in __domain_mapping for sglist pages Signed-off-by: Youquan Song Signed-off-by: David Woodhouse --- Documentation/kernel-parameters.txt | 5 +- drivers/pci/intel-iommu.c | 157 +++++++++++++++++++++++++++++++----- include/linux/dma_remapping.h | 4 + 3 files changed, 147 insertions(+), 19 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cc85a927819..d005487c1a2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. With this option on every unmap_single operation will result in a hardware IOTLB flush operation as opposed to batching them for performance. - + sp_off [Default Off] + By default, super page will be supported if Intel IOMMU + has the capability. With this option, super page will + not be supported. intremap= [X86-64, Intel-IOMMU] Format: { on (default) | off | nosid } on enable Interrupt Remapping (default) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 395f253c049..e6fe1994f9d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -115,6 +115,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level) return (pfn + level_size(level) - 1) & level_mask(level); } +static inline unsigned long lvl_to_nr_pages(unsigned int lvl) +{ + return 1 << ((lvl - 1) * LEVEL_STRIDE); +} + /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to work. */ static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) @@ -343,6 +348,9 @@ struct dmar_domain { int iommu_coherency;/* indicate coherency of iommu access */ int iommu_snooping; /* indicate snooping control feature*/ int iommu_count; /* reference count of iommu */ + int iommu_superpage;/* Level of superpages supported: + 0 == 4KiB (no superpages), 1 == 2MiB, + 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ spinlock_t iommu_lock; /* protect iommu set in domain */ u64 max_addr; /* maximum mapped address */ }; @@ -392,6 +400,7 @@ int dmar_disabled = 1; static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; +static int intel_iommu_superpage = 1; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -422,6 +431,10 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable batched IOTLB flush\n"); intel_iommu_strict = 1; + } else if (!strncmp(str, "sp_off", 6)) { + printk(KERN_INFO + "Intel-IOMMU: disable supported super page\n"); + intel_iommu_superpage = 0; } str += strcspn(str, ","); @@ -560,11 +573,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain) } } +static void domain_update_iommu_superpage(struct dmar_domain *domain) +{ + int i, mask = 0xf; + + if (!intel_iommu_superpage) { + domain->iommu_superpage = 0; + return; + } + + domain->iommu_superpage = 4; /* 1TiB */ + + for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { + mask |= cap_super_page_val(g_iommus[i]->cap); + if (!mask) { + break; + } + } + domain->iommu_superpage = fls(mask); +} + /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { domain_update_iommu_coherency(domain); domain_update_iommu_snooping(domain); + domain_update_iommu_superpage(domain); } static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) @@ -694,23 +728,31 @@ out: } static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn) + unsigned long pfn, int large_level) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); - int offset; + int offset, target_level; BUG_ON(!domain->pgd); BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); parent = domain->pgd; + /* Search pte */ + if (!large_level) + target_level = 1; + else + target_level = large_level; + while (level > 0) { void *tmp_page; offset = pfn_level_offset(pfn, level); pte = &parent[offset]; - if (level == 1) + if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE)) + break; + if (level == target_level) break; if (!dma_pte_present(pte)) { @@ -738,10 +780,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return pte; } + /* return address's pte at specific level */ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, unsigned long pfn, - int level) + int level, int *large_page) { struct dma_pte *parent, *pte = NULL; int total = agaw_to_level(domain->agaw); @@ -754,8 +797,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, if (level == total) return pte; - if (!dma_pte_present(pte)) + if (!dma_pte_present(pte)) { + *large_page = total; break; + } + + if (pte->val & DMA_PTE_LARGE_PAGE) { + *large_page = total; + return pte; + } + parent = phys_to_virt(dma_pte_addr(pte)); total--; } @@ -768,6 +819,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + unsigned int large_page = 1; struct dma_pte *first_pte, *pte; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); @@ -776,14 +828,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain, /* we don't need lock here; nobody else touches the iova range */ do { - first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); + large_page = 1; + first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page); if (!pte) { - start_pfn = align_to_level(start_pfn + 1, 2); + start_pfn = align_to_level(start_pfn + 1, large_page + 1); continue; } - do { + do { dma_clear_pte(pte); - start_pfn++; + start_pfn += lvl_to_nr_pages(large_page); pte++; } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); @@ -803,6 +856,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int total = agaw_to_level(domain->agaw); int level; unsigned long tmp; + int large_page = 2; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); @@ -818,7 +872,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, return; do { - first_pte = pte = dma_pfn_level_pte(domain, tmp, level); + large_page = level; + first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); + if (large_page > level) + level = large_page + 1; if (!pte) { tmp = align_to_level(tmp + 1, level + 1); continue; @@ -1402,6 +1459,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) else domain->iommu_snooping = 0; + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); domain->iommu_count = 1; domain->nid = iommu->node; @@ -1657,6 +1715,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr, return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; } +/* Return largest possible superpage level for a given mapping */ +static inline int hardware_largepage_caps(struct dmar_domain *domain, + unsigned long iov_pfn, + unsigned long phy_pfn, + unsigned long pages) +{ + int support, level = 1; + unsigned long pfnmerge; + + support = domain->iommu_superpage; + + /* To use a large page, the virtual *and* physical addresses + must be aligned to 2MiB/1GiB/etc. Lower bits set in either + of them will mean we have to use smaller pages. So just + merge them and check both at once. */ + pfnmerge = iov_pfn | phy_pfn; + + while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { + pages >>= VTD_STRIDE_SHIFT; + if (!pages) + break; + pfnmerge >>= VTD_STRIDE_SHIFT; + level++; + support--; + } + return level; +} + static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -1665,6 +1751,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, phys_addr_t uninitialized_var(pteval); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned long sg_res; + unsigned int largepage_lvl = 0; + unsigned long lvl_pages = 0; BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); @@ -1680,7 +1768,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; } - while (nr_pages--) { + while (nr_pages > 0) { uint64_t tmp; if (!sg_res) { @@ -1688,11 +1776,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; sg->dma_length = sg->length; pteval = page_to_phys(sg_page(sg)) | prot; + phys_pfn = pteval >> VTD_PAGE_SHIFT; } + if (!pte) { - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); + largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); + + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl); if (!pte) return -ENOMEM; + /* It is large page*/ + if (largepage_lvl > 1) + pteval |= DMA_PTE_LARGE_PAGE; + else + pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; + } /* We don't need lock here, nobody else * touches the iova range @@ -1708,16 +1806,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, } WARN_ON(1); } + + lvl_pages = lvl_to_nr_pages(largepage_lvl); + + BUG_ON(nr_pages < lvl_pages); + BUG_ON(sg_res < lvl_pages); + + nr_pages -= lvl_pages; + iov_pfn += lvl_pages; + phys_pfn += lvl_pages; + pteval += lvl_pages * VTD_PAGE_SIZE; + sg_res -= lvl_pages; + + /* If the next PTE would be the first in a new page, then we + need to flush the cache on the entries we've just written. + And then we'll need to recalculate 'pte', so clear it and + let it get set again in the if (!pte) block above. + + If we're done (!nr_pages) we need to flush the cache too. + + Also if we've been setting superpages, we may need to + recalculate 'pte' and switch back to smaller pages for the + end of the mapping, if the trailing size is not enough to + use another superpage (i.e. sg_res < lvl_pages). */ pte++; - if (!nr_pages || first_pte_in_page(pte)) { + if (!nr_pages || first_pte_in_page(pte) || + (largepage_lvl > 1 && sg_res < lvl_pages)) { domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); pte = NULL; } - iov_pfn++; - pteval += VTD_PAGE_SIZE; - sg_res--; - if (!sg_res) + + if (!sg_res && nr_pages) sg = sg_next(sg); } return 0; @@ -3527,6 +3647,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_count = 0; domain->iommu_coherency = 0; domain->iommu_snooping = 0; + domain->iommu_superpage = 0; domain->max_addr = 0; domain->nid = -1; @@ -3742,7 +3863,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, struct dma_pte *pte; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); if (pte) phys = dma_pte_addr(pte); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 5619f852273..bbd8661b347 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,8 +9,12 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) +#define VTD_STRIDE_SHIFT (9) +#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) + #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) +#define DMA_PTE_LARGE_PAGE (1 << 7) #define DMA_PTE_SNP (1 << 11) #define CONTEXT_TT_MULTI_LEVEL 0 -- cgit v1.2.3-70-g09d2 From 9b4554b21ed07e8556405510638171f0c787742a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 24 May 2011 12:19:04 -0400 Subject: intel-iommu: Only unlink device domains from iommu Commit a97590e5 added unlinking domains from iommus to reciprocate the iommu from domains unlinking that was already done. We actually want to only do this for device domains and never for the static identity map domain or VM domains. The SI domain is special and never freed, while VM domain->id lives in their own special address space, separate from iommu->domain_ids. In the current code, a VM can get domain->id zero, then mark that domain unused when unbound from pci-stub. This leads to DMAR write faults when the device is re-bound to the host driver. Signed-off-by: Alex Williamson Cc: stable@kernel.org Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index e6fe1994f9d..4eaec2fa136 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3561,10 +3561,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, domain_update_iommu_cap(domain); spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); - spin_lock_irqsave(&iommu->lock, tmp_flags); - clear_bit(domain->id, iommu->domain_ids); - iommu->domains[domain->id] = NULL; - spin_unlock_irqrestore(&iommu->lock, tmp_flags); + if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && + !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) { + spin_lock_irqsave(&iommu->lock, tmp_flags); + clear_bit(domain->id, iommu->domain_ids); + iommu->domains[domain->id] = NULL; + spin_unlock_irqrestore(&iommu->lock, tmp_flags); + } } spin_unlock_irqrestore(&device_domain_lock, flags); -- cgit v1.2.3-70-g09d2 From 8fcc5372fbac085199d84a880503ed67aba3fe49 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Sat, 28 May 2011 13:15:02 -0500 Subject: intel-iommu: Check for identity mapping candidate using system dma mask The identity mapping code appears to make the assumption that if the devices dma_mask is greater than 32bits the device can use identity mapping. But that is not true: take the case where we have a 40bit device in a 44bit architecture. The device can potentially receive a physical address that it will truncate and cause incorrect addresses to be used. Instead check to see if the device's dma_mask is large enough to address the system's dma_mask. Signed-off-by: Mike Travis Reviewed-by: Mike Habeck Cc: stable@kernel.org Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 4eaec2fa136..dcf051d53b7 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2316,8 +2316,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) * Assume that they will -- if they turn out not to be, then we can * take them out of the 1:1 domain later. */ - if (!startup) - return pdev->dma_mask > DMA_BIT_MASK(32); + if (!startup) { + /* + * If the device's dma_mask is less than the system's memory + * size then this is not a candidate for identity mapping. + */ + u64 dma_mask = pdev->dma_mask; + + if (pdev->dev.coherent_dma_mask && + pdev->dev.coherent_dma_mask < dma_mask) + dma_mask = pdev->dev.coherent_dma_mask; + + return dma_mask >= dma_get_required_mask(&pdev->dev); + } return 1; } -- cgit v1.2.3-70-g09d2 From cb452a4040bb051d92e85d6e7eb60c11734c1781 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 28 May 2011 13:15:03 -0500 Subject: intel-iommu: Speed up processing of the identity_mapping function When there are a large count of PCI devices, and the pass through option for iommu is set, much time is spent in the identity_mapping function hunting though the iommu domains to check if a specific device is "identity mapped". Speed up the function by checking the cached info to see if it's mapped to the static identity domain. Signed-off-by: Mike Travis Reviewed-by: Mike Habeck Cc: stable@kernel.org Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index dcf051d53b7..98be0b55ac0 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2235,10 +2235,10 @@ static int identity_mapping(struct pci_dev *pdev) if (likely(!iommu_identity_mapping)) return 0; + info = pdev->dev.archdata.iommu; + if (info && info != DUMMY_DEVICE_DOMAIN_INFO) + return (info->domain == si_domain); - list_for_each_entry(info, &si_domain->devices, link) - if (info->dev == pdev) - return 1; return 0; } -- cgit v1.2.3-70-g09d2 From 1c9fc3d11b84fbd0c4f4aa7855702c2a1f098ebb Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Sat, 28 May 2011 13:15:04 -0500 Subject: intel-iommu: Dont cache iova above 32bit Mike Travis and Mike Habeck reported an issue where iova allocation would return a range that was larger than a device's dma mask. https://lkml.org/lkml/2011/3/29/423 The dmar initialization code will reserve all PCI MMIO regions and copy those reservations into a domain specific iova tree. It is possible for one of those regions to be above the dma mask of a device. It is typical to allocate iovas with a 32bit mask (despite device's dma mask possibly being larger) and cache the result until it exhausts the lower 32bit address space. Freeing the iova range that is >= the last iova in the lower 32bit range when there is still an iova above the 32bit range will corrupt the cached iova by pointing it to a region that is above 32bit. If that region is also larger than the device's dma mask, a subsequent allocation will return an unusable iova and cause dma failure. Simply don't cache an iova that is above the 32bit caching boundary. Reported-by: Mike Travis Reported-by: Mike Habeck Cc: stable@kernel.org Acked-by: Mike Travis Tested-by: Mike Habeck Signed-off-by: Chris Wright Signed-off-by: David Woodhouse --- drivers/pci/iova.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 9606e599a47..c5c274ab5c5 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) curr = iovad->cached32_node; cached_iova = container_of(curr, struct iova, node); - if (free->pfn_lo >= cached_iova->pfn_lo) - iovad->cached32_node = rb_next(&free->node); + if (free->pfn_lo >= cached_iova->pfn_lo) { + struct rb_node *node = rb_next(&free->node); + struct iova *iova = container_of(node, struct iova, node); + + /* only cache if it's below 32bit pfn */ + if (node && iova->pfn_lo < iovad->dma_32bit_pfn) + iovad->cached32_node = node; + else + iovad->cached32_node = NULL; + } } /* Computes the padding size required, to make the -- cgit v1.2.3-70-g09d2 From c681d0ba1252954208220ad32248a3e8e2fc98e4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 28 May 2011 13:15:05 -0500 Subject: intel-iommu: Use coherent DMA mask when requested The __intel_map_single function is not honoring the passed in DMA mask. This results in not using the coherent DMA mask when called from intel_alloc_coherent(). Signed-off-by: Mike Travis Acked-by: Chris Wright Reviewed-by: Mike Habeck Cc: stable@kernel.org Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 98be0b55ac0..5cbab7f19ae 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2732,8 +2732,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); - iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), - pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask); if (!iova) goto error; -- cgit v1.2.3-70-g09d2 From 825507d6d059f1cbe2503e0e5a3926225b983aec Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 28 May 2011 13:15:06 -0500 Subject: intel-iommu: Remove Host Bridge devices from identity mapping When using the 1:1 (identity) PCI DMA remapping, PCI Host Bridge devices that do not use the IOMMU causes a kernel panic. Fix that by not inserting those devices into the si_domain. Signed-off-by: Mike Travis Reviewed-by: Mike Habeck Cc: stable@kernel.org Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 5cbab7f19ae..7f757ce60c5 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -46,6 +46,8 @@ #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE +#define IS_BRIDGE_HOST_DEVICE(pdev) \ + ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) @@ -2343,6 +2345,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw) return -EFAULT; for_each_pci_dev(pdev) { + /* Skip Host/PCI Bridge devices */ + if (IS_BRIDGE_HOST_DEVICE(pdev)) + continue; if (iommu_should_identity_map(pdev, 1)) { printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", hw ? "hardware" : "software", pci_name(pdev)); -- cgit v1.2.3-70-g09d2 From 8519dc4401ddf8a5399f979870bbeeadbc111186 Mon Sep 17 00:00:00 2001 From: Mike Habeck Date: Sat, 28 May 2011 13:15:07 -0500 Subject: intel-iommu: Add domain check in domain_remove_one_dev_info The comment in domain_remove_one_dev_info() states "No need to compare PCI domain; it has to be the same". But for the si_domain that isn't going to be true, as it consists of all the PCI devices that are identity mapped thus multiple PCI domains can be in si_domain. The code needs to validate the PCI domain too. Signed-off-by: Mike Habeck Signed-off-by: Mike Travis Cc: stable@kernel.org Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 7f757ce60c5..b0c96d39080 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3537,8 +3537,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_safe(entry, tmp, &domain->devices) { info = list_entry(entry, struct device_domain_info, link); - /* No need to compare PCI domain; it has to be the same */ - if (info->bus == pdev->bus->number && + if (info->segment == pci_domain_nr(pdev->bus) && + info->bus == pdev->bus->number && info->devfn == pdev->devfn) { list_del(&info->link); list_del(&info->global); -- cgit v1.2.3-70-g09d2 From 70e535d1e5d1e4317e894d6228b762cf9c3fbc6a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 31 May 2011 00:22:52 +0100 Subject: intel-iommu: Fix off-by-one in RMRR setup We were mapping an extra byte (and hence usually an extra page): iommu_prepare_identity_map() expects to be given an 'end' argument which is the last byte to be mapped; not the first byte *not* to be mapped. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index b0c96d39080..a8867bd745e 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2147,7 +2147,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) return 0; return iommu_prepare_identity_map(pdev, rmrr->base_address, - rmrr->end_address + 1); + rmrr->end_address); } #ifdef CONFIG_DMAR_FLOPPY_WA @@ -2161,7 +2161,7 @@ static inline void iommu_prepare_isa(void) return; printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); - ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); + ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1); if (ret) printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " -- cgit v1.2.3-70-g09d2 From 6464920a6e30604cb71d0ecbaa20e35009bd76fb Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 25 May 2011 12:24:25 +0200 Subject: xen/blkback: don't call vbd_size() if bd_disk is NULL ...because vbd_size() dereferences bd_disk if bd_part is NULL. Signed-off-by: Laszlo Ersek Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 34570823355..6cc0db1bf52 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -357,14 +357,13 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, } vbd->bdev = bdev; - vbd->size = vbd_sz(vbd); - if (vbd->bdev->bd_disk == NULL) { DPRINTK("xen_vbd_create: device %08x doesn't exist.\n", vbd->pdevice); xen_vbd_free(vbd); return -ENOENT; } + vbd->size = vbd_sz(vbd); if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) vbd->type |= VDISK_CDROM; -- cgit v1.2.3-70-g09d2 From 9b83c771214cf6a256ee875050e6eaf320cf7983 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 May 2011 09:27:16 +0300 Subject: xen/blkback: potential null dereference in error handling blkbk->pending_pages can be NULL here so I added a check for it. Signed-off-by: Dan Carpenter [v1: Redid the loop a bit] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index c73910cc28c..5cf2993a833 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -809,11 +809,13 @@ static int __init xen_blkif_init(void) failed_init: kfree(blkbk->pending_reqs); kfree(blkbk->pending_grant_handles); - for (i = 0; i < mmap_pages; i++) { - if (blkbk->pending_pages[i]) - __free_page(blkbk->pending_pages[i]); + if (blkbk->pending_pages) { + for (i = 0; i < mmap_pages; i++) { + if (blkbk->pending_pages[i]) + __free_page(blkbk->pending_pages[i]); + } + kfree(blkbk->pending_pages); } - kfree(blkbk->pending_pages); kfree(blkbk); blkbk = NULL; return rc; -- cgit v1.2.3-70-g09d2 From 614198bbf8d74617381aea82521b261c7f9baaf6 Mon Sep 17 00:00:00 2001 From: Per Dalen Date: Tue, 31 May 2011 06:54:21 -0700 Subject: hwmon: (max6642) Rename temp_fault sysfs attribute to temp2_fault The temp_fault sysfs attribute is wrong, it should be temp2_fault instead. Reported-by: Guenter Roeck Signed-off-by: Per Dalen Acked-by: Jean Delvare Signed-off-by: Guenter Roeck --- drivers/hwmon/max6642.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/max6642.c b/drivers/hwmon/max6642.c index 0f9fc40379c..0f30e1bb366 100644 --- a/drivers/hwmon/max6642.c +++ b/drivers/hwmon/max6642.c @@ -246,7 +246,7 @@ static SENSOR_DEVICE_ATTR_2(temp1_max, S_IWUSR | S_IRUGO, show_temp_max, set_temp_max, 0, MAX6642_REG_W_LOCAL_HIGH); static SENSOR_DEVICE_ATTR_2(temp2_max, S_IWUSR | S_IRUGO, show_temp_max, set_temp_max, 1, MAX6642_REG_W_REMOTE_HIGH); -static SENSOR_DEVICE_ATTR(temp_fault, S_IRUGO, show_alarm, NULL, 2); +static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_alarm, NULL, 2); static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, show_alarm, NULL, 6); static SENSOR_DEVICE_ATTR(temp2_max_alarm, S_IRUGO, show_alarm, NULL, 4); @@ -256,7 +256,7 @@ static struct attribute *max6642_attributes[] = { &sensor_dev_attr_temp1_max.dev_attr.attr, &sensor_dev_attr_temp2_max.dev_attr.attr, - &sensor_dev_attr_temp_fault.dev_attr.attr, + &sensor_dev_attr_temp2_fault.dev_attr.attr, &sensor_dev_attr_temp1_max_alarm.dev_attr.attr, &sensor_dev_attr_temp2_max_alarm.dev_attr.attr, NULL -- cgit v1.2.3-70-g09d2 From 4c6e0f8101e62d8b2d01dc94b835a98b191a1454 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 31 May 2011 15:50:51 -0400 Subject: hwmon: (coretemp) Relax target temperature range check The current temperature range check of MSR_IA32_TEMPERATURE_TARGET seems too strict to me, some TjMax values documented in Documentation/hwmon/coretemp wouldn't pass. Relax the check so that all the documented values pass. Signed-off-by: Jean Delvare Cc: Carsten Emde Cc: Fenghua Yu Signed-off-by: Guenter Roeck --- drivers/hwmon/coretemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index de3d2465fe2..0eeff46c0ea 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -296,7 +296,7 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) * If the TjMax is not plausible, an assumption * will be used */ - if (val > 80 && val < 120) { + if (val >= 70 && val <= 125) { dev_info(dev, "TjMax is %d C.\n", val); return val * 1000; } -- cgit v1.2.3-70-g09d2 From 333ba7325213f0a09dfa5ceeddb056d6ad74b3b5 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 29 May 2011 15:53:20 +0300 Subject: cfg80211: don't drop p2p probe responses Commit 0a35d36 ("cfg80211: Use capability info to detect mesh beacons") assumed that probe response with both ESS and IBSS bits cleared means that the frame was sent by a mesh sta. However, these capabilities are also being used in the p2p_find phase, and the mesh-validation broke it. Rename the WLAN_CAPABILITY_IS_MBSS macro, and verify that mesh ies exist before assuming this frame was sent by a mesh sta. Signed-off-by: Eliad Peller Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 8 ++++++-- net/wireless/scan.c | 43 ++++++++++++++++++++++++------------------- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b2eee587988..bf56b6f7827 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1003,8 +1003,12 @@ struct ieee80211_ht_info { #define WLAN_CAPABILITY_ESS (1<<0) #define WLAN_CAPABILITY_IBSS (1<<1) -/* A mesh STA sets the ESS and IBSS capability bits to zero */ -#define WLAN_CAPABILITY_IS_MBSS(cap) \ +/* + * A mesh STA sets the ESS and IBSS capability bits to zero. + * however, this holds true for p2p probe responses (in the p2p_find + * phase) as well. + */ +#define WLAN_CAPABILITY_IS_STA_BSS(cap) \ (!((cap) & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS))) #define WLAN_CAPABILITY_CF_POLLABLE (1<<2) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 73a441d237b..7a6c67667d7 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -267,13 +267,35 @@ static bool is_bss(struct cfg80211_bss *a, return memcmp(ssidie + 2, ssid, ssid_len) == 0; } +static bool is_mesh_bss(struct cfg80211_bss *a) +{ + const u8 *ie; + + if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) + return false; + + ie = cfg80211_find_ie(WLAN_EID_MESH_ID, + a->information_elements, + a->len_information_elements); + if (!ie) + return false; + + ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + a->information_elements, + a->len_information_elements); + if (!ie) + return false; + + return true; +} + static bool is_mesh(struct cfg80211_bss *a, const u8 *meshid, size_t meshidlen, const u8 *meshcfg) { const u8 *ie; - if (!WLAN_CAPABILITY_IS_MBSS(a->capability)) + if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) return false; ie = cfg80211_find_ie(WLAN_EID_MESH_ID, @@ -311,7 +333,7 @@ static int cmp_bss(struct cfg80211_bss *a, if (a->channel != b->channel) return b->channel->center_freq - a->channel->center_freq; - if (WLAN_CAPABILITY_IS_MBSS(a->capability | b->capability)) { + if (is_mesh_bss(a) && is_mesh_bss(b)) { r = cmp_ies(WLAN_EID_MESH_ID, a->information_elements, a->len_information_elements, @@ -457,7 +479,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *res) { struct cfg80211_internal_bss *found = NULL; - const u8 *meshid, *meshcfg; /* * The reference to "res" is donated to this function. @@ -470,22 +491,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, res->ts = jiffies; - if (WLAN_CAPABILITY_IS_MBSS(res->pub.capability)) { - /* must be mesh, verify */ - meshid = cfg80211_find_ie(WLAN_EID_MESH_ID, - res->pub.information_elements, - res->pub.len_information_elements); - meshcfg = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, - res->pub.information_elements, - res->pub.len_information_elements); - if (!meshid || !meshcfg || - meshcfg[1] != sizeof(struct ieee80211_meshconf_ie)) { - /* bogus mesh */ - kref_put(&res->ref, bss_release); - return NULL; - } - } - spin_lock_bh(&dev->bss_lock); found = rb_find_bss(dev, res); -- cgit v1.2.3-70-g09d2 From 21fdc87248d1d28492c775e05fa92b3c8c7bc8db Mon Sep 17 00:00:00 2001 From: Daniel Halperin Date: Tue, 31 May 2011 11:59:30 -0700 Subject: ath9k: fix two more bugs in tx power This is the same fix as commit 841051602e3fa18ea468fe5a177aa92b6eb44b56 Author: Matteo Croce Date: Fri Dec 3 02:25:08 2010 +0100 The ath9k driver subtracts 3 dBm to the txpower as with two radios the signal power is doubled. The resulting value is assigned in an u16 which overflows and makes the card work at full power. in two more places. I grepped the ath tree and didn't find any others. Cc: stable@kernel.org Signed-off-by: Daniel Halperin Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 10 ++++++++-- drivers/net/wireless/ath/ath9k/eeprom_9287.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 0ca7635d066..ff8150e46f0 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -4645,10 +4645,16 @@ static void ar9003_hw_set_power_per_rate_table(struct ath_hw *ah, case 1: break; case 2: - scaledPower -= REDUCE_SCALED_POWER_BY_TWO_CHAIN; + if (scaledPower > REDUCE_SCALED_POWER_BY_TWO_CHAIN) + scaledPower -= REDUCE_SCALED_POWER_BY_TWO_CHAIN; + else + scaledPower = 0; break; case 3: - scaledPower -= REDUCE_SCALED_POWER_BY_THREE_CHAIN; + if (scaledPower > REDUCE_SCALED_POWER_BY_THREE_CHAIN) + scaledPower -= REDUCE_SCALED_POWER_BY_THREE_CHAIN; + else + scaledPower = 0; break; } diff --git a/drivers/net/wireless/ath/ath9k/eeprom_9287.c b/drivers/net/wireless/ath/ath9k/eeprom_9287.c index 7856f0d4512..343fc9f946d 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom_9287.c +++ b/drivers/net/wireless/ath/ath9k/eeprom_9287.c @@ -524,10 +524,16 @@ static void ath9k_hw_set_ar9287_power_per_rate_table(struct ath_hw *ah, case 1: break; case 2: - scaledPower -= REDUCE_SCALED_POWER_BY_TWO_CHAIN; + if (scaledPower > REDUCE_SCALED_POWER_BY_TWO_CHAIN) + scaledPower -= REDUCE_SCALED_POWER_BY_TWO_CHAIN; + else + scaledPower = 0; break; case 3: - scaledPower -= REDUCE_SCALED_POWER_BY_THREE_CHAIN; + if (scaledPower > REDUCE_SCALED_POWER_BY_THREE_CHAIN) + scaledPower -= REDUCE_SCALED_POWER_BY_THREE_CHAIN; + else + scaledPower = 0; break; } scaledPower = max((u16)0, scaledPower); -- cgit v1.2.3-70-g09d2 From a7567b2059020bf3fa96c389ec25eed8e28ad4ba Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jun 2011 08:29:54 +0200 Subject: bluetooth l2cap: fix locking in l2cap_global_chan_by_psm read_lock() ... read_unlock_bh() is clearly bogus. This was broken by commit 23691d75cdc69c3b285211b4d77746aa20a17d18 Author: Gustavo F. Padovan Date: Wed Apr 27 18:26:32 2011 -0300 Bluetooth: Remove l2cap_sk_list Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a86f9ba4f05..e64a1c2df23 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -906,7 +906,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr if (c->psm == psm) { /* Exact match. */ if (!bacmp(&bt_sk(sk)->src, src)) { - read_unlock_bh(&chan_list_lock); + read_unlock(&chan_list_lock); return c; } -- cgit v1.2.3-70-g09d2 From dfe21582ac5ebc460dda98c67e8589dd506d02cd Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 1 Jun 2011 17:17:57 +0200 Subject: iwl4965: correctly validate temperature value In some cases we can read wrong temperature value. If after that temperature value will not be updated to good one, we badly configure tx power parameters and device is unable to send a data. Resolves: https://bugzilla.kernel.org/show_bug.cgi?id=35932 Cc: stable@kernel.org # 2.6.39+ Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/iwlegacy/iwl-4965.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlegacy/iwl-4965.c b/drivers/net/wireless/iwlegacy/iwl-4965.c index f5433c74b84..f9db25bb35c 100644 --- a/drivers/net/wireless/iwlegacy/iwl-4965.c +++ b/drivers/net/wireless/iwlegacy/iwl-4965.c @@ -1543,7 +1543,7 @@ static void iwl4965_temperature_calib(struct iwl_priv *priv) s32 temp; temp = iwl4965_hw_get_temperature(priv); - if (temp < 0) + if (IWL_TX_POWER_TEMPERATURE_OUT_OF_RANGE(temp)) return; if (priv->temperature != temp) { -- cgit v1.2.3-70-g09d2 From 3cc39b3f061e90f69cb1f65d72c005c56cddd6a6 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 1 Jun 2011 16:06:04 -0400 Subject: tile: enable CONFIG_BUGVERBOSE Trivial config change to enable backtraces on panic. Signed-off-by: Chris Metcalf --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 28afa4c5333..dd373c8ee94 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -697,7 +697,7 @@ config DEBUG_BUGVERBOSE bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EXPERT depends on BUG depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \ - FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 + FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 || TILE default y help Say Y here to make BUG() panics output the file name and line number -- cgit v1.2.3-70-g09d2 From 0f48f2600911d5de6393829e4a9986d4075558b3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Jun 2011 05:29:19 +0900 Subject: block: fix mismerge of the DISK_EVENT_MEDIA_CHANGE removal Jens' back-merge commit 698567f3fa79 ("Merge commit 'v2.6.39' into for-2.6.40/core") was incorrectly done, and re-introduced the DISK_EVENT_MEDIA_CHANGE lines that had been removed earlier in commits - 9fd097b14918 ("block: unexport DISK_EVENT_MEDIA_CHANGE for legacy/fringe drivers") - 7eec77a1816a ("ide: unexport DISK_EVENT_MEDIA_CHANGE for ide-gd and ide-cd") because of conflicts with the "g->flags" updates near-by by commit d4dc210f69bc ("block: don't block events on excl write for non-optical devices") As a result, we re-introduced the hanging behavior due to infinite disk media change reports. Tssk, tssk, people! Don't do back-merges at all, and *definitely* don't do them to hide merge conflicts from me - especially as I'm likely better at merging them than you are, since I do so many merges. Reported-by: Steven Rostedt Cc: Jens Axboe Signed-off-by: Linus Torvalds --- drivers/block/paride/pcd.c | 1 - drivers/cdrom/viocd.c | 1 - drivers/ide/ide-cd.c | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index a0aabd904a5..46b8136c31b 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -321,7 +321,6 @@ static void pcd_init_units(void) strcpy(disk->disk_name, cd->name); /* umm... */ disk->fops = &pcd_bdops; disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - disk->events = DISK_EVENT_MEDIA_CHANGE; } } diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index ae15a4ddaa9..7878da89d29 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -627,7 +627,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id) gendisk->fops = &viocd_fops; gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - gendisk->events = DISK_EVENT_MEDIA_CHANGE; set_capacity(gendisk, 0); gendisk->private_data = d; d->viocd_disk = gendisk; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 6e5123b1d34..144d27261e4 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1782,7 +1782,6 @@ static int ide_cd_probe(ide_drive_t *drive) ide_cd_read_toc(drive, &sense); g->fops = &idecd_ops; g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - g->events = DISK_EVENT_MEDIA_CHANGE; add_disk(g); return 0; -- cgit v1.2.3-70-g09d2 From 1fa7b6a29c61358cc2ca6f64cef4aa0e1a7ca74c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Jun 2011 06:11:24 +0900 Subject: Revert "mm: fail GFP_DMA allocations when ZONE_DMA is not configured" This reverts commit a197b59ae6e8bee56fcef37ea2482dc08414e2ac. As rmk says: "Commit a197b59ae6e8 (mm: fail GFP_DMA allocations when ZONE_DMA is not configured) is causing regressions on ARM with various drivers which use GFP_DMA. The behaviour up until now has been to silently ignore that flag when CONFIG_ZONE_DMA is not enabled, and to allocate from the normal zone. However, as a result of the above commit, such allocations now fail which causes drivers to fail. These are regressions compared to the previous kernel version." so just revert it. Requested-by: Russell King Acked-by: Andrew Morton Cc: David Rientjes Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a4e1db3f198..4e8985acdab 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2247,10 +2247,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (should_fail_alloc_page(gfp_mask, order)) return NULL; -#ifndef CONFIG_ZONE_DMA - if (WARN_ON_ONCE(gfp_mask & __GFP_DMA)) - return NULL; -#endif /* * Check the zones suitable for the gfp_mask contain at least one -- cgit v1.2.3-70-g09d2 From 4f5f71a7abe329bdad81ee6a8e4545054a7cc30a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 31 May 2011 06:54:21 -0700 Subject: hwmon: (coretemp) Fix TjMax detection for older CPUs Commit a321cedb12904114e2ba5041a3673ca24deb09c9 excludes CPU models 0xe, 0xf, 0x16, and 0x1a from TjMax temperature adjustment, even though several of those CPUs are known to have TiMax other than 100 degrees C, and even though the code in adjust_tjmax() explicitly handles those CPUs and points to a Web document listing several of the affected CPU IDs. Reinstate original TjMax adjustment if TjMax can not be determined using the IA32_TEMPERATURE_TARGET register. https://bugzilla.kernel.org/show_bug.cgi?id=32582 Signed-off-by: Guenter Roeck Cc: Huaxu Wan Cc: Carsten Emde Cc: Valdis Kletnieks Cc: Henrique de Moraes Holschuh Cc: Yong Wang Cc: Rudolf Marek Cc: Fenghua Yu Tested-by: Jean Delvare Acked-by: Jean Delvare Acked-by: Fenghua Yu Cc: # .35.x .36.x .37.x .38.x .39.x --- drivers/hwmon/coretemp.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 0eeff46c0ea..1680977cfba 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -304,24 +304,9 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) /* * An assumption is made for early CPUs and unreadable MSR. - * NOTE: the given value may not be correct. + * NOTE: the calculated value may not be correct. */ - - switch (c->x86_model) { - case 0xe: - case 0xf: - case 0x16: - case 0x1a: - dev_warn(dev, "TjMax is assumed as 100 C!\n"); - return 100000; - case 0x17: - case 0x1c: /* Atom CPUs */ - return adjust_tjmax(c, id, dev); - default: - dev_warn(dev, "CPU (model=0x%x) is not supported yet," - " using default TjMax of 100C.\n", c->x86_model); - return 100000; - } + return adjust_tjmax(c, id, dev); } static void __devinit get_ucode_rev_on_cpu(void *edx) -- cgit v1.2.3-70-g09d2 From bb9973e4e73f43bd86698483d0c3f7a362ff94ce Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 1 Jun 2011 11:03:41 -0700 Subject: hwmon: (coretemp) Further relax temperature range checks Further relax temperature range checks after reading the IA32_TEMPERATURE_TARGET register. If the register returns a value other than 0 in bits 16..32, assume that the returned value is correct. This change applies to both packet and core temperature limits. Cc: Carsten Emde Cc: Fenghua Yu Cc: Jean Delvare Signed-off-by: Guenter Roeck Acked-by: Fenghua Yu --- drivers/hwmon/coretemp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 1680977cfba..85e937984ff 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -296,7 +296,7 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) * If the TjMax is not plausible, an assumption * will be used */ - if (val >= 70 && val <= 125) { + if (val) { dev_info(dev, "TjMax is %d C.\n", val); return val * 1000; } @@ -326,7 +326,7 @@ static int get_pkg_tjmax(unsigned int cpu, struct device *dev) err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx); if (!err) { val = (eax >> 16) & 0xff; - if (val > 80 && val < 120) + if (val) return val * 1000; } dev_warn(dev, "Unable to read Pkg-TjMax from CPU:%u\n", cpu); -- cgit v1.2.3-70-g09d2 From d0733d2e29b652b2e7b1438ececa732e4eed98eb Mon Sep 17 00:00:00 2001 From: Marcus Meissner Date: Wed, 1 Jun 2011 21:05:22 -0700 Subject: net/ipv4: Check for mistakenly passed in non-IPv4 address Check against mistakenly passing in IPv6 addresses (which would result in an INADDR_ANY bind) or similar incompatible sockaddrs. Signed-off-by: Marcus Meissner Cc: Reinhard Max Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cc1463156cd..9c1926027a2 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -465,6 +465,9 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; + if (addr->sin_family != AF_INET) + goto out; + chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too -- cgit v1.2.3-70-g09d2 From 307f73df2b9829ee5a261d1ed432ff683c426cdf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 31 May 2011 22:53:19 +0000 Subject: vlan: fix typo in vlan_dev_hard_start_xmit() commit 4af429d29b341bb1735f04c2fb960178ed5d52e7 (vlan: lockless transmit path) have a typo in vlan_dev_hard_start_xmit(), using u64_stats_update_begin() to end the stat update, it should be u64_stats_update_end(). Signed-off-by: Wei Yongjun Reviewed-by: WANG Cong Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index f247f5bff88..7ea5cf9ea08 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -165,7 +165,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += len; - u64_stats_update_begin(&stats->syncp); + u64_stats_update_end(&stats->syncp); } else { this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped); } -- cgit v1.2.3-70-g09d2 From 85e3c65fa3a1d0542c181510a950a2be7733ff29 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 1 Jun 2011 02:01:41 +0000 Subject: usbnet/cdc_ncm: add missing .reset_resume hook This avoids messages like this after suspend: cdc_ncm 2-1.4:1.6: no reset_resume for driver cdc_ncm? cdc_ncm 2-1.4:1.7: no reset_resume for driver cdc_ncm? cdc_ncm 2-1.4:1.6: usb0: unregister 'cdc_ncm' usb-0000:00:1d.0-1.4, CDC NCM This is important for the Ericsson F5521gw GSM/UMTS modem. Otherwise modemmanager looses the fact that the cdc_ncm and cdc_acm devices belong together. The cdc_ether module does the same. Signed-off-by: Stefan Metzmacher Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index cdd3ae48610..f33ca6aa29e 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -54,7 +54,7 @@ #include #include -#define DRIVER_VERSION "24-May-2011" +#define DRIVER_VERSION "01-June-2011" /* CDC NCM subclass 3.2.1 */ #define USB_CDC_NCM_NDP16_LENGTH_MIN 0x10 @@ -1234,6 +1234,7 @@ static struct usb_driver cdc_ncm_driver = { .disconnect = cdc_ncm_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, + .reset_resume = usbnet_resume, .supports_autosuspend = 1, }; -- cgit v1.2.3-70-g09d2 From 41be5a4a3668810bf3687a76c2b017bd437039e0 Mon Sep 17 00:00:00 2001 From: "sjur.brandeland@stericsson.com" Date: Wed, 1 Jun 2011 00:55:37 +0000 Subject: caif: Fix race when conditionally taking rtnl lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take the RTNL lock unconditionally when calling dev_close. Taking the lock conditionally may cause race conditions. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/chnl_net.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 649ebacaf6b..adbb424403d 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -139,17 +139,14 @@ static void close_work(struct work_struct *work) struct chnl_net *dev = NULL; struct list_head *list_node; struct list_head *_tmp; - /* May be called with or without RTNL lock held */ - int islocked = rtnl_is_locked(); - if (!islocked) - rtnl_lock(); + + rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); if (dev->state == CAIF_SHUTDOWN) dev_close(dev->netdev); } - if (!islocked) - rtnl_unlock(); + rtnl_unlock(); } static DECLARE_WORK(close_worker, close_work); -- cgit v1.2.3-70-g09d2 From a3bcc23e890a6d49d6763d9eb073d711de2e0469 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 1 Jun 2011 06:49:10 +0000 Subject: af-packet: Add flag to distinguish VID 0 from no-vlan. Currently, user-space cannot determine if a 0 tcp_vlan_tci means there is no VLAN tag or the VLAN ID was zero. Add flag to make this explicit. User-space can check for TP_STATUS_VLAN_VALID || tp_vlan_tci > 0, which will be backwards compatible. Older could would have just checked for tp_vlan_tci, so it will work no worse than before. Signed-off-by: Ben Greear Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/if_packet.h | 1 + net/packet/af_packet.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 72bfa5a034d..6d66ce1791a 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -70,6 +70,7 @@ struct tpacket_auxdata { #define TP_STATUS_COPY 0x2 #define TP_STATUS_LOSING 0x4 #define TP_STATUS_CSUMNOTREADY 0x8 +#define TP_STATUS_VLAN_VALID 0x10 /* auxdata has valid tp_vlan_tci */ /* Tx ring - header status */ #define TP_STATUS_AVAILABLE 0x0 diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 925f715686a..ba248d93399 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -798,7 +798,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, getnstimeofday(&ts); h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + if (vlan_tx_tag_present(skb)) { + h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + status |= TP_STATUS_VLAN_VALID; + } else { + h.h2->tp_vlan_tci = 0; + } hdrlen = sizeof(*h.h2); break; default: @@ -1725,8 +1730,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - aux.tp_vlan_tci = vlan_tx_tag_get(skb); - + if (vlan_tx_tag_present(skb)) { + aux.tp_vlan_tci = vlan_tx_tag_get(skb); + aux.tp_status |= TP_STATUS_VLAN_VALID; + } else { + aux.tp_vlan_tci = 0; + } put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } -- cgit v1.2.3-70-g09d2 From b722dbf176b67c75fe0f5a6b1b31f5ea8aa6117d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 1 Jun 2011 07:10:10 +0000 Subject: drivers/net/davinci_emac.c: add missing clk_put Go to existing error handling code at the end of the function that calls clk_put. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ expression e1,e2; statement S; @@ e1 = clk_get@p1(...); ... when != e1 = e2 when != clk_put(e1) when any if (...) { ... when != clk_put(e1) when != if (...) { ... clk_put(e1) ... } * return@p3 ...; } else S // Signed-off-by: Julia Lawall Acked-by: Kevin Hilman Signed-off-by: David S. Miller --- drivers/net/davinci_emac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c index 29a4f06fbfc..dcc4a170b0f 100644 --- a/drivers/net/davinci_emac.c +++ b/drivers/net/davinci_emac.c @@ -1781,8 +1781,8 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev) ndev = alloc_etherdev(sizeof(struct emac_priv)); if (!ndev) { dev_err(&pdev->dev, "error allocating net_device\n"); - clk_put(emac_clk); - return -ENOMEM; + rc = -ENOMEM; + goto free_clk; } platform_set_drvdata(pdev, ndev); @@ -1796,7 +1796,8 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev) pdata = pdev->dev.platform_data; if (!pdata) { dev_err(&pdev->dev, "no platform data\n"); - return -ENODEV; + rc = -ENODEV; + goto probe_quit; } /* MAC addr and PHY mask , RMII enable info from platform_data */ @@ -1929,8 +1930,9 @@ no_dma: iounmap(priv->remap_addr); probe_quit: - clk_put(emac_clk); free_netdev(ndev); +free_clk: + clk_put(emac_clk); return rc; } -- cgit v1.2.3-70-g09d2 From 6979d5dd96a4a4975ce240982436e92a3da23315 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 1 Jun 2011 10:18:09 +0000 Subject: net: dm9000: Get the chip in a known good state before enabling interrupts Currently the DM9000 driver requests the primary interrupt before it resets the chip and puts it into a known good state. This means that if the chip is asserting interrupt for some reason we can end up with a screaming IRQ that the interrupt handler is unable to deal with. Avoid this by only requesting the interrupt after we've reset the chip so we know what state it's in. This started manifesting itself on one of my boards in the past month or so, I suspect as a result of some core infrastructure changes removing some form of mitigation against bad behaviour here, even when things boot it seems that the new code brings the interface up more quickly. Signed-off-by: Mark Brown Signed-off-by: David S. Miller --- drivers/net/dm9000.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c index fbaff3584bd..ee597e676ee 100644 --- a/drivers/net/dm9000.c +++ b/drivers/net/dm9000.c @@ -1157,9 +1157,6 @@ dm9000_open(struct net_device *dev) irqflags |= IRQF_SHARED; - if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev)) - return -EAGAIN; - /* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */ iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */ mdelay(1); /* delay needs by DM9000B */ @@ -1168,6 +1165,9 @@ dm9000_open(struct net_device *dev) dm9000_reset(db); dm9000_init_dm9000(dev); + if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev)) + return -EAGAIN; + /* Init driver variable */ db->dbug_cnt = 0; -- cgit v1.2.3-70-g09d2 From a1b2cc50679c1d2eed44e2885f6178ce907498b7 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 31 May 2011 09:25:16 +0000 Subject: dmaengine: shdma: fix a regression: initialise DMA channels for memcpy A recent patch has introduced a regression, where repeating a memcpy DMA test with shdma module unloading between them skips the DMA channel configuration. Fix this regression by always configuring the channel during its allocation. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/dma/shdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 727e76ff13e..2a638f9f09a 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -343,7 +343,7 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) dmae_set_dmars(sh_chan, cfg->mid_rid); dmae_set_chcr(sh_chan, cfg->chcr); - } else if ((sh_dmae_readl(sh_chan, CHCR) & 0xf00) != 0x400) { + } else { dmae_init(sh_chan); } -- cgit v1.2.3-70-g09d2 From 816af7422f5bdbb74a0c9bb09735a9aeb9522c30 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 1 Jun 2011 07:32:07 +0000 Subject: ARM: mach-shmobile: add DMAC clock definitions on SH7372 These definitions are needed to let the runtime PM subsystem turn off DMAC clocks, when it is suspended by the driver. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index d17eb66f4ac..c0800d83971 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -509,6 +509,7 @@ enum { MSTP001, MSTP118, MSTP117, MSTP116, MSTP113, MSTP106, MSTP101, MSTP100, MSTP223, + MSTP218, MSTP217, MSTP216, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, MSTP329, MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312, MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403, @@ -534,6 +535,9 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP101] = MSTP(&div4_clks[DIV4_M1], SMSTPCR1, 1, 0), /* VPU */ [MSTP100] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 0, 0), /* LCDC0 */ [MSTP223] = MSTP(&div6_clks[DIV6_SPU], SMSTPCR2, 23, 0), /* SPU2 */ + [MSTP218] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* DMAC1 */ + [MSTP217] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 17, 0), /* DMAC2 */ + [MSTP216] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 16, 0), /* DMAC3 */ [MSTP207] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 7, 0), /* SCIFA5 */ [MSTP206] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 6, 0), /* SCIFB */ [MSTP204] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 4, 0), /* SCIFA0 */ @@ -626,6 +630,9 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[MSTP100]), /* LCDC0 */ CLKDEV_DEV_ID("uio_pdrv_genirq.6", &mstp_clks[MSTP223]), /* SPU2DSP0 */ CLKDEV_DEV_ID("uio_pdrv_genirq.7", &mstp_clks[MSTP223]), /* SPU2DSP1 */ + CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), /* DMAC1 */ + CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[MSTP217]), /* DMAC2 */ + CLKDEV_DEV_ID("sh-dma-engine.2", &mstp_clks[MSTP216]), /* DMAC3 */ CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */ CLKDEV_DEV_ID("sh-sci.6", &mstp_clks[MSTP206]), /* SCIFB */ CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), /* SCIFA0 */ -- cgit v1.2.3-70-g09d2 From 2e4ceec4edaef6e903422792de4f7f37de98cec6 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 1 Jun 2011 19:48:50 +0000 Subject: drivers/net/can/flexcan.c: add missing clk_put The failed_get label is used after the call to clk_get has succeeded, so it should be moved up above the call to clk_put. The failed_req labels doesn't do anything different than failed_get, so delete it. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ expression e1,e2; statement S; @@ e1 = clk_get@p1(...); ... when != e1 = e2 when != clk_put(e1) when any if (...) { ... when != clk_put(e1) when != if (...) { ... clk_put(e1) ... } * return@p3 ...; } else S // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/can/flexcan.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index d4990568bae..17678117ed6 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -923,7 +923,7 @@ static int __devinit flexcan_probe(struct platform_device *pdev) mem_size = resource_size(mem); if (!request_mem_region(mem->start, mem_size, pdev->name)) { err = -EBUSY; - goto failed_req; + goto failed_get; } base = ioremap(mem->start, mem_size); @@ -977,9 +977,8 @@ static int __devinit flexcan_probe(struct platform_device *pdev) iounmap(base); failed_map: release_mem_region(mem->start, mem_size); - failed_req: - clk_put(clk); failed_get: + clk_put(clk); failed_clock: return err; } -- cgit v1.2.3-70-g09d2 From e73e079bf128d68284efedeba1fbbc18d78610f9 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 25 May 2011 15:52:14 -0500 Subject: [SCSI] Fix oops caused by queue refcounting failure In certain circumstances, we can get an oops from a torn down device. Most notably this is from CD roms trying to call scsi_ioctl. The root cause of the problem is the fact that after scsi_remove_device() has been called, the queue is fully torn down. This is actually wrong since the queue can be used until the sdev release function is called. Therefore, we add an extra reference to the queue which is released in sdev->release, so the queue always exists. Reported-by: Parag Warudkar Cc: stable@kernel.org Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 2 +- drivers/scsi/scsi_sysfs.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 58584dc0724..44e8ca398ef 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -297,7 +297,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, kfree(sdev); goto out; } - + blk_get_queue(sdev->request_queue); sdev->request_queue->queuedata = sdev; scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index e63912510fb..e0bd3f790fc 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -322,6 +322,7 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) kfree(evt); } + blk_put_queue(sdev->request_queue); /* NULL queue means the device can't be used */ sdev->request_queue = NULL; -- cgit v1.2.3-70-g09d2 From 28304f485c3627cc5e1665b92e26eb7fcfe98088 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 2 Jun 2011 13:05:02 +0200 Subject: cfq-iosched: Remove bogus check in queue_fail path queue_fail can only be reached if cic is NULL, so its check for cic must be bogus. Signed-off-by: Paul Bolle Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 8a02c955c61..3c7b537bf90 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3786,9 +3786,6 @@ new_queue: return 0; queue_fail: - if (cic) - put_io_context(cic->ioc); - cfq_schedule_dispatch(cfqd); spin_unlock_irqrestore(q->queue_lock, flags); cfq_log(cfqd, "set_request fail"); -- cgit v1.2.3-70-g09d2 From e2bd9678fc0085acf540dc4cb48ff961cd4d88c0 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 2 Jun 2011 13:05:02 +0200 Subject: block: Use hlist_entry() for io_context.cic_list.first list_entry() and hlist_entry() are both simply aliases for container_of(), but since io_context.cic_list.first is an hlist_node one should at least use the correct alias. Signed-off-by: Paul Bolle Signed-off-by: Jens Axboe --- block/blk-ioc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-ioc.c b/block/blk-ioc.c index c898049dafd..342eae9b0d3 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -21,7 +21,7 @@ static void cfq_dtor(struct io_context *ioc) if (!hlist_empty(&ioc->cic_list)) { struct cfq_io_context *cic; - cic = list_entry(ioc->cic_list.first, struct cfq_io_context, + cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, cic_list); cic->dtor(ioc); } @@ -57,7 +57,7 @@ static void cfq_exit(struct io_context *ioc) if (!hlist_empty(&ioc->cic_list)) { struct cfq_io_context *cic; - cic = list_entry(ioc->cic_list.first, struct cfq_io_context, + cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, cic_list); cic->exit(ioc); } -- cgit v1.2.3-70-g09d2 From 4c8cc55b3c0ebe989e727017933945b68b4327cd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Jun 2011 23:22:30 -0400 Subject: ktest: Fix off-by-one in config bisect result Because in perl the array size returned by $#arr, is the last index and not the actually size of the array, we end the config bisect early, thinking there is only one config left when there are in fact two. Thus the result has a 50% chance of picking the correct config that caused the problem. Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 1fd29b2daa9..8dc8c3cf3ac 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1638,7 +1638,7 @@ sub run_config_bisect { if (!$found) { # try the other half doprint "Top half produced no set configs, trying bottom half\n"; - @tophalf = @start_list[$half .. $#start_list]; + @tophalf = @start_list[$half + 1 .. $#start_list]; create_config @tophalf; read_current_config \%current_config; foreach my $config (@tophalf) { @@ -1690,7 +1690,7 @@ sub run_config_bisect { # remove half the configs we are looking at and see if # they are good. $half = int($#start_list / 2); - } while ($half > 0); + } while ($#start_list > 0); # we found a single config, try it again unless we are running manually -- cgit v1.2.3-70-g09d2 From 4da46da2d295c0d9f4aaf28dd2b70a1ecb42d972 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Jun 2011 23:25:13 -0400 Subject: ktest: Fix result of rebooting the kernel The command that is called that reboots the kernel may fail but the return code is not passed back to the ktest.pl script. This is because a ';' is used between the two commands and if the second command fails, only the first command's return code is returned. Using a '&&' between the two commands fixes this. Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 8dc8c3cf3ac..6c68259e730 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -788,7 +788,7 @@ sub wait_for_input sub reboot_to { if ($reboot_type eq "grub") { - run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch; reboot)'"; + run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch && reboot)'"; return; } -- cgit v1.2.3-70-g09d2 From 9bf7174949aef2f43253956e1f3ab01698abbd79 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Jun 2011 23:27:19 -0400 Subject: ktest: Ignore unset values of the minconfig in config_bisect By ignoring the unset values of the minconfig in deciding what to test in the config_bisect can cause the problem config from being tested too. Just do not test the configs that are set in the minconfig. Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 6c68259e730..cef28e6632b 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1480,7 +1480,7 @@ sub process_config_ignore { or dodie "Failed to read $config"; while () { - if (/^(.*?(CONFIG\S*)(=.*| is not set))/) { + if (/^((CONFIG\S*)=.*)/) { $config_ignore{$2} = $1; } } -- cgit v1.2.3-70-g09d2 From bf0be0e951cf1c4c9ce38032195cd8095a16d828 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 30 May 2011 12:49:01 +0200 Subject: ALSA: 6fire: Don't leak firmware in error path One of the error paths in sound/usb/6fire/firmware.c::usb6fire_fw_ezusb_upload() neglects to free the memory allocated for the firmware before returning, thus leaking the memory. Signed-off-by: Jesper Juhl Signed-off-by: Takashi Iwai --- sound/usb/6fire/firmware.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/6fire/firmware.c b/sound/usb/6fire/firmware.c index d47beffedb0..a91719d5918 100644 --- a/sound/usb/6fire/firmware.c +++ b/sound/usb/6fire/firmware.c @@ -227,6 +227,7 @@ static int usb6fire_fw_ezusb_upload( ret = usb6fire_fw_ihex_init(fw, rec); if (ret < 0) { kfree(rec); + release_firmware(fw); snd_printk(KERN_ERR PREFIX "error validating ezusb " "firmware %s.\n", fwname); return ret; -- cgit v1.2.3-70-g09d2 From b36a968927b789b2dd8de0aaf7a72ef7c1f0d012 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 2 Jun 2011 14:21:45 -0400 Subject: asm-generic/unistd.h: support sendmmsg syscall Signed-off-by: Chris Metcalf Acked-by: Arnd Bergmann --- include/asm-generic/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index ae90e0f6399..4f76959397f 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -683,9 +683,11 @@ __SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime) __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_setns 268 __SYSCALL(__NR_setns, sys_setns) +#define __NR_sendmmsg 269 +__SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg) #undef __NR_syscalls -#define __NR_syscalls 269 +#define __NR_syscalls 270 /* * All syscalls below here should go away really, -- cgit v1.2.3-70-g09d2 From ec764bf083a6ff396234351b51fd236f53c903bf Mon Sep 17 00:00:00 2001 From: Koki Sanagi Date: Mon, 30 May 2011 21:48:34 +0000 Subject: net: tracepoint of net_dev_xmit sees freed skb and causes panic Because there is a possibility that skb is kfree_skb()ed and zero cleared after ndo_start_xmit, we should not see the contents of skb like skb->len and skb->dev->name after ndo_start_xmit. But trace_net_dev_xmit does that and causes panic by NULL pointer dereference. This patch fixes trace_net_dev_xmit not to see the contents of skb directly. If you want to reproduce this panic, 1. Get tracepoint of net_dev_xmit on 2. Create 2 guests on KVM 2. Make 2 guests use virtio_net 4. Execute netperf from one to another for a long time as a network burden 5. host will panic(It takes about 30 minutes) Signed-off-by: Koki Sanagi Signed-off-by: David S. Miller --- include/trace/events/net.h | 12 +++++++----- net/core/dev.c | 7 +++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 5f247f5ffc5..f99645d05a8 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -12,22 +12,24 @@ TRACE_EVENT(net_dev_xmit, TP_PROTO(struct sk_buff *skb, - int rc), + int rc, + struct net_device *dev, + unsigned int skb_len), - TP_ARGS(skb, rc), + TP_ARGS(skb, rc, dev, skb_len), TP_STRUCT__entry( __field( void *, skbaddr ) __field( unsigned int, len ) __field( int, rc ) - __string( name, skb->dev->name ) + __string( name, dev->name ) ), TP_fast_assign( __entry->skbaddr = skb; - __entry->len = skb->len; + __entry->len = skb_len; __entry->rc = rc; - __assign_str(name, skb->dev->name); + __assign_str(name, dev->name); ), TP_printk("dev=%s skbaddr=%p len=%u rc=%d", diff --git a/net/core/dev.c b/net/core/dev.c index c7e305d13b7..939307891e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2096,6 +2096,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, { const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; + unsigned int skb_len; if (likely(!skb->next)) { u32 features; @@ -2146,8 +2147,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } } + skb_len = skb->len; rc = ops->ndo_start_xmit(skb, dev); - trace_net_dev_xmit(skb, rc); + trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; @@ -2167,8 +2169,9 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); + skb_len = nskb->len; rc = ops->ndo_start_xmit(nskb, dev); - trace_net_dev_xmit(nskb, rc); + trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb; -- cgit v1.2.3-70-g09d2 From 9a2e0fb0893ddf595d0a372e681f5b98017c6d90 Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Thu, 2 Jun 2011 13:01:39 +0000 Subject: tg3: Fix tg3_skb_error_unmap() This function attempts to free one fragment beyond the number of fragments that were actually mapped. This patch brings back the limit to the correct spot. Signed-off-by: Matt Carlson Tested-by: Alex Williamson Signed-off-by: David S. Miller --- drivers/net/tg3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index f4b01c638a3..a1f9f9eef37 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -5774,7 +5774,7 @@ static void tg3_skb_error_unmap(struct tg3_napi *tnapi, dma_unmap_addr(txb, mapping), skb_headlen(skb), PCI_DMA_TODEVICE); - for (i = 0; i <= last; i++) { + for (i = 0; i < last; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; entry = NEXT_TX(entry); -- cgit v1.2.3-70-g09d2 From 4dffbe03d1e940aba878f9420e67feb8423cdd08 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 3 Jun 2011 10:05:02 +0200 Subject: ALSA: hda - Fix HP and Front pins of ad1988/ad1989 in ad198x_power_eapd() In ad198x_power_eapd(), wrong pin NIDs are used for controlling EAPD for HP and Front outputs of AD1988/AD1989. These are actually same with the ones for AD1984 & co, port-A is 0x11 and port-D 0x12. Reported-by: Raymond Yau Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_analog.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 696ac259030..82c4b2f5359 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -524,6 +524,10 @@ static void ad198x_power_eapd(struct hda_codec *codec) case 0x11d4184a: case 0x11d4194a: case 0x11d4194b: + case 0x11d41988: + case 0x11d4198b: + case 0x11d4989a: + case 0x11d4989b: ad198x_power_eapd_write(codec, 0x12, 0x11); break; case 0x11d41981: @@ -533,12 +537,6 @@ static void ad198x_power_eapd(struct hda_codec *codec) case 0x11d41986: ad198x_power_eapd_write(codec, 0x1b, 0x1a); break; - case 0x11d41988: - case 0x11d4198b: - case 0x11d4989a: - case 0x11d4989b: - ad198x_power_eapd_write(codec, 0x29, 0x22); - break; } } -- cgit v1.2.3-70-g09d2 From a01ef051d584c12ede5cd5275b008b2ded57f3d9 Mon Sep 17 00:00:00 2001 From: Raymond Yau Date: Wed, 1 Jun 2011 15:09:48 +0800 Subject: ALSA: hda - Check pin support EAPD in ad198x_power_eapd_write Check whether the pin supports EAPD in ad198x_power_eapd_write. Signed-off-by: Raymond Yau Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_analog.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 82c4b2f5359..d694e9d4921 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -506,9 +506,11 @@ static void ad198x_power_eapd_write(struct hda_codec *codec, hda_nid_t front, hda_nid_t hp) { struct ad198x_spec *spec = codec->spec; - snd_hda_codec_write(codec, front, 0, AC_VERB_SET_EAPD_BTLENABLE, + if (snd_hda_query_pin_caps(codec, front) & AC_PINCAP_EAPD) + snd_hda_codec_write(codec, front, 0, AC_VERB_SET_EAPD_BTLENABLE, !spec->inv_eapd ? 0x00 : 0x02); - snd_hda_codec_write(codec, hp, 0, AC_VERB_SET_EAPD_BTLENABLE, + if (snd_hda_query_pin_caps(codec, hp) & AC_PINCAP_EAPD) + snd_hda_codec_write(codec, hp, 0, AC_VERB_SET_EAPD_BTLENABLE, !spec->inv_eapd ? 0x00 : 0x02); } -- cgit v1.2.3-70-g09d2 From 9676001559fce06e37c7dc230ab275f605556176 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 26 May 2011 11:47:35 +0300 Subject: ALSA: fm801: add error handling if auto-detect fails In the original code if auto detect failed and tea575x_tuner == 4 then we copy bogus information to chip->tea.card. I've changed the autodetect code to cleanup and return -ENODEV on error instead. Signed-off-by: Dan Carpenter Signed-off-by: Takashi Iwai --- sound/pci/fm801.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index eacd4901a30..a7ec7030cf8 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -1234,9 +1234,12 @@ static int __devinit snd_fm801_create(struct snd_card *card, sprintf(chip->tea.bus_info, "PCI:%s", pci_name(pci)); if ((tea575x_tuner & TUNER_TYPE_MASK) > 0 && (tea575x_tuner & TUNER_TYPE_MASK) < 4) { - if (snd_tea575x_init(&chip->tea)) + if (snd_tea575x_init(&chip->tea)) { snd_printk(KERN_ERR "TEA575x radio not found\n"); - } else if ((tea575x_tuner & TUNER_TYPE_MASK) == 0) + snd_fm801_free(chip); + return -ENODEV; + } + } else if ((tea575x_tuner & TUNER_TYPE_MASK) == 0) { /* autodetect tuner connection */ for (tea575x_tuner = 1; tea575x_tuner <= 3; tea575x_tuner++) { chip->tea575x_tuner = tea575x_tuner; @@ -1246,6 +1249,12 @@ static int __devinit snd_fm801_create(struct snd_card *card, break; } } + if (tea575x_tuner == 4) { + snd_printk(KERN_ERR "TEA575x radio not found\n"); + snd_fm801_free(chip); + return -ENODEV; + } + } strlcpy(chip->tea.card, snd_fm801_tea575x_gpios[(tea575x_tuner & TUNER_TYPE_MASK) - 1].name, sizeof(chip->tea.card)); #endif -- cgit v1.2.3-70-g09d2 From d50a2fb63643dce8506520dab5ffb8f49cc45cb2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 3 Jun 2011 02:28:49 -0700 Subject: ALSA: asihpi: Use angle brackets for system includes Use the normal include style. Signed-off-by: Joe Perches Signed-off-by: Takashi Iwai --- sound/pci/asihpi/hpidspcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/asihpi/hpidspcd.c b/sound/pci/asihpi/hpidspcd.c index fb311d8c05b..5c6ea113d21 100644 --- a/sound/pci/asihpi/hpidspcd.c +++ b/sound/pci/asihpi/hpidspcd.c @@ -60,7 +60,7 @@ struct code_header { HPI_VER_MINOR(HPI_VER) * 100 + HPI_VER_RELEASE(HPI_VER))) /***********************************************************************/ -#include "linux/pci.h" +#include /*-------------------------------------------------------------------*/ short hpi_dsp_code_open(u32 adapter, struct dsp_code *ps_dsp_code, u32 *pos_error_code) -- cgit v1.2.3-70-g09d2 From 5ff6197f828d5ea051b3abf77cb61f8a34480e8d Mon Sep 17 00:00:00 2001 From: Steven Miao Date: Wed, 1 Jun 2011 15:52:41 +0800 Subject: Blackfin: strncpy: fix handling of zero lengths The jump to 4f will cause the NUL padding loop to run at least one time, so if string length is zero just jump to the end. Otherwise we wrongly write one NUL byte when size==0. Signed-off-by: Steven Miao Signed-off-by: Mike Frysinger --- arch/blackfin/lib/strncpy.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/blackfin/lib/strncpy.S b/arch/blackfin/lib/strncpy.S index f3931d50b4a..2c07dddac99 100644 --- a/arch/blackfin/lib/strncpy.S +++ b/arch/blackfin/lib/strncpy.S @@ -25,7 +25,7 @@ ENTRY(_strncpy) CC = R2 == 0; - if CC JUMP 4f; + if CC JUMP 6f; P2 = R2 ; /* size */ P0 = R0 ; /* dst*/ -- cgit v1.2.3-70-g09d2 From cf610bf4199770420629d3bc273494bd27ad6c1d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 31 May 2011 07:03:21 +0300 Subject: UBIFS: fix shrinker object count reports Sometimes VM asks the shrinker to return amount of objects it can shrink, and we return the ubifs_clean_zn_cnt in that case. However, it is possible that this counter is negative for a short period of time, due to the way UBIFS TNC code updates it. And I can observe the following warnings sometimes: shrink_slab: ubifs_shrinker+0x0/0x2b7 [ubifs] negative objects to delete nr=-8541616642706119788 This patch makes sure UBIFS never returns negative count of objects. Signed-off-by: Artem Bityutskiy Cc: stable@kernel.org --- fs/ubifs/shrinker.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index ca953a94502..9e1d05666fe 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -284,7 +284,11 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); if (nr == 0) - return clean_zn_cnt; + /* + * Due to the way UBIFS updates the clean znode counter it may + * temporarily be negative. + */ + return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; if (!clean_zn_cnt) { /* -- cgit v1.2.3-70-g09d2 From 812eb258311f89bcd664a34a620f249d54a2cd83 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 31 May 2011 08:40:40 +0300 Subject: UBIFS: fix memory leak on error path UBIFS leaks memory on error path in 'ubifs_jnl_update()' in case of write failure because it forgets to free the 'struct ubifs_dent_node *dent' object. Although the object is small, the alignment can make it large - e.g., 2KiB if the min. I/O unit is 2KiB. Signed-off-by: Artem Bityutskiy Cc: stable@kernel.org --- fs/ubifs/journal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 34b1679e6e3..cef0460f4c5 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -669,6 +669,7 @@ out_free: out_release: release_head(c, BASEHD); + kfree(dent); out_ro: ubifs_ro_mode(c, err); if (last_reference) -- cgit v1.2.3-70-g09d2 From 837072377034d0a0b18b851d1ab95676b245cc0a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 31 May 2011 14:26:07 +0300 Subject: UBIFS: fix clean znode counter corruption in error cases UBIFS maintains per-filesystem and global clean znode counters ('c->clean_zn_cnt' and 'ubifs_clean_zn_cnt'). It is important to maintain correct values there since the shrinker relies on 'ubifs_clean_zn_cnt'. However, in case of failures during commit the counters were corrupted. E.g., if a failure happens in the middle of 'write_index()', then some nodes in the commit list ('c->cnext') are marked as clean, and some are marked as dirty. And the 'ubifs_destroy_tnc_subtree()' frees does not retrun correct count, and we end up with non-zero 'c->clean_zn_cnt' when unmounting. This means that if we have 2 file-sytem and one of them fails, and we unmount it, 'ubifs_clean_zn_cnt' stays incorrect and confuses the shrinker. Signed-off-by: Artem Bityutskiy --- fs/ubifs/tnc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 8119b1fd8d9..91b4213dde8 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -2876,12 +2876,13 @@ static void tnc_destroy_cnext(struct ubifs_info *c) */ void ubifs_tnc_close(struct ubifs_info *c) { - long clean_freed; - tnc_destroy_cnext(c); if (c->zroot.znode) { - clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); - atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); + long n; + + ubifs_destroy_tnc_subtree(c->zroot.znode); + n = atomic_long_read(&c->clean_zn_cnt); + atomic_long_sub(n, &ubifs_clean_zn_cnt); } kfree(c->gap_lebs); kfree(c->ilebs); -- cgit v1.2.3-70-g09d2 From 4f1ab9b01d34eac9fc958f7150d3bf266dcc1685 Mon Sep 17 00:00:00 2001 From: Ben Gardiner Date: Mon, 30 May 2011 14:56:14 -0400 Subject: UBIFS: assert no fixup when writing a node The current free space fixup can result in some writing to the UBI volume when the space_fixup flag is set. To catch instances where UBIFS is writing to the NAND while the space_fixup flag is set, add an assert to ubifs_write_node(). Artem: tweaked the patch, added similar assertion to the write buffer write path. Signed-off-by: Ben Gardiner Reviewed-by: Matthew L. Creech Signed-off-by: Artem Bityutskiy --- fs/ubifs/io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 166951e0dcd..3be645e012c 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -581,6 +581,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) ubifs_assert(wbuf->size % c->min_io_size == 0); ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); ubifs_assert(!c->ro_media && !c->ro_mount); + ubifs_assert(!c->space_fixup); if (c->leb_size - wbuf->offs >= c->max_write_size) ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); @@ -759,6 +760,7 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); ubifs_assert(!c->ro_media && !c->ro_mount); + ubifs_assert(!c->space_fixup); if (c->ro_error) return -EROFS; -- cgit v1.2.3-70-g09d2 From 781c5717a95a74b294beb38b8276943b0f8b5bb4 Mon Sep 17 00:00:00 2001 From: Ben Gardiner Date: Mon, 30 May 2011 14:56:15 -0400 Subject: UBIFS: intialize LPT earlier The current 'mount_ubifs()' implementation does not initialize the LPT until the the master node is marked dirty. Move the LPT initialization to before marking the master node dirty. This is a preparation for the next patch which will move the free-space-fixup check to before marking the master node dirty, because we have to fix-up the free space before doing any writes. Artem: massaged the patch and commit message. Signed-off-by: Ben Gardiner Reviewed-by: Matthew L. Creech Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1e40db740da..6d357fd9c28 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1282,17 +1282,24 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_master; - init_constants_master(c); - if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { ubifs_msg("recovery needed"); c->need_recovery = 1; - if (!c->ro_mount) { - err = ubifs_recover_inl_heads(c, c->sbuf); - if (err) - goto out_master; - } - } else if (!c->ro_mount) { + } + + init_constants_master(c); + + if (c->need_recovery && !c->ro_mount) { + err = ubifs_recover_inl_heads(c, c->sbuf); + if (err) + goto out_master; + } + + err = ubifs_lpt_init(c, 1, !c->ro_mount); + if (err) + goto out_master; + + if (!c->ro_mount) { /* * Set the "dirty" flag so that if we reboot uncleanly we * will notice this immediately on the next mount. @@ -1300,13 +1307,9 @@ static int mount_ubifs(struct ubifs_info *c) c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); err = ubifs_write_master(c); if (err) - goto out_master; + goto out_lpt; } - err = ubifs_lpt_init(c, 1, !c->ro_mount); - if (err) - goto out_lpt; - err = dbg_check_idx_size(c, c->bi.old_idx_sz); if (err) goto out_lpt; -- cgit v1.2.3-70-g09d2 From 098011940a2549ae7182db4bf101c3e3d2b4e6df Mon Sep 17 00:00:00 2001 From: Ben Gardiner Date: Mon, 30 May 2011 14:56:16 -0400 Subject: UBIFS: fix-up free space earlier The free space fixup is currently initiated during mount after the call to ubifs_write_master() which results in a write to PEBs; this has been observed with the patch 'assert no fixup when writing a node' applied: Move the free space fixup on mount to before the calls to ubifs_recover_inl_heads() and ubifs_write_master(). This results in no assertions with the previously mentioned patch applied. Artem: tweaked the patch a bit Signed-off-by: Ben Gardiner Reviewed-by: Matthew L. Creech Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 6d357fd9c28..b5aeb5a8ebe 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1282,13 +1282,13 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_master; + init_constants_master(c); + if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { ubifs_msg("recovery needed"); c->need_recovery = 1; } - init_constants_master(c); - if (c->need_recovery && !c->ro_mount) { err = ubifs_recover_inl_heads(c, c->sbuf); if (err) @@ -1299,6 +1299,12 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_master; + if (!c->ro_mount && c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + goto out_master; + } + if (!c->ro_mount) { /* * Set the "dirty" flag so that if we reboot uncleanly we @@ -1402,12 +1408,6 @@ static int mount_ubifs(struct ubifs_info *c) } else ubifs_assert(c->lst.taken_empty_lebs > 0); - if (!c->ro_mount && c->space_fixup) { - err = ubifs_fixup_free_space(c); - if (err) - goto out_infos; - } - err = dbg_check_filesystem(c); if (err) goto out_infos; -- cgit v1.2.3-70-g09d2 From 157186bc185cdf588fecba0fc5d7466e3e5d49d7 Mon Sep 17 00:00:00 2001 From: Eric Lammerts Date: Fri, 27 May 2011 18:16:52 -0400 Subject: ALSA: usb - turn off de-emphasis in s/pdif for cm6206 CM6206: Turn off de-emphasis channel status bit in S/PDIF output. Signed-off-by: Eric Lammerts Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 2e969cbb393..090e1930dfd 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -403,7 +403,7 @@ static int snd_usb_cm106_boot_quirk(struct usb_device *dev) static int snd_usb_cm6206_boot_quirk(struct usb_device *dev) { int err, reg; - int val[] = {0x200c, 0x3000, 0xf800, 0x143f, 0x0000, 0x3000}; + int val[] = {0x2004, 0x3000, 0xf800, 0x143f, 0x0000, 0x3000}; for (reg = 0; reg < ARRAY_SIZE(val); reg++) { err = snd_usb_cm106_write_int_reg(dev, reg, val[reg]); -- cgit v1.2.3-70-g09d2 From 55db4c64eddf37e31279ec15fe90314713bc9cfa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Jun 2011 06:33:24 +0900 Subject: Revert "tty: make receive_buf() return the amout of bytes received" This reverts commit b1c43f82c5aa265442f82dba31ce985ebb7aa71c. It was broken in so many ways, and results in random odd pty issues. It re-introduced the buggy schedule_work() in flush_to_ldisc() that can cause endless work-loops (see commit a5660b41af6a: "tty: fix endless work loop when the buffer fills up"). It also used an "unsigned int" return value fo the ->receive_buf() function, but then made multiple functions return a negative error code, and didn't actually check for the error in the caller. And it didn't actually work at all. BenH bisected down odd tty behavior to it: "It looks like the patch is causing some major malfunctions of the X server for me, possibly related to PTYs. For example, cat'ing a large file in a gnome terminal hangs the kernel for -minutes- in a loop of what looks like flush_to_ldisc/workqueue code, (some ftrace data in the quoted bits further down). ... Some more data: It -looks- like what happens is that the flush_to_ldisc work queue entry constantly re-queues itself (because the PTY is full ?) and the workqueue thread will basically loop forver calling it without ever scheduling, thus starving the consumer process that could have emptied the PTY." which is pretty much exactly the problem we fixed in a5660b41af6a. Milton Miller pointed out the 'unsigned int' issue. Reported-by: Benjamin Herrenschmidt Reported-by: Milton Miller Cc: Stefan Bigler Cc: Toby Gray Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Alan Cox Signed-off-by: Linus Torvalds --- drivers/bluetooth/hci_ldisc.c | 17 ++++------- drivers/input/serio/serport.c | 10 ++----- drivers/isdn/gigaset/ser-gigaset.c | 8 ++--- drivers/misc/ti-st/st_core.c | 6 ++-- drivers/net/caif/caif_serial.c | 6 ++-- drivers/net/can/slcan.c | 9 ++---- drivers/net/hamradio/6pack.c | 8 ++--- drivers/net/hamradio/mkiss.c | 11 +++---- drivers/net/irda/irtty-sir.c | 16 +++++----- drivers/net/ppp_async.c | 6 ++-- drivers/net/ppp_synctty.c | 6 ++-- drivers/net/slip.c | 11 +++---- drivers/net/wan/x25_asy.c | 7 ++--- drivers/tty/n_gsm.c | 6 ++-- drivers/tty/n_hdlc.c | 18 +++++------ drivers/tty/n_r3964.c | 10 +++---- drivers/tty/n_tty.c | 61 +++++++++++++++++++++++++++++--------- drivers/tty/tty_buffer.c | 15 ++++------ drivers/tty/vt/selection.c | 3 +- include/linux/tty_ldisc.h | 9 +++--- 20 files changed, 115 insertions(+), 128 deletions(-) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index b3f01996318..48ad2a7ab08 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -355,29 +355,24 @@ static void hci_uart_tty_wakeup(struct tty_struct *tty) * flags pointer to flags for data * count count of received data in bytes * - * Return Value: Number of bytes received + * Return Value: None */ -static unsigned int hci_uart_tty_receive(struct tty_struct *tty, - const u8 *data, char *flags, int count) +static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, char *flags, int count) { struct hci_uart *hu = (void *)tty->disc_data; - int received; if (!hu || tty != hu->tty) - return -ENODEV; + return; if (!test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return -EINVAL; + return; spin_lock(&hu->rx_lock); - received = hu->proto->recv(hu, (void *) data, count); - if (received > 0) - hu->hdev->stat.byte_rx += received; + hu->proto->recv(hu, (void *) data, count); + hu->hdev->stat.byte_rx += count; spin_unlock(&hu->rx_lock); tty_unthrottle(tty); - - return received; } static int hci_uart_register_dev(struct hci_uart *hu) diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index f3698967edf..8755f5f3ad3 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -120,21 +120,17 @@ static void serport_ldisc_close(struct tty_struct *tty) * 'interrupt' routine. */ -static unsigned int serport_ldisc_receive(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void serport_ldisc_receive(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct serport *serport = (struct serport*) tty->disc_data; unsigned long flags; unsigned int ch_flags; - int ret = 0; int i; spin_lock_irqsave(&serport->lock, flags); - if (!test_bit(SERPORT_ACTIVE, &serport->flags)) { - ret = -EINVAL; + if (!test_bit(SERPORT_ACTIVE, &serport->flags)) goto out; - } for (i = 0; i < count; i++) { switch (fp[i]) { @@ -156,8 +152,6 @@ static unsigned int serport_ldisc_receive(struct tty_struct *tty, out: spin_unlock_irqrestore(&serport->lock, flags); - - return ret == 0 ? count : ret; } /* diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 1d44d470897..86a5c4f7775 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -674,7 +674,7 @@ gigaset_tty_ioctl(struct tty_struct *tty, struct file *file, * cflags buffer containing error flags for received characters (ignored) * count number of received characters */ -static unsigned int +static void gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -683,12 +683,12 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, struct inbuf_t *inbuf; if (!cs) - return -ENODEV; + return; inbuf = cs->inbuf; if (!inbuf) { dev_err(cs->dev, "%s: no inbuf\n", __func__); cs_put(cs); - return -EINVAL; + return; } tail = inbuf->tail; @@ -725,8 +725,6 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, gig_dbg(DEBUG_INTR, "%s-->BH", __func__); gigaset_schedule_event(cs); cs_put(cs); - - return count; } /* diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 1a05fe08e2c..f91f82eabda 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -747,8 +747,8 @@ static void st_tty_close(struct tty_struct *tty) pr_debug("%s: done ", __func__); } -static unsigned int st_tty_receive(struct tty_struct *tty, - const unsigned char *data, char *tty_flags, int count) +static void st_tty_receive(struct tty_struct *tty, const unsigned char *data, + char *tty_flags, int count) { #ifdef VERBOSE print_hex_dump(KERN_DEBUG, ">in>", DUMP_PREFIX_NONE, @@ -761,8 +761,6 @@ static unsigned int st_tty_receive(struct tty_struct *tty, */ st_recv(tty->disc_data, data, count); pr_debug("done %s", __func__); - - return count; } /* wake-up function called in from the TTY layer diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index 73c7e03617e..3df0c0f8b8b 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -167,8 +167,8 @@ static inline void debugfs_tx(struct ser_device *ser, const u8 *data, int size) #endif -static unsigned int ldisc_receive(struct tty_struct *tty, - const u8 *data, char *flags, int count) +static void ldisc_receive(struct tty_struct *tty, const u8 *data, + char *flags, int count) { struct sk_buff *skb = NULL; struct ser_device *ser; @@ -215,8 +215,6 @@ static unsigned int ldisc_receive(struct tty_struct *tty, } else ++ser->dev->stats.rx_dropped; update_tty_status(ser); - - return count; } static int handle_tx(struct ser_device *ser) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 75622d54581..1b49df6b247 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -425,17 +425,16 @@ static void slc_setup(struct net_device *dev) * in parallel */ -static unsigned int slcan_receive_buf(struct tty_struct *tty, +static void slcan_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct slcan *sl = (struct slcan *) tty->disc_data; - int bytes = count; if (!sl || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev)) - return -ENODEV; + return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -444,8 +443,6 @@ static unsigned int slcan_receive_buf(struct tty_struct *tty, } slcan_unesc(sl, *cp++); } - - return count; } /************************************ diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 992089639ea..3e5d0b6b651 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -456,7 +456,7 @@ out: * a block of 6pack data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. */ -static unsigned int sixpack_receive_buf(struct tty_struct *tty, +static void sixpack_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct sixpack *sp; @@ -464,11 +464,11 @@ static unsigned int sixpack_receive_buf(struct tty_struct *tty, int count1; if (!count) - return 0; + return; sp = sp_get(tty); if (!sp) - return -ENODEV; + return; memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); @@ -487,8 +487,6 @@ static unsigned int sixpack_receive_buf(struct tty_struct *tty, sp_put(sp); tty_unthrottle(tty); - - return count1; } /* diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 0e4f2353114..4c628393c8b 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -923,14 +923,13 @@ static long mkiss_compat_ioctl(struct tty_struct *tty, struct file *file, * a block of data has been received, which can now be decapsulated * and sent on to the AX.25 layer for further processing. */ -static unsigned int mkiss_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct mkiss *ax = mkiss_get(tty); - int bytes = count; if (!ax) - return -ENODEV; + return; /* * Argh! mtu change time! - costs us the packet part received @@ -940,7 +939,7 @@ static unsigned int mkiss_receive_buf(struct tty_struct *tty, ax_changedmtu(ax); /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp != NULL && *fp++) { if (!test_and_set_bit(AXF_ERROR, &ax->flags)) ax->dev->stats.rx_errors++; @@ -953,8 +952,6 @@ static unsigned int mkiss_receive_buf(struct tty_struct *tty, mkiss_put(ax); tty_unthrottle(tty); - - return count; } /* diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 035861d8acb..3352b2443e5 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -216,23 +216,23 @@ static int irtty_do_write(struct sir_dev *dev, const unsigned char *ptr, size_t * usbserial: urb-complete-interrupt / softint */ -static unsigned int irtty_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void irtty_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct sir_dev *dev; struct sirtty_cb *priv = tty->disc_data; int i; - IRDA_ASSERT(priv != NULL, return -ENODEV;); - IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return -EINVAL;); + IRDA_ASSERT(priv != NULL, return;); + IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return;); if (unlikely(count==0)) /* yes, this happens */ - return 0; + return; dev = priv->dev; if (!dev) { IRDA_WARNING("%s(), not ready yet!\n", __func__); - return -ENODEV; + return; } for (i = 0; i < count; i++) { @@ -242,13 +242,11 @@ static unsigned int irtty_receive_buf(struct tty_struct *tty, if (fp && *fp++) { IRDA_DEBUG(0, "Framing or parity error!\n"); sirdev_receive(dev, NULL, 0); /* notify sir_dev (updating stats) */ - return -EINVAL; + return; } } sirdev_receive(dev, cp, count); - - return count; } /* diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 53872d7d738..a1b82c9c67d 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -340,7 +340,7 @@ ppp_asynctty_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static unsigned int +static void ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -348,7 +348,7 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return -ENODEV; + return; spin_lock_irqsave(&ap->recv_lock, flags); ppp_async_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -356,8 +356,6 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); ap_put(ap); tty_unthrottle(tty); - - return count; } static void diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 0815790a5cf..2573f525f11 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -381,7 +381,7 @@ ppp_sync_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static unsigned int +static void ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -389,7 +389,7 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return -ENODEV; + return; spin_lock_irqsave(&ap->recv_lock, flags); ppp_sync_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -397,8 +397,6 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); sp_put(ap); tty_unthrottle(tty); - - return count; } static void diff --git a/drivers/net/slip.c b/drivers/net/slip.c index 584809c656d..8ec1a9a0bb9 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -670,17 +670,16 @@ static void sl_setup(struct net_device *dev) * in parallel */ -static unsigned int slip_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void slip_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct slip *sl = tty->disc_data; - int bytes = count; if (!sl || sl->magic != SLIP_MAGIC || !netif_running(sl->dev)) - return -ENODEV; + return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -694,8 +693,6 @@ static unsigned int slip_receive_buf(struct tty_struct *tty, #endif slip_unesc(sl, *cp++); } - - return count; } /************************************ diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 40398bf7d03..24297b274cd 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -517,18 +517,17 @@ static int x25_asy_close(struct net_device *dev) * and sent on to some IP layer for further processing. */ -static unsigned int x25_asy_receive_buf(struct tty_struct *tty, +static void x25_asy_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct x25_asy *sl = tty->disc_data; - int bytes = count; if (!sl || sl->magic != X25_ASY_MAGIC || !netif_running(sl->dev)) return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -537,8 +536,6 @@ static unsigned int x25_asy_receive_buf(struct tty_struct *tty, } x25_asy_unesc(sl, *cp++); } - - return count; } /* diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index a4c42a75a3b..09e8c7d53af 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2128,8 +2128,8 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) gsm->tty = NULL; } -static unsigned int gsmld_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct gsm_mux *gsm = tty->disc_data; const unsigned char *dp; @@ -2162,8 +2162,6 @@ static unsigned int gsmld_receive_buf(struct tty_struct *tty, } /* FASYNC if needed ? */ /* If clogged call tty_throttle(tty); */ - - return count; } /** diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index cac666314ae..cea56033b34 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -188,8 +188,8 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_table *wait); static int n_hdlc_tty_open(struct tty_struct *tty); static void n_hdlc_tty_close(struct tty_struct *tty); -static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, - const __u8 *cp, char *fp, int count); +static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *cp, + char *fp, int count); static void n_hdlc_tty_wakeup(struct tty_struct *tty); #define bset(p,b) ((p)[(b) >> 5] |= (1 << ((b) & 0x1f))) @@ -509,8 +509,8 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty) * Called by tty low level driver when receive data is available. Data is * interpreted as one HDLC frame. */ -static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, - const __u8 *data, char *flags, int count) +static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, + char *flags, int count) { register struct n_hdlc *n_hdlc = tty2n_hdlc (tty); register struct n_hdlc_buf *buf; @@ -521,20 +521,20 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, /* This can happen if stuff comes in on the backup tty */ if (!n_hdlc || tty != n_hdlc->tty) - return -ENODEV; + return; /* verify line is using HDLC discipline */ if (n_hdlc->magic != HDLC_MAGIC) { printk("%s(%d) line not using HDLC discipline\n", __FILE__,__LINE__); - return -EINVAL; + return; } if ( count>maxframe ) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) rx count>maxframesize, data discarded\n", __FILE__,__LINE__); - return -EINVAL; + return; } /* get a free HDLC buffer */ @@ -550,7 +550,7 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) no more rx buffers, data discarded\n", __FILE__,__LINE__); - return -EINVAL; + return; } /* copy received data to HDLC buffer */ @@ -565,8 +565,6 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, if (n_hdlc->tty->fasync != NULL) kill_fasync (&n_hdlc->tty->fasync, SIGIO, POLL_IN); - return count; - } /* end of n_hdlc_tty_receive() */ /** diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index a4bc39c21a4..5c6c31459a2 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -139,8 +139,8 @@ static int r3964_ioctl(struct tty_struct *tty, struct file *file, static void r3964_set_termios(struct tty_struct *tty, struct ktermios *old); static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, struct poll_table_struct *wait); -static unsigned int r3964_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count); +static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count); static struct tty_ldisc_ops tty_ldisc_N_R3964 = { .owner = THIS_MODULE, @@ -1239,8 +1239,8 @@ static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, return result; } -static unsigned int r3964_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct r3964_info *pInfo = tty->disc_data; const unsigned char *p; @@ -1257,8 +1257,6 @@ static unsigned int r3964_receive_buf(struct tty_struct *tty, } } - - return count; } MODULE_LICENSE("GPL"); diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 95d0a9c2dd1..0ad32888091 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -81,6 +81,38 @@ static inline int tty_put_user(struct tty_struct *tty, unsigned char x, return put_user(x, ptr); } +/** + * n_tty_set__room - receive space + * @tty: terminal + * + * Called by the driver to find out how much data it is + * permitted to feed to the line discipline without any being lost + * and thus to manage flow control. Not serialized. Answers for the + * "instant". + */ + +static void n_tty_set_room(struct tty_struct *tty) +{ + /* tty->read_cnt is not read locked ? */ + int left = N_TTY_BUF_SIZE - tty->read_cnt - 1; + int old_left; + + /* + * If we are doing input canonicalization, and there are no + * pending newlines, let characters through without limit, so + * that erase characters will be handled. Other excess + * characters will be beeped. + */ + if (left <= 0) + left = tty->icanon && !tty->canon_data; + old_left = tty->receive_room; + tty->receive_room = left; + + /* Did this open up the receive buffer? We may need to flip */ + if (left && !old_left) + schedule_work(&tty->buf.work); +} + static void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty) { if (tty->read_cnt < N_TTY_BUF_SIZE) { @@ -152,6 +184,7 @@ static void reset_buffer_flags(struct tty_struct *tty) tty->canon_head = tty->canon_data = tty->erasing = 0; memset(&tty->read_flags, 0, sizeof tty->read_flags); + n_tty_set_room(tty); check_unthrottle(tty); } @@ -1327,19 +1360,17 @@ static void n_tty_write_wakeup(struct tty_struct *tty) * calls one at a time and in order (or using flush_to_ldisc) */ -static unsigned int n_tty_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { const unsigned char *p; char *f, flags = TTY_NORMAL; int i; char buf[64]; unsigned long cpuflags; - int left; - int ret = 0; if (!tty->read_buf) - return 0; + return; if (tty->real_raw) { spin_lock_irqsave(&tty->read_lock, cpuflags); @@ -1349,7 +1380,6 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; - ret += i; cp += i; count -= i; @@ -1359,10 +1389,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; - ret += i; spin_unlock_irqrestore(&tty->read_lock, cpuflags); } else { - ret = count; for (i = count, p = cp, f = fp; i; i--, p++) { if (f) flags = *f++; @@ -1390,6 +1418,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, tty->ops->flush_chars(tty); } + n_tty_set_room(tty); + if ((!tty->icanon && (tty->read_cnt >= tty->minimum_to_wake)) || L_EXTPROC(tty)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); @@ -1402,12 +1432,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, * mode. We don't want to throttle the driver if we're in * canonical mode and don't have a newline yet! */ - left = N_TTY_BUF_SIZE - tty->read_cnt - 1; - - if (left < TTY_THRESHOLD_THROTTLE) + if (tty->receive_room < TTY_THRESHOLD_THROTTLE) tty_throttle(tty); - - return ret; } int is_ignored(int sig) @@ -1451,6 +1477,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) if (test_bit(TTY_HW_COOK_IN, &tty->flags)) { tty->raw = 1; tty->real_raw = 1; + n_tty_set_room(tty); return; } if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) || @@ -1503,6 +1530,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) else tty->real_raw = 0; } + n_tty_set_room(tty); /* The termios change make the tty ready for I/O */ wake_up_interruptible(&tty->write_wait); wake_up_interruptible(&tty->read_wait); @@ -1784,6 +1812,8 @@ do_it_again: retval = -ERESTARTSYS; break; } + /* FIXME: does n_tty_set_room need locking ? */ + n_tty_set_room(tty); timeout = schedule_timeout(timeout); continue; } @@ -1855,8 +1885,10 @@ do_it_again: * longer than TTY_THRESHOLD_UNTHROTTLE in canonical mode, * we won't get any more characters. */ - if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) + if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) { + n_tty_set_room(tty); check_unthrottle(tty); + } if (b - buf >= minimum) break; @@ -1878,6 +1910,7 @@ do_it_again: } else if (test_and_clear_bit(TTY_PUSH, &tty->flags)) goto do_it_again; + n_tty_set_room(tty); return retval; } diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 46de2e075da..f1a7918d71a 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -416,7 +416,6 @@ static void flush_to_ldisc(struct work_struct *work) struct tty_buffer *head, *tail = tty->buf.tail; int seen_tail = 0; while ((head = tty->buf.head) != NULL) { - int copied; int count; char *char_buf; unsigned char *flag_buf; @@ -443,19 +442,17 @@ static void flush_to_ldisc(struct work_struct *work) line discipline as we want to empty the queue */ if (test_bit(TTY_FLUSHPENDING, &tty->flags)) break; + if (!tty->receive_room || seen_tail) + break; + if (count > tty->receive_room) + count = tty->receive_room; char_buf = head->char_buf_ptr + head->read; flag_buf = head->flag_buf_ptr + head->read; + head->read += count; spin_unlock_irqrestore(&tty->buf.lock, flags); - copied = disc->ops->receive_buf(tty, char_buf, + disc->ops->receive_buf(tty, char_buf, flag_buf, count); spin_lock_irqsave(&tty->buf.lock, flags); - - head->read += copied; - - if (copied == 0 || seen_tail) { - schedule_work(&tty->buf.work); - break; - } } clear_bit(TTY_FLUSHING, &tty->flags); } diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 67b1d0d7c8a..fb864e7fcd1 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -332,7 +332,8 @@ int paste_selection(struct tty_struct *tty) continue; } count = sel_buffer_lth - pasted; - count = tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, + count = min(count, tty->receive_room); + tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, NULL, count); pasted += count; } diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 5b07792ccb4..ff7dc08696a 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -76,7 +76,7 @@ * tty device. It is solely the responsibility of the line * discipline to handle poll requests. * - * unsigned int (*receive_buf)(struct tty_struct *, const unsigned char *cp, + * void (*receive_buf)(struct tty_struct *, const unsigned char *cp, * char *fp, int count); * * This function is called by the low-level tty driver to send @@ -84,8 +84,7 @@ * processing. is a pointer to the buffer of input * character received by the device. is a pointer to a * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. Returns the amount of bytes - * received. + * received with a parity error, etc. * * void (*write_wakeup)(struct tty_struct *); * @@ -141,8 +140,8 @@ struct tty_ldisc_ops { /* * The following routines are called from below. */ - unsigned int (*receive_buf)(struct tty_struct *, - const unsigned char *cp, char *fp, int count); + void (*receive_buf)(struct tty_struct *, const unsigned char *cp, + char *fp, int count); void (*write_wakeup)(struct tty_struct *); void (*dcd_change)(struct tty_struct *, unsigned int, struct pps_event_time *); -- cgit v1.2.3-70-g09d2 From bb3d6bf1919a20c56b3257b4ec09e67a9226cfb2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Jun 2011 07:00:50 +0900 Subject: Revert "ASoC: Update cx20442 for TTY API change" This reverts commit ed0bd2333cffc3d856db9beb829543c1dfc00982. Since we reverted the TTY API change, we should revert the ASoC update to it too. Cc: Mark Brown Cc: Liam Girdwood Cc: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- sound/soc/codecs/cx20442.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c index f8c663dcff0..d68ea532cc7 100644 --- a/sound/soc/codecs/cx20442.c +++ b/sound/soc/codecs/cx20442.c @@ -262,14 +262,14 @@ static int v253_hangup(struct tty_struct *tty) } /* Line discipline .receive_buf() */ -static unsigned int v253_receive(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void v253_receive(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct snd_soc_codec *codec = tty->disc_data; struct cx20442_priv *cx20442; if (!codec) - return count; + return; cx20442 = snd_soc_codec_get_drvdata(codec); @@ -281,8 +281,6 @@ static unsigned int v253_receive(struct tty_struct *tty, codec->hw_write = (hw_write_t)tty->ops->write; codec->card->pop_time = 1; } - - return count; } /* Line discipline .write_wakeup() */ -- cgit v1.2.3-70-g09d2 From 1bc8779349d6278e2713a1ff94418c2a6746a791 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Sat, 28 May 2011 21:57:55 +0200 Subject: btrfs: scrub: don't reuse bios and pages The current scrub implementation reuses bios and pages as often as possible, allocating them only on start and releasing them when finished. This leads to more problems with the block layer than it's worth. The elevator gets confused when there are more pages added to the bio than bi_size suggests. This patch completely rips out the reuse of bios and pages and allocates them freshly for each submit. Signed-off-by: Arne Jansen Signed-off-by: Chris Maosn --- fs/btrfs/scrub.c | 114 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6dfed0c27ac..2d1f8909a8e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -117,33 +117,37 @@ static void scrub_free_csums(struct scrub_dev *sdev) } } +static void scrub_free_bio(struct bio *bio) +{ + int i; + struct page *last_page = NULL; + + if (!bio) + return; + + for (i = 0; i < bio->bi_vcnt; ++i) { + if (bio->bi_io_vec[i].bv_page == last_page) + continue; + last_page = bio->bi_io_vec[i].bv_page; + __free_page(last_page); + } + bio_put(bio); +} + static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) { int i; - int j; - struct page *last_page; if (!sdev) return; for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { struct scrub_bio *sbio = sdev->bios[i]; - struct bio *bio; if (!sbio) break; - bio = sbio->bio; - if (bio) { - last_page = NULL; - for (j = 0; j < bio->bi_vcnt; ++j) { - if (bio->bi_io_vec[j].bv_page == last_page) - continue; - last_page = bio->bi_io_vec[j].bv_page; - __free_page(last_page); - } - bio_put(bio); - } + scrub_free_bio(sbio->bio); kfree(sbio); } @@ -156,8 +160,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) { struct scrub_dev *sdev; int i; - int j; - int ret; struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; sdev = kzalloc(sizeof(*sdev), GFP_NOFS); @@ -165,7 +167,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) goto nomem; sdev->dev = dev; for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { - struct bio *bio; struct scrub_bio *sbio; sbio = kzalloc(sizeof(*sbio), GFP_NOFS); @@ -173,32 +174,10 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) goto nomem; sdev->bios[i] = sbio; - bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); - if (!bio) - goto nomem; - sbio->index = i; sbio->sdev = sdev; - sbio->bio = bio; sbio->count = 0; sbio->work.func = scrub_checksum; - bio->bi_private = sdev->bios[i]; - bio->bi_end_io = scrub_bio_end_io; - bio->bi_sector = 0; - bio->bi_bdev = dev->bdev; - bio->bi_size = 0; - - for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) { - struct page *page; - page = alloc_page(GFP_NOFS); - if (!page) - goto nomem; - - ret = bio_add_page(bio, page, PAGE_SIZE, 0); - if (!ret) - goto nomem; - } - WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO); if (i != SCRUB_BIOS_PER_DEV-1) sdev->bios[i]->next_free = i + 1; @@ -394,6 +373,7 @@ static void scrub_bio_end_io(struct bio *bio, int err) struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; sbio->err = err; + sbio->bio = bio; btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); } @@ -453,6 +433,8 @@ static void scrub_checksum(struct btrfs_work *work) } out: + scrub_free_bio(sbio->bio); + sbio->bio = NULL; spin_lock(&sdev->list_lock); sbio->next_free = sdev->first_free; sdev->first_free = sbio->index; @@ -583,25 +565,50 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) static int scrub_submit(struct scrub_dev *sdev) { struct scrub_bio *sbio; + struct bio *bio; + int i; if (sdev->curr == -1) return 0; sbio = sdev->bios[sdev->curr]; - sbio->bio->bi_sector = sbio->physical >> 9; - sbio->bio->bi_size = sbio->count * PAGE_SIZE; - sbio->bio->bi_next = NULL; - sbio->bio->bi_flags |= 1 << BIO_UPTODATE; - sbio->bio->bi_comp_cpu = -1; - sbio->bio->bi_bdev = sdev->dev->bdev; + bio = bio_alloc(GFP_NOFS, sbio->count); + if (!bio) + goto nomem; + + bio->bi_private = sbio; + bio->bi_end_io = scrub_bio_end_io; + bio->bi_bdev = sdev->dev->bdev; + bio->bi_sector = sbio->physical >> 9; + + for (i = 0; i < sbio->count; ++i) { + struct page *page; + int ret; + + page = alloc_page(GFP_NOFS); + if (!page) + goto nomem; + + ret = bio_add_page(bio, page, PAGE_SIZE, 0); + if (!ret) { + __free_page(page); + goto nomem; + } + } + sbio->err = 0; sdev->curr = -1; atomic_inc(&sdev->in_flight); - submit_bio(0, sbio->bio); + submit_bio(READ, bio); return 0; + +nomem: + scrub_free_bio(bio); + + return -ENOMEM; } static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, @@ -633,7 +640,11 @@ again: sbio->logical = logical; } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || sbio->logical + sbio->count * PAGE_SIZE != logical) { - scrub_submit(sdev); + int ret; + + ret = scrub_submit(sdev); + if (ret) + return ret; goto again; } sbio->spag[sbio->count].flags = flags; @@ -645,8 +656,13 @@ again: memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); } ++sbio->count; - if (sbio->count == SCRUB_PAGES_PER_BIO || force) - scrub_submit(sdev); + if (sbio->count == SCRUB_PAGES_PER_BIO || force) { + int ret; + + ret = scrub_submit(sdev); + if (ret) + return ret; + } return 0; } -- cgit v1.2.3-70-g09d2 From 17aca1c987cff89dc4279371857035da902c8854 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Jun 2011 01:13:45 -0400 Subject: Btrfs: fix uninit variable in the delayed inode code The nitems counter needs to start at zero Signed-off-by: Chris Mason --- fs/btrfs/delayed-inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index b46d94d1dea..c61c32cf0f7 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -678,6 +678,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, INIT_LIST_HEAD(&head); next = item; + nitems = 0; /* * count the number of the continuous items that we can insert in batch -- cgit v1.2.3-70-g09d2 From 211f96c24f117fcc6e9e2431e40d92f4de22625e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Jun 2011 01:26:53 -0400 Subject: Btrfs: make sure we don't overflow the free space cache crc page The free space cache uses only one page for crcs right now, which means we can't have a cache file bigger than the crcs we can fit in the first page. This adds a check to enforce that restriction. Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index dd38d4c3a59..1cb72394498 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -590,10 +590,25 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + /* Since the first page has all of our checksums and our generation we + * need to calculate the offset into the page that we can start writing + * our entries. + */ + first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); + filemap_write_and_wait(inode->i_mapping); btrfs_wait_ordered_range(inode, inode->i_size & ~(root->sectorsize - 1), (u64)-1); + /* make sure we don't overflow that first page */ + if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { + /* this is really the same as running out of space, where we also return 0 */ + printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); + ret = 0; + goto out_update; + } + /* We need a checksum per page. */ crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); if (!crc) @@ -605,12 +620,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, return -1; } - /* Since the first page has all of our checksums and our generation we - * need to calculate the offset into the page that we can start writing - * our entries. - */ - first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); - /* Get the cluster for this block_group if it exists */ if (block_group && !list_empty(&block_group->cluster_list)) cluster = list_entry(block_group->cluster_list.next, @@ -872,12 +881,14 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ret = 1; out_free: + kfree(checksums); + kfree(pages); + +out_update: if (ret != 1) { invalidate_inode_pages2_range(inode->i_mapping, 0, index); BTRFS_I(inode)->generation = 0; } - kfree(checksums); - kfree(pages); btrfs_update_inode(trans, root, inode); return ret; } -- cgit v1.2.3-70-g09d2 From ca456ae280c0646e1e571c3b9a3834c55e90adfe Mon Sep 17 00:00:00 2001 From: liubo Date: Wed, 1 Jun 2011 09:42:49 +0000 Subject: Btrfs: don't save the inode cache in non-FS roots This adds extra checks to make sure the inode map we are caching really belongs to a FS root instead of a special relocation tree. It prevents crashes during balancing operations. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 3262cd17a12..04f7199facb 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -388,6 +388,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root, int prealloc; bool retry = false; + /* only fs tree and subvol/snap needs ino cache */ + if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID && + (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID || + root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID)) + return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 5f3f302a6f4cb74906c05fad1d03fc5e95c7e5af Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Mon, 30 May 2011 08:36:16 +0000 Subject: btrfs: false BUG_ON when degraded In degraded mode the struct btrfs_device of missing devs don't have device->name set. A kstrdup of NULL correctly returns NULL. Don't BUG in this case. Signed-off-by: Arne Jansen Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c48214ef5c0..da541dfca2e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -504,7 +504,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) BUG_ON(!new_device); memcpy(new_device, device, sizeof(*new_device)); new_device->name = kstrdup(device->name, GFP_NOFS); - BUG_ON(!new_device->name); + BUG_ON(device->name && !new_device->name); new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; -- cgit v1.2.3-70-g09d2 From d132a538d258f8f52fd0cd8b5017755f4e915386 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 31 May 2011 19:33:33 +0000 Subject: Btrfs: don't save the inode cache if we are deleting this root With xfstest 254 I can panic the box every time with the inode number caching stuff on. This is because we clean the inodes out when we delete the subvolume, but then we write out the inode cache which adds an inode to the subvolume inode tree, and then when it gets evicted again the root gets added back on the dead roots list and is deleted again, so we have a double free. To stop this from happening just return 0 if refs is 0 (and we're not the tree root since tree root always has refs of 0). With this fix 254 no longer panics. Thanks, Signed-off-by: Josef Bacik Tested-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 04f7199facb..2d0d50067a7 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -394,6 +394,11 @@ int btrfs_save_ino_cache(struct btrfs_root *root, root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID)) return 0; + /* Don't save inode cache if we are deleting this root */ + if (btrfs_root_refs(&root->root_item) == 0 && + root != root->fs_info->tree_root) + return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From a4689d2bd3b00dcf5c4320f06e0ab88810fbff9c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 May 2011 17:08:14 +0000 Subject: btrfs: use btrfs_ino to access inode number commit 4cb5300bc ("Btrfs: add mount -o auto_defrag") accesses inode number directly while it should use the helper with the new inode number allocator. Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- fs/btrfs/ioctl.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e3a1b0c2394..982b5ea9762 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -144,7 +144,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (!defrag) return -ENOMEM; - defrag->ino = inode->i_ino; + defrag->ino = btrfs_ino(inode); defrag->transid = transid; defrag->root = root->root_key.objectid; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 74c80595d70..ac37040e426 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -706,16 +706,17 @@ static int find_new_extents(struct btrfs_root *root, struct btrfs_file_extent_item *extent; int type; int ret; + u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); if (!path) return -ENOMEM; - min_key.objectid = inode->i_ino; + min_key.objectid = ino; min_key.type = BTRFS_EXTENT_DATA_KEY; min_key.offset = *off; - max_key.objectid = inode->i_ino; + max_key.objectid = ino; max_key.type = (u8)-1; max_key.offset = (u64)-1; @@ -726,7 +727,7 @@ static int find_new_extents(struct btrfs_root *root, path, 0, newer_than); if (ret != 0) goto none; - if (min_key.objectid != inode->i_ino) + if (min_key.objectid != ino) goto none; if (min_key.type != BTRFS_EXTENT_DATA_KEY) goto none; -- cgit v1.2.3-70-g09d2 From e7786c3ae517b2c433edc91714e86be770e9f1ce Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Sat, 28 May 2011 20:58:38 +0000 Subject: btrfs: scrub: add explicit plugging With the removal of the implicit plugging scrub ends up doing more and smaller I/O than necessary. This patch adds explicit plugging per chunk. Signed-off-by: Arne Jansen Signed-off-by: Chris Mason --- fs/btrfs/scrub.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2d1f8909a8e..1204eab9402 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -348,9 +348,6 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, int ret; DECLARE_COMPLETION_ONSTACK(complete); - /* we are going to wait on this IO */ - rw |= REQ_SYNC; - bio = bio_alloc(GFP_NOFS, 1); bio->bi_bdev = bdev; bio->bi_sector = sector; @@ -359,6 +356,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, bio->bi_private = &complete; submit_bio(rw, bio); + /* this will also unplug the queue */ wait_for_completion(&complete); ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -743,6 +741,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, struct btrfs_root *root = fs_info->extent_root; struct btrfs_root *csum_root = fs_info->csum_root; struct btrfs_extent_item *extent; + struct blk_plug plug; u64 flags; int ret; int slot; @@ -847,6 +846,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, * the scrub. This might currently (crc32) end up to be about 1MB */ start_stripe = 0; + blk_start_plug(&plug); again: logical = base + offset + start_stripe * increment; for (i = start_stripe; i < nstripes; ++i) { @@ -988,6 +988,7 @@ next: scrub_submit(sdev); out: + blk_finish_plug(&plug); btrfs_free_path(path); return ret < 0 ? ret : 0; } -- cgit v1.2.3-70-g09d2 From 4b9465cb9e3859186eefa1ca3b990a5849386320 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Jun 2011 09:36:29 -0400 Subject: Btrfs: add mount -o inode_cache This makes the inode map cache default to off until we fix the overflow problem when the free space crcs don't fit inside a single page. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/free-space-cache.c | 6 ++++++ fs/btrfs/inode-map.c | 20 ++++++++++++++++++++ fs/btrfs/super.c | 8 +++++++- 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f98c200571..4958ef5417d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1340,6 +1340,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) +#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 1cb72394498..bffa5c4a633 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2536,6 +2536,9 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) int ret = 0; u64 root_gen = btrfs_root_generation(&root->root_item); + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return 0; + /* * If we're unmounting then just return, since this does a search on the * normal root and not the commit root and we could deadlock. @@ -2575,6 +2578,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, struct inode *inode; int ret; + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return 0; + inode = lookup_free_ino_inode(root, path); if (IS_ERR(inode)) return 0; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 2d0d50067a7..cb79b8975c9 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -38,6 +38,9 @@ static int caching_kthread(void *data) int slot; int ret; + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -141,6 +144,9 @@ static void start_caching(struct btrfs_root *root) int ret; u64 objectid; + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return; + spin_lock(&root->cache_lock); if (root->cached != BTRFS_CACHE_NO) { spin_unlock(&root->cache_lock); @@ -178,6 +184,9 @@ static void start_caching(struct btrfs_root *root) int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) { + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return btrfs_find_free_objectid(root, objectid); + again: *objectid = btrfs_find_ino_for_alloc(root); @@ -201,6 +210,10 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; + + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return; + again: if (root->cached == BTRFS_CACHE_FINISHED) { __btrfs_add_free_space(ctl, objectid, 1); @@ -250,6 +263,9 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) struct rb_node *n; u64 count; + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return; + while (1) { n = rb_first(rbroot); if (!n) @@ -399,9 +415,13 @@ int btrfs_save_ino_cache(struct btrfs_root *root, root != root->fs_info->tree_root) return 0; + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; + again: inode = lookup_free_ino_inode(root, path); if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 28e3cb2607f..3559d0b3518 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -160,7 +160,8 @@ enum { Opt_compress_type, Opt_compress_force, Opt_compress_force_type, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, - Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err, + Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, + Opt_inode_cache, Opt_err, }; static match_table_t tokens = { @@ -192,6 +193,7 @@ static match_table_t tokens = { {Opt_enospc_debug, "enospc_debug"}, {Opt_subvolrootid, "subvolrootid=%d"}, {Opt_defrag, "autodefrag"}, + {Opt_inode_cache, "inode_cache"}, {Opt_err, NULL}, }; @@ -360,6 +362,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: enabling disk space caching\n"); btrfs_set_opt(info->mount_opt, SPACE_CACHE); break; + case Opt_inode_cache: + printk(KERN_INFO "btrfs: enabling inode map caching\n"); + btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); + break; case Opt_clear_cache: printk(KERN_INFO "btrfs: force clearing of disk cache\n"); btrfs_set_opt(info->mount_opt, CLEAR_CACHE); -- cgit v1.2.3-70-g09d2 From 7841cb2898f66a73062c64d0ef5733dde7279e46 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 May 2011 18:07:27 +0200 Subject: btrfs: add helper for fs_info->closing wrap checking of filesystem 'closing' flag and fix a few missing memory barriers. Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 9 +++++++++ fs/btrfs/extent-tree.c | 3 +-- fs/btrfs/file.c | 4 ++-- fs/btrfs/free-space-cache.c | 10 ++++------ fs/btrfs/inode-map.c | 3 +-- fs/btrfs/inode.c | 3 +-- fs/btrfs/scrub.c | 2 +- fs/btrfs/transaction.c | 2 +- 8 files changed, 20 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4958ef5417d..8490ee06370 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2354,6 +2354,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, struct extent_buffer *parent); +static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) +{ + /* + * Get synced with close_ctree() + */ + smp_mb(); + return fs_info->closing; +} + /* root-item.c */ int btrfs_find_root_ref(struct btrfs_root *tree_root, struct btrfs_path *path, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c9173a7827b..5b9b6b6df24 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -366,8 +366,7 @@ again: nritems = btrfs_header_nritems(leaf); while (1) { - smp_mb(); - if (fs_info->closing > 1) { + if (btrfs_fs_closing(fs_info) > 1) { last = (u64)-1; break; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 982b5ea9762..fa4ef18b66b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -129,7 +129,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (!btrfs_test_opt(root, AUTO_DEFRAG)) return 0; - if (root->fs_info->closing) + if (btrfs_fs_closing(root->fs_info)) return 0; if (BTRFS_I(inode)->in_defrag) @@ -229,7 +229,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) first_ino = defrag->ino + 1; rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); - if (fs_info->closing) + if (btrfs_fs_closing(fs_info)) goto next_free; spin_unlock(&fs_info->defrag_inodes_lock); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index bffa5c4a633..ad144736a5f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -98,7 +98,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, return inode; spin_lock(&block_group->lock); - if (!root->fs_info->closing) { + if (!btrfs_fs_closing(root->fs_info)) { block_group->inode = igrab(inode); block_group->iref = 1; } @@ -493,8 +493,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, * If we're unmounting then just return, since this does a search on the * normal root and not the commit root and we could deadlock. */ - smp_mb(); - if (fs_info->closing) + if (btrfs_fs_closing(fs_info)) return 0; /* @@ -2513,7 +2512,7 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, return inode; spin_lock(&root->cache_lock); - if (!root->fs_info->closing) + if (!btrfs_fs_closing(root->fs_info)) root->cache_inode = igrab(inode); spin_unlock(&root->cache_lock); @@ -2543,8 +2542,7 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) * If we're unmounting then just return, since this does a search on the * normal root and not the commit root and we could deadlock. */ - smp_mb(); - if (fs_info->closing) + if (btrfs_fs_closing(fs_info)) return 0; path = btrfs_alloc_path(); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index cb79b8975c9..b4087e0fa87 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -62,8 +62,7 @@ again: goto out; while (1) { - smp_mb(); - if (fs_info->closing) + if (btrfs_fs_closing(fs_info)) goto out; leaf = path->nodes[0]; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a83e44bf320..02ff4a1b968 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4266,8 +4266,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (BTRFS_I(inode)->dummy_inode) return 0; - smp_mb(); - if (root->fs_info->closing && is_free_space_inode(root, inode)) + if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 1204eab9402..df50fd1eca8 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1183,7 +1183,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, int ret; struct btrfs_device *dev; - if (root->fs_info->closing) + if (btrfs_fs_closing(root->fs_info)) return -EINVAL; /* diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2d5c6d2aa4e..dd719662340 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -817,7 +817,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) btrfs_btree_balance_dirty(info->tree_root, nr); cond_resched(); - if (root->fs_info->closing || ret != -EAGAIN) + if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) break; } root->defrag_running = 0; -- cgit v1.2.3-70-g09d2 From aa0467d8d2a00e75b2bb6a56a4ee6d70c5d1928f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 3 Jun 2011 16:29:08 +0200 Subject: btrfs: fix uninitialized variable warning With Linus' tree, today's linux-next build (powercp ppc64_defconfig) produced this warning: fs/btrfs/delayed-inode.c: In function 'btrfs_delayed_update_inode': fs/btrfs/delayed-inode.c:1598:6: warning: 'ret' may be used uninitialized in this function Introduced by commit 16cdcec736cd ("btrfs: implement delayed inode items operation"). This fixes a bug in btrfs_update_inode(): if the returned value from btrfs_delayed_update_inode is a nonzero garbage, inode stat data are not updated and several call paths may hit a BUG_ON or fail with strange code. Reported-by: Stephen Rothwell Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c61c32cf0f7..6462c29d2d3 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1595,7 +1595,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { struct btrfs_delayed_node *delayed_node; - int ret; + int ret = 0; delayed_node = btrfs_get_or_create_delayed_node(inode); if (IS_ERR(delayed_node)) -- cgit v1.2.3-70-g09d2 From 942c1a927bf296fd64fd49f04c5a8f66bb14446b Mon Sep 17 00:00:00 2001 From: Per Dalén Date: Thu, 26 May 2011 09:08:53 -0400 Subject: hwmon: (max6642): Better chip detection schema Improve detection of MAX6642 by reading non existing registers (0x04, 0x06 and 0xff). Reading those registers returns the previously read value. Signed-off-by: Per Dalen [guenter.roeck@ericsson.com: added second set of register reads] Signed-off-by: Guenter Roeck --- drivers/hwmon/max6642.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/max6642.c b/drivers/hwmon/max6642.c index 0f30e1bb366..e855d3b0bd1 100644 --- a/drivers/hwmon/max6642.c +++ b/drivers/hwmon/max6642.c @@ -136,15 +136,29 @@ static int max6642_detect(struct i2c_client *client, if (man_id != 0x4D) return -ENODEV; + /* sanity check */ + if (i2c_smbus_read_byte_data(client, 0x04) != 0x4D + || i2c_smbus_read_byte_data(client, 0x06) != 0x4D + || i2c_smbus_read_byte_data(client, 0xff) != 0x4D) + return -ENODEV; + /* * We read the config and status register, the 4 lower bits in the * config register should be zero and bit 5, 3, 1 and 0 should be * zero in the status register. */ reg_config = i2c_smbus_read_byte_data(client, MAX6642_REG_R_CONFIG); + if ((reg_config & 0x0f) != 0x00) + return -ENODEV; + + /* in between, another round of sanity checks */ + if (i2c_smbus_read_byte_data(client, 0x04) != reg_config + || i2c_smbus_read_byte_data(client, 0x06) != reg_config + || i2c_smbus_read_byte_data(client, 0xff) != reg_config) + return -ENODEV; + reg_status = i2c_smbus_read_byte_data(client, MAX6642_REG_R_STATUS); - if (((reg_config & 0x0f) != 0x00) || - ((reg_status & 0x2b) != 0x00)) + if ((reg_status & 0x2b) != 0x00) return -ENODEV; strlcpy(info->type, "max6642", I2C_NAME_SIZE); -- cgit v1.2.3-70-g09d2 From f2a4d8ae4d40f6f5482d207f47fd4d53b3ae0ed4 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 31 May 2011 15:14:07 -0600 Subject: ARM: Tegra: Harmony: Fix conflicting GPIO numbering Currently, both the WM8903 and TPS6586x chips attempt to register with gpiolib using the same GPIO numbers. This causes the audio driver to fail to initialize. To solve this, add a define to board-harmony.h for the TPS6586x, and make board-harmony-power.c use this define, instead of directly referencing TEGRA_NR_GPIOS. This fixes a regression introduced by commit 6f168f2fa60f87e85e0df25e87e2372f22f5eb7c. ARM: tegra: harmony: initialize the TPS65862 PMIC Signed-off-by: Stephen Warren Signed-off-by: Colin Cross --- arch/arm/mach-tegra/board-harmony-power.c | 4 +++- arch/arm/mach-tegra/board-harmony.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/board-harmony-power.c b/arch/arm/mach-tegra/board-harmony-power.c index c84442cabe0..5ad8b2f94f8 100644 --- a/arch/arm/mach-tegra/board-harmony-power.c +++ b/arch/arm/mach-tegra/board-harmony-power.c @@ -24,6 +24,8 @@ #include +#include "board-harmony.h" + #define PMC_CTRL 0x0 #define PMC_CTRL_INTR_LOW (1 << 17) @@ -98,7 +100,7 @@ static struct tps6586x_platform_data tps_platform = { .irq_base = TEGRA_NR_IRQS, .num_subdevs = ARRAY_SIZE(tps_devs), .subdevs = tps_devs, - .gpio_base = TEGRA_NR_GPIOS, + .gpio_base = HARMONY_GPIO_TPS6586X(0), }; static struct i2c_board_info __initdata harmony_regulators[] = { diff --git a/arch/arm/mach-tegra/board-harmony.h b/arch/arm/mach-tegra/board-harmony.h index 1e57b071f52..d85142edaf6 100644 --- a/arch/arm/mach-tegra/board-harmony.h +++ b/arch/arm/mach-tegra/board-harmony.h @@ -17,7 +17,8 @@ #ifndef _MACH_TEGRA_BOARD_HARMONY_H #define _MACH_TEGRA_BOARD_HARMONY_H -#define HARMONY_GPIO_WM8903(_x_) (TEGRA_NR_GPIOS + (_x_)) +#define HARMONY_GPIO_TPS6586X(_x_) (TEGRA_NR_GPIOS + (_x_)) +#define HARMONY_GPIO_WM8903(_x_) (HARMONY_GPIO_TPS6586X(4) + (_x_)) #define TEGRA_GPIO_SD2_CD TEGRA_GPIO_PI5 #define TEGRA_GPIO_SD2_WP TEGRA_GPIO_PH1 -- cgit v1.2.3-70-g09d2 From e0dcd8a05be438b3d2e49ef61441ea3a463663f8 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 5 Jun 2011 22:03:13 -0700 Subject: mm: fix ENOSPC returned by handle_mm_fault() Al Viro observes that in the hugetlb case, handle_mm_fault() may return a value of the kind ENOSPC when its caller is expecting a value of the kind VM_FAULT_SIGBUS: fix alloc_huge_page()'s failure returns. Signed-off-by: Hugh Dickins Acked-by: Al Viro Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f33bb319b73..6402458fee3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1033,10 +1033,10 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, */ chg = vma_needs_reservation(h, vma, addr); if (chg < 0) - return ERR_PTR(chg); + return ERR_PTR(-VM_FAULT_OOM); if (chg) if (hugetlb_get_quota(inode->i_mapping, chg)) - return ERR_PTR(-ENOSPC); + return ERR_PTR(-VM_FAULT_SIGBUS); spin_lock(&hugetlb_lock); page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve); -- cgit v1.2.3-70-g09d2 From 59c5f46fbe01a00eedf54a23789634438bb80603 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 6 Jun 2011 18:06:33 +0900 Subject: Linux 3.0-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index afb8e0d26f2..0f1db8d9074 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Sneaky Weasel # *DOCUMENTATION* -- cgit v1.2.3-70-g09d2 From c8fb13d04ad0d08772f637bee873e620fcc064c2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 27 May 2011 13:56:12 -0700 Subject: OMAP: PM debug: fix section mismatch warnings WARNING: arch/arm/mach-omap2/built-in.o(.text+0x423c): Section mismatch in reference from the function pm_dbg_regset_init() to the function .init.text:pm_dbg_init() The function pm_dbg_regset_init() references the function __init pm_dbg_init(). This is often because pm_dbg_regset_init lacks a __init annotation or the annotation of pm_dbg_init is wrong. Signed-off-by: Russell King Signed-off-by: Kevin Hilman --- arch/arm/mach-omap2/pm-debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c index a5a83b358dd..e01da45c053 100644 --- a/arch/arm/mach-omap2/pm-debug.c +++ b/arch/arm/mach-omap2/pm-debug.c @@ -189,7 +189,7 @@ static struct dentry *pm_dbg_dir; static int pm_dbg_init_done; -static int __init pm_dbg_init(void); +static int pm_dbg_init(void); enum { DEBUG_FILE_COUNTERS = 0, @@ -595,7 +595,7 @@ static int option_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(pm_dbg_option_fops, option_get, option_set, "%llu\n"); -static int __init pm_dbg_init(void) +static int pm_dbg_init(void) { int i; struct dentry *d; -- cgit v1.2.3-70-g09d2 From 345f79b3de7f6d651e4dba794af7c7303bdfd649 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Tue, 31 May 2011 16:08:09 -0700 Subject: OMAP: PM: omap_device: fix device power domain callbacks After commit 4d27e9dcff00a6425d779b065ec8892e4f391661 (PM: Make power domain callbacks take precedence over subsystem ones), the power domain callbacks need to call the driver callbacks instead of relying on the default subsystem (in this case, platform_bus) to handle the driver callbacks. Validated on 3430/n900, 3530/Overo. Signed-off-by: Kevin Hilman --- arch/arm/plat-omap/omap_device.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c index a37b8eb65b7..49fc0df0c21 100644 --- a/arch/arm/plat-omap/omap_device.c +++ b/arch/arm/plat-omap/omap_device.c @@ -84,6 +84,7 @@ #include #include #include +#include #include #include @@ -539,20 +540,34 @@ int omap_early_device_register(struct omap_device *od) static int _od_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); + int ret; + + ret = pm_generic_runtime_suspend(dev); + + if (!ret) + omap_device_idle(pdev); + + return ret; +} - return omap_device_idle(pdev); +static int _od_runtime_idle(struct device *dev) +{ + return pm_generic_runtime_idle(dev); } static int _od_runtime_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); - return omap_device_enable(pdev); + omap_device_enable(pdev); + + return pm_generic_runtime_resume(dev); } static struct dev_power_domain omap_device_power_domain = { .ops = { .runtime_suspend = _od_runtime_suspend, + .runtime_idle = _od_runtime_idle, .runtime_resume = _od_runtime_resume, USE_PLATFORM_PM_SLEEP_OPS } -- cgit v1.2.3-70-g09d2